diff -Nru atlas-3.10.2/bin/atlas_install.c atlas-3.10.3/bin/atlas_install.c --- atlas-3.10.2/bin/atlas_install.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/atlas_install.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1998 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -369,21 +369,30 @@ { int i; long iret=0; - for (i=0; ln[i]; i++); - if (i > 0) - { - for (i--; i > 0 && !isdigit(ln[i]); i--); - if (i > 0 && ln[i] == '.') /* allow skip of 1 decimal point */ - for (i--; i > 0 && !isdigit(ln[i]); i--); - } - if (i > 0 || (i == 0 && (isdigit(ln[0]) || ln[0] == '.'))) + double d=0.0; /* stays 0 if sscanf matches nothing */ + + for (i=0; ln[i]; i++); /* find end of string */ + if (!i) /* empty string */ + return(0); /* returns 0 */ +/* + * Now skip any non-digit info at end of string (eg., MHz) + */ + for(i--; i > 0 && !isdigit(ln[i]); i--); + if (!isdigit(ln[i])) /* no digits in string */ + return(0); /* returns 0 */ +/* + * Go backwards until we find a non-digit to possibly end the number + */ + for (i=(i > 0) ? i-1 : 0; i > 0 && isdigit(ln[i]); i--); /* never step before ln[0] */ + if (i > 0) /* may be more to this number */ { - double d; - while(isdigit(ln[i]) && i > 0) i--; - if (!isdigit(ln[i])) i++; - sscanf(ln+i, "%lf", &d); - iret = (int)(d+0.5); + if (ln[i] == '.') + for(i--; i > 0 && isdigit(ln[i]); i--); } + else if (ln[0] != '.' 
&& !isdigit(ln[0])) + i = 1; + sscanf(ln+i, "%lf", &d); + iret = (int)(d+0.5); return(iret); } @@ -444,10 +453,10 @@ GetDate(&month, &day, &year, &hour, &min); fprintf(fp, "\n%s%s%s", sep, sep, sep); if (START) - fprintf(fp, "* BEGAN ATLAS3.10.2 INSTALL OF SECTION %1d-%1d-%1d ON %02d/%02d/%04d AT %02d:%02d *\n", + fprintf(fp, "* BEGAN ATLAS3.10.3 INSTALL OF SECTION %1d-%1d-%1d ON %02d/%02d/%04d AT %02d:%02d *\n", sec, subsec, subsubsec, month, day, year, hour, min); else - fprintf(fp, "* FINISHED ATLAS3.10.2 INSTALL OF SECTION %1d-%1d-%1d ON %02d/%02d/%04d AT %02d:%02d *\n", + fprintf(fp, "* FINISHED ATLAS3.10.3 INSTALL OF SECTION %1d-%1d-%1d ON %02d/%02d/%04d AT %02d:%02d *\n", sec, subsec, subsubsec, month, day, year, hour, min); fprintf(fp, "%s%s%s\n\n\n", sep, sep, sep); fclose(fp); @@ -568,7 +577,7 @@ } /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/bin/atlas_tee.c atlas-3.10.3/bin/atlas_tee.c --- atlas-3.10.2/bin/atlas_tee.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/atlas_tee.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/bin/extract.c atlas-3.10.3/bin/extract.c --- atlas-3.10.2/bin/extract.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/extract.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,6 +1,6 @@ /************************************************************************/ /* Extract v4.0.0 */ -/* (C) Copyright 1994 R. Clint Whaley (rwhaley@cs.utk.edu). */ +/* (C) Copyright 1994,2015 R. Clint Whaley (rwhaley@cs.utk.edu). 
*/ /* This program is distributed under the terms of the Gnu */ /* General Public License (GPL), with the following two exceptions: */ /* (1) Clause (9), dealing with updating the GPL automatically, is */ @@ -13,7 +13,7 @@ /* The full, unaltered, text of the GPL is included at the end of */ /* the program source listing. */ /* ------------------------------------------------------------------ */ -/* Last modified by the author on 10/04/10. */ +/* Last modified by the author on 11/20/15. */ /************************************************************************/ #include @@ -22,9 +22,9 @@ #include #define NAMLEN 1024 -#define LNLEN 2048 +#define LNLEN 8192 #define HANLEN 1024 -#define SUBLEN 1024 +#define SUBLEN 8192 #define F_nFlags 16 #define F_Case 0 @@ -233,7 +233,7 @@ /* RETURNS: nonzero if they are equal, else 0 */ /*****************************************************************************/ { - while( (*p1++ == *p2++) && (N) ) N--; + while( N && (*p1++ == *p2++) ) N--; return(!N); } @@ -762,7 +762,8 @@ { if (ExtDone) return(0); EE->FpIn->LineNo++; - return( (int) fgets(line, LNLEN, EE->FpIn->Fp) ); +/* return( (int) fgets(line, LNLEN, EE->FpIn->Fp) ); */ + return( (fgets(line, LNLEN, EE->FpIn->Fp) != NULL) ); } FILE2 *OpenFile(EXTENV *EE, char *Fnam, char *mode) @@ -1323,6 +1324,14 @@ return(ptr); } +int CountMacros(void) +{ + EXTMAC *p; + int i; + for (i=0,p=MacroBase; p; i++,p = p->next); + return(i); +} + /************************************************************************/ /* The routines push_macro & pop_macro are the heart of extract's */ /* macro substitution utility. 
A macro is defined by: */ @@ -2019,7 +2028,7 @@ void FindKeyMatch(EXTENV *EE, KEYS *Key) { int j, k, argmatch=0; - char line[LNLEN], tline[LNLEN]; + static char line[LNLEN], tline[LNLEN]; j = Key->HanLen + 1; k = EE->Joining; @@ -2040,13 +2049,13 @@ /*===========================================================================*/ -/* Rest of file is misc catagory :-) */ +/* Rest of file is misc category :-) */ /*===========================================================================*/ int icalc(EXTENV *EE, char line[]) { int i, k=0; - int istack[100]; + int istack[128]; if ( Mcisnum(line[0]) || (line[0] == '-' && Mcisnum(line[1])) ) i = Wstr2int(line, &istack[k]); @@ -2081,6 +2090,53 @@ istack[k-1] = istack[k] % istack[k-1]; k--; break; + case '=': /* boolean comparison */ + istack[k-1] = (istack[k] == istack[k-1]); + k--; + break; + case '!': /* boolean comparison */ + istack[k-1] = (istack[k] != istack[k-1]); + k--; + break; + case '<': /* less than boolean comparison */ + istack[k-1] = (istack[k] < istack[k-1]); + k--; + break; + case '>': /* greater than boolean comparison */ + istack[k-1] = (istack[k] > istack[k-1]); + k--; + break; + case '{': /* less than or equal boolean comparison */ + istack[k-1] = (istack[k] >= istack[k-1]); + k--; + break; + case '}': /* greater than or equal boolean comparison */ + istack[k-1] = (istack[k] >= istack[k-1]); + k--; + break; + case '&': /* bitwise and */ + istack[k-1] = (istack[k] & istack[k-1]); + k--; + break; + case '|': /* bitwise or */ + istack[k-1] = (istack[k] | istack[k-1]); + k--; + break; + case '^': /* bitwise exclusive or */ + istack[k-1] = (istack[k] ^ istack[k-1]); + k--; + break; + case '~': /* bitwise complement */ + istack[k] = ~(istack[k]); + break; + case 'l': /* bitwise left shift */ + istack[k-1] = (istack[k] << istack[k-1]); + k--; + break; + case 'r': /* bitwise right shift */ + istack[k-1] = (istack[k] >> istack[k-1]); + k--; + break; case 'a': /* absolute value */ if (istack[k] < 0) istack[k] = 
-istack[k]; break; @@ -2190,7 +2246,8 @@ } for(i=k; !Mciswspace(line[i]); i++) tline[i-k] = line[i]; tline[i-k] = '\0'; - defined = (int) FindMac(tline); +/* defined = (int) FindMac(tline); */ + defined = (FindMac(tline) != NULL); if (line[7] == '!') defined = !defined; if (GetLn(EE, line)) KeepOn = 1; else KeepOn = 0; @@ -2542,6 +2599,8 @@ if (wp->next->word[0] == '>') { wp1 = wp; wp0 = wp->next->next; } else if (wp->next->word[0] == '=') comp = '='; else if (wp->next->word[0] == '!') comp = '!'; + else if (wp->next->word[0] == '{') comp = '{'; + else if (wp->next->word[0] == '}') comp = '}'; else if (wp->next->word[0] != '<') ExtErr(EE, "Invalid integer condition: '%s'\n", ln); @@ -2651,15 +2710,22 @@ void HandleIIf(EXTENV *EE, char *ln) /* - * Expects ln of @iif int1 [<,>,=,!] int2 + * Expects ln of @iif int1 [<,>,{,},=,!] int2, or @iif @iexp <expression> */ { char ch; int i, j, ia1, ia2; - ch = GetIntComp(EE, ln+5, &i, &j, &ia1, &ia2, NULL, NULL); - if (ch == '=') i = (ia1 == ia2); - else if (ch == '!') i = (ia1 != ia2); - else if (ch == '<') i = (ia1 < ia2); + if (WstrcmpN(ln+5, "@iexp ",6)) /* rest of line is @iexp, */ + i = icalc(EE, ln+5+6); + else + { + ch = GetIntComp(EE, ln+5, &i, &j, &ia1, &ia2, NULL, NULL); + if (ch == '=') i = (ia1 == ia2); + else if (ch == '!') i = (ia1 != ia2); + else if (ch == '<') i = (ia1 < ia2); + else if (ch == '{') i = (ia1 <= ia2); + else if (ch == '}') i = (ia1 >= ia2); + } if (!i) /* skip */ DumpSkip(EE, NULL, "@iif ", "@endiif "); else @@ -2697,6 +2763,8 @@ if (comp == '<') KeepOn = (ia1 < ia2); else if (comp == '=') KeepOn = (ia1 == ia2); else if (comp == '!') KeepOn = (ia1 != ia2); + else if (comp == '{') KeepOn = (ia1 <= ia2); + else if (comp == '}') KeepOn = (ia1 >= ia2); while (KeepOn) { rewind(tfp.Fp); @@ -2704,6 +2772,8 @@ ia1 = Getiarg(EE, A1CONST, ia1, mac1); ia2 = Getiarg(EE, A2CONST, ia2, mac2); if (comp == '<') KeepOn = (ia1 < ia2); + else if (comp == '{') KeepOn = (ia1 <= ia2); + else if (comp == '}') KeepOn = (ia1 >= ia2); else if (comp 
== '=') KeepOn = (ia1 == ia2); else if (comp == '!') KeepOn = (ia1 != ia2); } @@ -3193,7 +3263,7 @@ /* * Store where my macros begin */ - sprintf(line, "@__MyMacBeg__%d", &EE); + sprintf(line, "@__MyMacBeg__%p", &EE); PushMacro2(&EE, 0, line, ""); EE.MyMacBeg = MacroBase; @@ -3249,7 +3319,7 @@ /* * Pop MyMacBeg */ - sprintf(line, "__MyMagBeg__%d", &EE); + sprintf(line, "@__MyMacBeg__%p", &EE); PopMacro2(&EE, line); /* @@ -3475,6 +3545,13 @@ switch(i) { + case 4: /* alias for @skip does not work in middle of line! */ + if (WstrcmpN(tline, "@// ", 3)) + { + if ( !Use[EC_Skip] ) return(0); + } + else DONE = 0; + break; case 5: if (WstrcmpN(tline, "@iif ", 4)) { @@ -3737,6 +3814,13 @@ if ( Use[EC_Dec] ) ExtWarn(EE, "unmatched @enddeclare"); else return(0); } + else if (WstrcmpN(tline, "@print@nmac ", 12)) + { + if (Use[EC_Print]) + fprintf(Warn, "%d:%s", CountMacros(), tline+12); + else + return(0); + } else if (WstrcmpN(tline, "@endextract ", 12)) { if ( Use[EC_EndExt] ) ExtDone = 1; @@ -3759,6 +3843,24 @@ } else DONE = 0; break; + case 15: + if (WstrcmpN(tline, "@print@allmacs ", 15)) + { + if (Use[EC_Print]) + { + int i; + EXTMAC *p; + for (i=0, p=MacroBase; p; i++,p=p->next) + { + fprintf(Warn, "'%10s' -> '%s'\n", p->Handle, p->Sub); + } + fprintf(Warn, "Done %d macros.\n\n", i); + } + else + return(0); + } + else DONE = 0; + break; default: DONE = 0; break; diff -Nru atlas-3.10.2/bin/gemmtst.c atlas-3.10.3/bin/gemmtst.c --- atlas-3.10.2/bin/gemmtst.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/gemmtst.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -45,6 +45,9 @@ #ifdef ATL_USEPTHREADS #include "atlas_tlvl3.h" #endif +#ifdef TRUST_REF + #include "atlas_reflevel3.h" +#endif #if defined(ATL_RKXOVER) || defined(TEST_RANKK) #define TRUST_BIGNORK void Mjoin(PATL,bignork_mm) @@ -87,7 +90,7 @@ #ifdef ATL_DeclareSlens F77_INTEGER ATL_Slen1, ATL_Slen2; #endif -double time00(); +double time00(void); void printmat(char *mat, int M, int N, TYPE *A, int lda) { @@ -158,6 +161,9 @@ #define trusted_gemm(TA, TB, m, n, k, al, A, lda, B, ldb, be, C, ldc) \ Mjoin(Mjoin(cblas_,PRE),gemm) \ (CblasColMajor, TA, TB, m, n, k, al, A, lda, B, ldb, be, C, ldc) +#elif defined(TRUST_REF) + #define trusted_gemm(TA, TB, m, n, k, al, A, lda, B, ldb, be, C, ldc) \ + Mjoin(PATL,refgemm)(TA, TB, m, n, k, al, A, lda, B, ldb, be, C, ldc) #elif defined(ATL_NOAFFINITY) || defined(ATL_AFFINITY) #define trusted_gemm(TA, TB, m, n, k, al, A, lda, B, ldb, be, C, ldc) \ Mjoin(PATL,tgemm)(TA, TB, m, n, k, al, A, lda, B, ldb, be, C, ldc) @@ -310,15 +316,16 @@ for (i=0; i != nL2; i++) j += L2[i]; }*/ - /* invalidate L2 cache */ - #if !ATL_LINEFLUSH - l2ret = ATL_flushcache( -1 ); - #else - ATL_flushCacheByAddr(ldc*N, C); - ATL_flushCacheByAddr(ldb*Nb, B); - ATL_flushCacheByAddr(lda*Na, A); - #endif - + if (CACHESIZE) + { /* invalidate L2 cache */ + #if !ATL_LINEFLUSH + l2ret = ATL_flushcache( -1 ); + #else + ATL_flushCacheByAddr(ldc*N, C); + ATL_flushCacheByAddr(ldb*Nb, B); + ATL_flushCacheByAddr(lda*Na, A); + #endif + } t0 = time00(); trusted_gemm(TAc, TBc, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); t1 = time00() - t0; @@ -342,7 +349,8 @@ matgen(M, N, D, ldd, M*N); #endif - /* invalidate L2 cache */ + if (CACHESIZE) + { /* invalidate L2 cache */ #if !ATL_LINEFLUSH l2ret = ATL_flushcache( -1 ); #else @@ -350,6 +358,7 @@ ATL_flushCacheByAddr(ldb*Nb, B); ATL_flushCacheByAddr(lda*Na, A); #endif + } t0 = time00(); test_gemm(TAc, TBc, M, N, K, alpha, A, lda, B, ldb, beta, 
D, ldd); diff -Nru atlas-3.10.2/bin/gpmmtst.c atlas-3.10.3/bin/gpmmtst.c --- atlas-3.10.2/bin/gpmmtst.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/gpmmtst.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/bin/invtst.c atlas-3.10.3/bin/invtst.c --- atlas-3.10.2/bin/invtst.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/invtst.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/bin/lanbtst.c atlas-3.10.3/bin/lanbtst.c --- atlas-3.10.2/bin/lanbtst.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/lanbtst.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/bin/latime.c atlas-3.10.3/bin/latime.c --- atlas-3.10.2/bin/latime.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/latime.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/bin/llttst.c atlas-3.10.3/bin/llttst.c --- atlas-3.10.2/bin/llttst.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/llttst.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/bin/lutst.c atlas-3.10.3/bin/lutst.c --- atlas-3.10.2/bin/lutst.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/lutst.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/bin/ormtst.c atlas-3.10.3/bin/ormtst.c --- atlas-3.10.2/bin/ormtst.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/ormtst.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/bin/printblk.c atlas-3.10.3/bin/printblk.c --- atlas-3.10.2/bin/printblk.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/printblk.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/bin/qrtst.c atlas-3.10.3/bin/qrtst.c --- atlas-3.10.2/bin/qrtst.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/qrtst.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, R. Clint Whaley, Anthony M. Castaldo diff -Nru atlas-3.10.2/bin/slvtst.c atlas-3.10.3/bin/slvtst.c --- atlas-3.10.2/bin/slvtst.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/slvtst.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -419,11 +419,15 @@ #ifdef TREAL t0 = X1[i] - X2[i]; t0 = Mabs(t0); + if (t0 != t0) + return(t0); if (t0 > max) max = t0; #else t0 = X1[2*i] - X2[2*i]; t1 = X1[2*i+1] - X2[2*i+1]; t0 = Mabs(t0); t1 = Mabs(t1); t0 += t1; + if (t0 != t0) + return(t0); if (t0 > max) max = t0; #endif } diff -Nru atlas-3.10.2/bin/trtritst.c atlas-3.10.3/bin/trtritst.c --- atlas-3.10.2/bin/trtritst.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/trtritst.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 Peter Soendergaard * * Code contributers : Peter Soendergaard, R. 
Clint Whaley diff -Nru atlas-3.10.2/bin/uumtst.c atlas-3.10.3/bin/uumtst.c --- atlas-3.10.2/bin/uumtst.c 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/bin/uumtst.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/AMD64K10h32SSE3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/AMD64K10h32SSE3.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/AMD64K10h64SSE3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/AMD64K10h64SSE3.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/AMDDOZER32AVXFMA4.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/AMDDOZER32AVXFMA4.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/AMDDOZER64AVXFMA4.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/AMDDOZER64AVXFMA4.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/ARM64a5364.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/ARM64a5364.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/ARM64a5764.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/ARM64a5764.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/ARM64xgene164.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/ARM64xgene164.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/ARMa1532FPV3D32MACNONIEEE.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/ARMa1532FPV3D32MACNONIEEE.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/ARMa1532FPV3D32MAC.tar.bz2 and 
/tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/ARMa1532FPV3D32MAC.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/ARMa732FPV3D32MACNONIEEE.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/ARMa732FPV3D32MACNONIEEE.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/ARMa732FPV3D32MAC.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/ARMa732FPV3D32MAC.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/ARMa932FPV3D16MAC.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/ARMa932FPV3D16MAC.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/ARMa932FPV3D32MAC.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/ARMa932FPV3D32MAC.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/ARMv732NEON.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/ARMv732NEON.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/ARMv732.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/ARMv732.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/atlas_test1.1.3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/atlas_test1.1.3.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/BOZOL1.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/BOZOL1.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/Core232SSE3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/Core232SSE3.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/Core264SSE3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/Core264SSE3.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/CoreDuo32SSE3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/CoreDuo32SSE3.tar.bz2 differ Binary 
files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/Corei132SSE3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/Corei132SSE3.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/Corei164SSE3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/Corei164SSE3.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/Corei232AVX.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/Corei232AVX.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/Corei264AVX.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/Corei264AVX.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/Corei264SSE3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/Corei264SSE3.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/Corei364AVXMAC.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/Corei364AVXMAC.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/Corei464AVXMAC.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/Corei464AVXMAC.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/HAMMER64SSE3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/HAMMER64SSE3.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/IA64Itan264.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/IA64Itan264.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/IBMz1032.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/IBMz1032.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/IBMz1064.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/IBMz1064.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/IBMz1364VXZ.tar.bz2 and 
/tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/IBMz1364VXZ.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/IBMz19632.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/IBMz19632.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/IBMz19664.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/IBMz19664.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/K7323DNow.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/K7323DNow.tar.bz2 differ diff -Nru atlas-3.10.2/CONFIG/ARCHS/Make.ext atlas-3.10.3/CONFIG/ARCHS/Make.ext --- atlas-3.10.2/CONFIG/ARCHS/Make.ext 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/ARCHS/Make.ext 2016-07-28 19:43:01.000000000 +0000 @@ -22,20 +22,26 @@ basdr = $(basdRCW)/misc/ARCHDEF files = AMD64K10h32SSE3.tar.bz2 AMD64K10h64SSE3.tar.bz2 \ - AMDDOZER32AVXFMA4.tar.bz2 AMDDOZER64AVXFMA4.tar.bz2 ARMv732.tar.bz2 \ + AMDDOZER32AVXFMA4.tar.bz2 AMDDOZER64AVXFMA4.tar.bz2 \ + ARM64a5364.tar.bz2 ARM64a5764.tar.bz2 ARM64xgene164.tar.bz2 \ + ARMa1532FPV3D32MAC.tar.bz2 ARMa1532FPV3D32MACNONIEEE.tar.bz2 \ + ARMa732FPV3D32MAC.tar.bz2 ARMa732FPV3D32MACNONIEEE.tar.bz2 \ + ARMa932FPV3D16MAC.tar.bz2 ARMa932FPV3D32MAC.tar.bz2 ARMv732.tar.bz2 \ ARMv732NEON.tar.bz2 BOZOL1.tar.bz2 Core232SSE3.tar.bz2 \ Core264SSE3.tar.bz2 CoreDuo32SSE3.tar.bz2 Corei132SSE3.tar.bz2 \ Corei164SSE3.tar.bz2 Corei232AVX.tar.bz2 Corei264AVX.tar.bz2 \ - Corei264SSE3.tar.bz2 Corei364AVXMAC.tar.bz2 CreateDef.sh \ - CreateDirs.sh CreateTar.sh HAMMER64SSE3.tar.bz2 IA64Itan264.tar.bz2 \ - IBMz1032.tar.bz2 IBMz1064.tar.bz2 IBMz19632.tar.bz2 IBMz19664.tar.bz2 \ + Corei264SSE3.tar.bz2 Corei364AVXMAC.tar.bz2 Corei464AVXMAC.tar.bz2 \ + CreateDef.sh CreateDirs.sh CreateTar.sh HAMMER64SSE3.tar.bz2 \ + IA64Itan264.tar.bz2 IBMz1032.tar.bz2 IBMz1064.tar.bz2 \ + IBMz1364VXZ.tar.bz2 IBMz19632.tar.bz2 IBMz19664.tar.bz2 \ K7323DNow.tar.bz2 KillDirs.sh MIPSICE932.tar.bz2 
MIPSICE964.tar.bz2 \ MIPSR1xK64.tar.bz2 Makefile P432SSE2.tar.bz2 P4E32SSE3.tar.bz2 \ P4E64SSE3.tar.bz2 PIII32SSE1.tar.bz2 POWER432.tar.bz2 \ - POWER464.tar.bz2 POWER564.tar.bz2 POWER764VSX.tar.bz2 \ - PPCG432AltiVec.tar.bz2 PPCG532AltiVec.tar.bz2 PPCG564AltiVec.tar.bz2 \ - PPRO32.tar.bz2 USIII32.tar.bz2 USIII64.tar.bz2 USIV32.tar.bz2 \ - USIV64.tar.bz2 UST232.tar.bz2 UST264.tar.bz2 atlas_test1.1.3.tar.bz2 \ + POWER464.tar.bz2 POWER564.tar.bz2 POWER764LEVSX.tar.bz2 \ + POWER764VSX.tar.bz2 POWER864LEVSX.tar.bz2 PPCG432AltiVec.tar.bz2 \ + PPCG532AltiVec.tar.bz2 PPCG564AltiVec.tar.bz2 PPRO32.tar.bz2 \ + USIII32.tar.bz2 USIII64.tar.bz2 USIV32.tar.bz2 USIV64.tar.bz2 \ + UST232.tar.bz2 UST264.tar.bz2 atlas_test1.1.3.tar.bz2 \ lapack_test.tar.bz2 negflt.c negmmfile.c negmvfile.c negr1file.c \ x86SSE132SSE1.tar.bz2 x86SSE232SSE2.tar.bz2 x86x8732.tar.bz2 @@ -158,6 +164,12 @@ /tmp/Corei364AVXMAC.tar Corei364AVXMAC bzip2 /tmp/Corei364AVXMAC.tar mv /tmp/Corei364AVXMAC.tar.bz2 ./. +Corei464AVXMAC.tar.bz2 : $(basdr)/Corei464AVXMAC + - rm -f /tmp/Corei464AVXMAC.tar /tmp/Corei464AVXMAC.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/Corei464AVXMAC.tar Corei464AVXMAC + bzip2 /tmp/Corei464AVXMAC.tar + mv /tmp/Corei464AVXMAC.tar.bz2 ./. HAMMER64SSE3.tar.bz2 : $(basdr)/HAMMER64SSE3 - rm -f /tmp/HAMMER64SSE3.tar /tmp/HAMMER64SSE3.tar.bz2 cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ @@ -308,6 +320,18 @@ /tmp/POWER764VSX.tar POWER764VSX bzip2 /tmp/POWER764VSX.tar mv /tmp/POWER764VSX.tar.bz2 ./. +POWER864LEVSX.tar.bz2 : $(basdr)/POWER864LEVSX + - rm -f /tmp/POWER864LEVSX.tar /tmp/POWER864LEVSX.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/POWER864LEVSX.tar POWER864LEVSX + bzip2 /tmp/POWER864LEVSX.tar + mv /tmp/POWER864LEVSX.tar.bz2 ./. 
+POWER764LEVSX.tar.bz2 : $(basdr)/POWER764LEVSX + - rm -f /tmp/POWER764LEVSX.tar /tmp/POWER764LEVSX.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/POWER764LEVSX.tar POWER764LEVSX + bzip2 /tmp/POWER764LEVSX.tar + mv /tmp/POWER764LEVSX.tar.bz2 ./. IBMz1032.tar.bz2 : $(basdr)/IBMz1032 - rm -f /tmp/IBMz1032.tar /tmp/IBMz1032.tar.bz2 cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ @@ -332,6 +356,66 @@ /tmp/IBMz19632.tar IBMz19632 bzip2 /tmp/IBMz19632.tar mv /tmp/IBMz19632.tar.bz2 ./. +IBMz1364VXZ.tar.bz2 : $(basdr)/IBMz1364VXZ + - rm -f /tmp/IBMz1364VXZ.tar /tmp/IBMz1364VXZ.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/IBMz1364VXZ.tar IBMz1364VXZ + bzip2 /tmp/IBMz1364VXZ.tar + mv /tmp/IBMz1364VXZ.tar.bz2 ./. +ARM64xgene164.tar.bz2 : $(basdr)/ARM64xgene164 + - rm -f /tmp/ARM64xgene164.tar /tmp/ARM64xgene164.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/ARM64xgene164.tar ARM64xgene164 + bzip2 /tmp/ARM64xgene164.tar + mv /tmp/ARM64xgene164.tar.bz2 ./. +ARM64a5764.tar.bz2 : $(basdr)/ARM64a5764 + - rm -f /tmp/ARM64a5764.tar /tmp/ARM64a5764.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/ARM64a5764.tar ARM64a5764 + bzip2 /tmp/ARM64a5764.tar + mv /tmp/ARM64a5764.tar.bz2 ./. +ARM64a5364.tar.bz2 : $(basdr)/ARM64a5364 + - rm -f /tmp/ARM64a5364.tar /tmp/ARM64a5364.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/ARM64a5364.tar ARM64a5364 + bzip2 /tmp/ARM64a5364.tar + mv /tmp/ARM64a5364.tar.bz2 ./. +ARMa932FPV3D16MAC.tar.bz2 : $(basdr)/ARMa932FPV3D16MAC + - rm -f /tmp/ARMa932FPV3D16MAC.tar /tmp/ARMa932FPV3D16MAC.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/ARMa932FPV3D16MAC.tar ARMa932FPV3D16MAC + bzip2 /tmp/ARMa932FPV3D16MAC.tar + mv /tmp/ARMa932FPV3D16MAC.tar.bz2 ./. 
+ARMa932FPV3D32MAC.tar.bz2 : $(basdr)/ARMa932FPV3D32MAC + - rm -f /tmp/ARMa932FPV3D32MAC.tar /tmp/ARMa932FPV3D32MAC.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/ARMa932FPV3D32MAC.tar ARMa932FPV3D32MAC + bzip2 /tmp/ARMa932FPV3D32MAC.tar + mv /tmp/ARMa932FPV3D32MAC.tar.bz2 ./. +ARMa1532FPV3D32MAC.tar.bz2 : $(basdr)/ARMa1532FPV3D32MAC + - rm -f /tmp/ARMa1532FPV3D32MAC.tar /tmp/ARMa1532FPV3D32MAC.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/ARMa1532FPV3D32MAC.tar ARMa1532FPV3D32MAC + bzip2 /tmp/ARMa1532FPV3D32MAC.tar + mv /tmp/ARMa1532FPV3D32MAC.tar.bz2 ./. +ARMa732FPV3D32MAC.tar.bz2 : $(basdr)/ARMa732FPV3D32MAC + - rm -f /tmp/ARMa732FPV3D32MAC.tar /tmp/ARMa732FPV3D32MAC.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/ARMa732FPV3D32MAC.tar ARMa732FPV3D32MAC + bzip2 /tmp/ARMa732FPV3D32MAC.tar + mv /tmp/ARMa732FPV3D32MAC.tar.bz2 ./. +ARMa1532FPV3D32MACNONIEEE.tar.bz2 : $(basdr)/ARMa1532FPV3D32MACNONIEEE + - rm -f /tmp/ARMa1532FPV3D32MACNONIEEE.tar /tmp/ARMa1532FPV3D32MACNONIEEE.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/ARMa1532FPV3D32MACNONIEEE.tar ARMa1532FPV3D32MACNONIEEE + bzip2 /tmp/ARMa1532FPV3D32MACNONIEEE.tar + mv /tmp/ARMa1532FPV3D32MACNONIEEE.tar.bz2 ./. +ARMa732FPV3D32MACNONIEEE.tar.bz2 : $(basdr)/ARMa732FPV3D32MACNONIEEE + - rm -f /tmp/ARMa732FPV3D32MACNONIEEE.tar /tmp/ARMa732FPV3D32MACNONIEEE.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/ARMa732FPV3D32MACNONIEEE.tar ARMa732FPV3D32MACNONIEEE + bzip2 /tmp/ARMa732FPV3D32MACNONIEEE.tar + mv /tmp/ARMa732FPV3D32MACNONIEEE.tar.bz2 ./. 
HAMMER32SSE2.tar.bz2 : $(basdr)/HAMMER32SSE2 - rm -f /tmp/HAMMER32SSE2.tar /tmp/HAMMER32SSE2.tar.bz2 diff -Nru atlas-3.10.2/CONFIG/ARCHS/Makefile atlas-3.10.3/CONFIG/ARCHS/Makefile --- atlas-3.10.2/CONFIG/ARCHS/Makefile 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/ARCHS/Makefile 2016-07-28 19:43:01.000000000 +0000 @@ -44,7 +44,7 @@ xnegmv : $(SRCdir)/CONFIG/ARCHS/negmvfile.c $(XCC) $(XCCFLAGS) -o $@ $(SRCdir)/CONFIG/ARCHS/negmvfile.c xnegflt : $(SRCdir)/CONFIG/ARCHS/negflt.c - $(XCC) -O -o xnegflt $(SRCdir)/CONFIG/ARCHS/negflt.c + $(XCC) $(XCCFLAGS) -O -o xnegflt $(SRCdir)/CONFIG/ARCHS/negflt.c ssys_put: - cp $(adefd)/kern/sMULADD $(SYSdir)/res/. Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/MIPSICE932.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/MIPSICE932.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/MIPSICE964.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/MIPSICE964.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/MIPSR1xK64.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/MIPSR1xK64.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/P432SSE2.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/P432SSE2.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/P4E32SSE3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/P4E32SSE3.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/P4E64SSE3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/P4E64SSE3.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/PIII32SSE1.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/PIII32SSE1.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/POWER432.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/POWER432.tar.bz2 differ Binary files 
/tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/POWER464.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/POWER464.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/POWER564.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/POWER564.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/POWER764LEVSX.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/POWER764LEVSX.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/POWER764VSX.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/POWER764VSX.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/POWER864LEVSX.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/POWER864LEVSX.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/PPCG432AltiVec.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/PPCG432AltiVec.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/PPCG532AltiVec.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/PPCG532AltiVec.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/PPCG564AltiVec.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/PPCG564AltiVec.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/PPRO32.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/PPRO32.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/USIII32.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/USIII32.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/USIII64.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/USIII64.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/USIV32.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/USIV32.tar.bz2 differ Binary files 
/tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/USIV64.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/USIV64.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/UST232.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/UST232.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/UST264.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/UST264.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/WIN64/Core264SSE3.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/WIN64/Core264SSE3.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/WIN64/Corei264AVX.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/WIN64/Corei264AVX.tar.bz2 differ diff -Nru atlas-3.10.2/CONFIG/ARCHS/WIN64/Make.ext atlas-3.10.3/CONFIG/ARCHS/WIN64/Make.ext --- atlas-3.10.2/CONFIG/ARCHS/WIN64/Make.ext 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/CONFIG/ARCHS/WIN64/Make.ext 2016-07-28 19:43:02.000000000 +0000 @@ -0,0 +1,41 @@ + +topd = /home/whaley/git/math-atlas/AtlasBase +ext = /home/whaley/git/math-atlas/STAB/xextract +extF = $(ext) -langF -lnlen71 -Remtblank -llwarn2 -LAPACK1 $(incs) +ext9 = $(ext) -langF -lnlen132 -RemtBlank $(incs) +extC = $(ext) -langC -lnlen79 -Remtblank -llwarn2 $(incs) +extM = $(ext) -langM -lnlen79 -llwarn2 $(incs) +incs = -def topd /home/whaley/git/math-atlas/AtlasBase \ + -def incd /home/whaley/git/math-atlas/AtlasBase/Clint \ + -def ext /home/whaley/git/math-atlas/STAB/xextract \ + -def BASEdir /home/whaley/git/math-atlas/AtlasBase/Antoine/ \ + -def basd /home/whaley/git/math-atlas/AtlasBase/Clint + +default: all +force_build: +basd = /home/whaley/git/math-atlas/AtlasBase/Clint +basdRCW = /home/whaley/git/math-atlas/AtlasBase/Clint +basdAPP = /home/whaley/git/math-atlas/AtlasBase/Antoine +basdSTU = /home/whaley/git/math-atlas/AtlasBase/Students +incf = 
/home/whaley/git/math-atlas/AtlasBase/gen.inc + +files = Core264SSE3.tar.bz2 Corei264AVX.tar.bz2 +basdw = $(basdRCW)/misc/ARCHDEF/WIN64 + +all : $(files) + +Corei264AVX.tar.bz2 : $(basdw)/Corei264AVX + - rm -f /tmp/Corei264AVX.tar /tmp/Corei264AVX.tar.bz2 + cd $(basdw) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/Corei264AVX.tar Corei264AVX + bzip2 /tmp/Corei264AVX.tar + mv /tmp/Corei264AVX.tar.bz2 ./. +Core264SSE3.tar.bz2 : $(basdw)/Core264SSE3 + - rm -f /tmp/Core264SSE3.tar /tmp/Core264SSE3.tar.bz2 + cd $(basdw) ; tar --dereference --exclude 'CVS' -c -f \ + /tmp/Core264SSE3.tar Core264SSE3 + bzip2 /tmp/Core264SSE3.tar + mv /tmp/Core264SSE3.tar.bz2 ./. + +Make.ext : $(topd)/make.base + $(extM) -b $(topd)/make.base -o Make.ext rout=atlas/config/archs/win64 Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/x86SSE132SSE1.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/x86SSE132SSE1.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/x86SSE232SSE2.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/x86SSE232SSE2.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/CONFIG/ARCHS/x86x8732.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/CONFIG/ARCHS/x86x8732.tar.bz2 differ diff -Nru atlas-3.10.2/CONFIG/include/atlas_asm.h atlas-3.10.3/CONFIG/include/atlas_asm.h --- atlas-3.10.2/CONFIG/include/atlas_asm.h 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/include/atlas_asm.h 2016-07-28 19:43:00.000000000 +0000 @@ -6,9 +6,14 @@ #define my_join(pre, nam) pre ## nam #endif -#if (defined(ATL_OS_Win64) && !defined(ATL_USE64BITS)) || \ - defined(ATL_OS_Win9x) || defined(ATL_OS_OSX) || defined(ATL_OS_WinNT) +#if defined(ATL_OS_Win9x) || defined(ATL_OS_OSX) #define ATL_asmdecor(nam) Mjoin(_,nam) +#elif defined(ATL_OS_Win64) || defined(ATL_OS_WinNT) + #if defined(ATL_GAS_WOW64) || defined (ATL_USE64BITS) + #define ATL_asmdecor(nam) nam + #else + #define ATL_asmdecor(nam) 
Mjoin(_,nam) + #endif #elif defined(ATL_OS_AIX) && defined(ATL_GAS_PPC) #define ATL_asmdecor(nam) Mjoin(.,nam) #elif !defined(ATL_OS_OSX) && defined(ATL_GAS_PPC) && defined(ATL_USE64BITS) diff -Nru atlas-3.10.2/CONFIG/include/atlconf.h atlas-3.10.3/CONFIG/include/atlconf.h --- atlas-3.10.2/CONFIG/include/atlconf.h 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/include/atlconf.h 2016-07-28 19:43:00.000000000 +0000 @@ -18,34 +18,52 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS, AFARM, AFS390}; -#define NMACH 52 +/* + * Corei1: Nahalem / Westmere + * Corei2: ivy bridge, sandy bridge: AVX + * Corei3: haswell: AVXMAC + * Corei3EP: v3 Haswell, E5-26XX + * Corei4: skylake + */ +#define NMACH 62 static char *machnam[NMACH] = - {"UNKNOWN", "POWER3", "POWER4", "POWER5", "PPCG4", "PPCG5", - "POWER6", "POWER7", "POWERe6500", "IBMz9", "IBMz10", "IBMz196", + {"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5", + "POWER6", "POWER7", "POWER8", "POWERe6500", + "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "x86x87", "x86SSE1", "x86SSE2", "x86SSE3", "P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo", "CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3", - "Atom", "P4", "P4E", + "Corei4", "Atom", "P4", "P4E", "Efficeon", "K7", "HAMMER", "AMD64K10h", "AMDLLANO", "AMDDOZER","AMDDRIVER", "UNKNOWNx86", "IA64Itan", "IA64Itan2", "USI", "USII", "USIII", "USIV", "UST1", "UST2", "UnknownUS", - "MIPSR1xK", "MIPSICE9", "ARMv7"}; -enum MACHTYPE {MACHOther, IbmPwr3, IbmPwr4, IbmPwr5, PPCG4, PPCG5, - IbmPwr6, IbmPwr7, Pwre6500, - IbmZ9, IbmZ10, IbmZ196, /* s390(x) in Linux */ + "MIPSR1xK", "MIPSICE9", + "ARMa7", "ARMa9", "ARMa15", "ARMa17", + "ARM64xgene1", "ARM64a53", "ARM64a57"}; +enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5, + IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500, + IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, /* s390(x) in Linux */ x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */ IntP5, IntP5MMX, IntPPRO, 
IntPII, IntPIII, IntPM, IntCoreS, IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2, - IntCorei3, IntAtom, IntP4, IntP4E, TMEff, + IntCorei3, IntCorei4, IntAtom, IntP4, IntP4E, TMEff, AmdAthlon, AmdHammer, Amd64K10h, AmdLlano, AmdDozer, AmdDriver, x86X, IA64Itan, IA64Itan2, SunUSI, SunUSII, SunUSIII, SunUSIV, SunUST1, SunUST2, SunUSX, MIPSR1xK, /* includes R10K, R12K, R14K, R16K */ MIPSICE9, /* SiCortex ICE9 -- like MIPS5K */ - ARMv7 /* includes Cortex A8, A9 */ + ARM7, /* odroid-little */ + ARM9, /* pandaboard */ + ARM15, /* tegra, odroid-big */ + ARM17, /* lots of tablets */ + ARM64xg, /* includes ARMv8 */ + ARM64a53, + ARM64a57 }; #define MachIsX86(mach_) \ ( (mach_) >= x86x87 && (mach_) <= x86X ) +#define MachIsPWR(mach_) \ + ( (mach_) >= IbmPwr3 && (mach_) <= Pwre6500 ) #define MachIsIA64(mach_) \ ( (mach_) >= IA64Itan && (mach_) <= IA64Itan2 ) #define MachIsUS(mach_) \ @@ -60,9 +78,11 @@ #define MachIsPPC(mach_) \ ( (mach_) >= PPCG4 && (mach_) <= PPCG5 ) #define MachIsARM(mach_) \ - ( (mach_) == ARMv7 ) + ( (mach_) >= ARM7 && (mach_) <= ARM17 ) +#define MachIsARM64(mach_) \ + ( (mach_) >= ARM64xg && || (mach_) <= ARM64a57) #define MachIsS390(mach_) \ - ( (mach_) >= IbmZ9 && (mach_) <= IbmZ196 ) + ( (mach_) >= IbmZ9 && (mach_) <= IbmZ13 ) static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"}; @@ -76,21 +96,23 @@ enum F2CINT {f2c_IntErr=0, FintCint, FintClong, FintClonglong, FintCshort}; enum F2CSTRING {f2c_StrErr=0, fstrSun, fstrCray, fstrStructVal, fstrStructPtr}; -#define NISA 11 +#define NISA 15 static char *ISAXNAM[NISA] = - {"", "VSX", "AltiVec", "AVXMAC", "AVXFMA4", "AVX", "SSE3", "SSE2", "SSE1", - "3DNow", "NEON"}; + {"", "VSX", "VXZ", "AltiVec", + "AVXMAC", "AVXFMA4", "AVX", "SSE3", "SSE2", "SSE1", "3DNow", + "FPV3D2MACNEON", "FPV3D16MACNEON", "FPV3D32MAC", "FPV3D16MAC"}; enum ISAEXT - {ISA_None=0, ISA_VSX, ISA_AV, ISA_AVXMAC, ISA_AVXFMA4, ISA_AVX, - ISA_SSE3, ISA_SSE2, ISA_SSE1, ISA_3DNow, ISA_NEON}; + {ISA_None=0, 
ISA_VSX, ISA_VXZ, ISA_AV, + ISA_AVXMAC, ISA_AVXFMA4, ISA_AVX, ISA_SSE3, ISA_SSE2, ISA_SSE1, ISA_3DNow, + ISA_NEON, ISA_NEON16, ISA_VFP3D32MAC, ISA_VFP3D16MAC}; -#define NASMD 9 +#define NASMD 11 enum ASMDIA {ASM_None=0, gas_x86_32, gas_x86_64, gas_sparc, gas_ppc, gas_parisc, - gas_mips, gas_arm, gas_s390}; + gas_mips, gas_arm, gas_arm64, gas_wow64, gas_s390}; static char *ASMNAM[NASMD] = {"", "GAS_x8632", "GAS_x8664", "GAS_SPARC", "GAS_PPC", "GAS_PARISC", - "GAS_MIPS", "GAS_ARM", "GAS_S390"}; + "GAS_MIPS", "GAS_ARM", "GAS_ARM64", "GAS_WOW64", "GAS_S390"}; /* * Used for archinfo probes (can pack in bitfield) diff -Nru atlas-3.10.2/CONFIG/include/atlconf_misc.h atlas-3.10.3/CONFIG/include/atlconf_misc.h --- atlas-3.10.2/CONFIG/include/atlconf_misc.h 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/include/atlconf_misc.h 2016-07-28 19:43:00.000000000 +0000 @@ -18,12 +18,11 @@ int GetFirstHex(char *ln); long long GetFirstLong(char *ln); long long GetFirstLongHex(char *ln); -long long GetLastLongWithRound(char *ln); double GetFirstDouble(char *ln); int GetLastInt(char *ln); int GetLastHex(char *ln); long long GetLastLong(char *ln); -long long GetLastLongHex(char *ln); +long long GetLastLongWithRound(char *ln); int fNumLines(char *fnam); char *GetPathEnvVar(void); int GetIntBeforeWord(char *word, char *ln); diff -Nru atlas-3.10.2/CONFIG/src/atlbench.c atlas-3.10.3/CONFIG/src/atlbench.c --- atlas-3.10.2/CONFIG/src/atlbench.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/atlbench.c 2016-07-28 19:43:00.000000000 +0000 @@ -102,7 +102,7 @@ { assert(n > 200); remove("big.out"); - sprintf(cmnd, "./bin/x%cmmtst_big -n %d -Test 0 > big.out\n", pre, n); + sprintf(cmnd, "./ATLrun.sh ./bin/ x%cmmtst_big -n %d -Test 0 > big.out\n", pre, n); n -= 200; /* fprintf(stderr, "cmnd='%s'", cmnd); */ } diff -Nru atlas-3.10.2/CONFIG/src/atlcomp.txt atlas-3.10.3/CONFIG/src/atlcomp.txt --- atlas-3.10.2/CONFIG/src/atlcomp.txt 2014-07-10 16:22:02.000000000 +0000 +++ 
atlas-3.10.3/CONFIG/src/atlcomp.txt 2016-07-28 19:43:00.000000000 +0000 @@ -38,15 +38,15 @@ # All non-core x86 like these params for MMFLAGS (probably want -O3 for # Level 1 & 2, but must confirm) # -# Core2/Corei1/Corei2/Corei3 -MACH=Corei3 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,gcc,xcc +# Core2/Corei1/Corei2/Corei3/Corei4 +MACH=Corei3,Corei4 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,gcc,xcc 'gcc' '-fomit-frame-pointer -mfpmath=sse -O2' +MACH=Corei4 OS=all LVL=1100 COMPS=dmc,smc + 'gcc' '-fomit-frame-pointer -mfpmath=sse -O2 -fschedule-insns' MACH=Corei2 OS=Win9x,WinNT,Win64 LVL=1005 COMPS=smc,dmc,skc,dkc,icc 'gcc-4' '-fomit-frame-pointer -mfpmath=sse -O2 -fno-schedule-insns2' MACH=Corei2 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,gcc,xcc 'gcc' '-fomit-frame-pointer -mfpmath=sse -O2 -fno-schedule-insns2' -MACH=Corei2 OS=ALL LVL=50 COMPS=skc,dkc,icc,xcc - 'clang' '-fomit-frame-pointer -mavx -O2 -m64 -Wno-parentheses' MACH=Corei1 OS=Win9x,WinNT,Win64 LVL=1005 COMPS=smc,dmc,skc,dkc,icc,gcc 'gcc-4' '-fomit-frame-pointer -mfpmath=sse -O2 -fno-schedule-insns2' MACH=Corei1 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,gcc @@ -191,9 +191,13 @@ MACH=PPCG5 OS=ALL LVL=1000 COMPS=skc 'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2 -mvrsave' MACH=POWER7 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc - 'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops' + 'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -mvrsave -funroll-all-loops' MACH=POWER7 OS=ALL LVL=1010 COMPS=f77 - 'gfortran' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops' + 'gfortran' '-O2 -mvsx -mcpu=power7 -mtune=power7 -mvrsave -funroll-all-loops' +MACH=POWER8 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc + 'gcc' '-O2 -mvsx -mcpu=power8 -mtune=power8 -mvrsave -funroll-all-loops' +MACH=POWER8 OS=ALL LVL=1010 COMPS=f77 + 'gfortran' '-O2 -mvsx -mcpu=power8 -mtune=power8 -mvrsave -funroll-all-loops' MACH=POWER6 OS=ALL LVL=1010 
COMPS=icc,smc,dmc,skc,dkc,xcc,gcc 'gcc' '-mcpu=power6 -mtune=power6 -maltivec -O3 -fno-schedule-insns -fschedule-insns2 -minsert-sched-nops=2' MACH=POWER5 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc @@ -240,8 +244,14 @@ # ALL march options failed, go back to conservative defaults MACH=IBMz9,IBMz10,IBMz196 OS=ALL LVL=500 COMPS=f77 'gfortran' '-O3 -funroll-loops' -MACH=IBMz9,IBMz10,IBMz196 OS=ALL LVL=500 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc +MACH=IBMz9,IBMz10,IBMz196,IBMz12 OS=ALL LVL=500 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc 'gcc' '-O3 -funroll-loops' +MACH=IBMz13 OS=ALL LVL=1000 COMPS=dmc,skc,dkc,icc,xcc,gcc + 'gcc' '-march=native -O -mvx -mzvector' +MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc + 'gcc' '-march=native -O -mvx -mzvector -fno-peephole -fno-peephole2' +MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77 + 'gfortran' '-march=native -O -mvx -mzvector' # # Windows defaults ; need to make SSE/SSE2 arch dep. # @@ -258,17 +268,70 @@ # # ARM defaults # -MACH=ARMv7 OS=ALL LVL=1000 COMPS=xcc - 'gcc' '-mcpu=cortex-a8 -O1 -mfpu=vfpv3 -mfloat-abi=softfp ' -MACH=ARMv7 OS=ALL LVL=1000 COMPS=smc,skc,gcc,icc - 'gcc' '-O1 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=vfpv3 -mfloat-abi=softfp -fno-expensive-optimizations' -MACH=ARMv7 OS=ALL LVL=1000 COMPS=dmc,dkc - 'gcc' '-O1 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=vfpv3 -mfloat-abi=softfp -fno-schedule-insns2' -MACH=ARMv7 OS=ALL LVL=1000 COMPS=f77 - 'gfortran' '-mcpu=cortex-a8 -mfpu=vfpv3 -mfloat-abi=softfp -O' +MACH=ARMa7,ARMa9,ARMa15 OS=ALL LVL=1000 COMPS=xcc + 'gcc' '-O2' +MACH=ARMa7,ARMa9,ARMa15 OS=ALL LVL=1000 COMPS=smc,skc,gcc,icc + 'gcc' '-O2 -fselective-scheduling2' +MACH=ARMa15 OS=ALL LVL=1000 COMPS=dmc,dkc + 'gcc' '-O2 -fno-schedule-insns -fno-schedule-insns2 -fprefetch-loop-arrays' +MACH=ARMa7,ARMa9,ARMa15 OS=ALL LVL=1000 COMPS=f77 + 'gfortran' '-O2' +MACH=ARMa7 OS=ALL LVL=1100 COMPS=dmc + 'gcc' '-O2 -mcpu=cortex-a7 -fno-schedule-insns -fselective-scheduling2' +MACH=ARMa9 OS=ALL LVL=1100 COMPS=dmc + 'gcc' '-O3 -mcpu=cortex-a9 
-fno-schedule-insns -fno-schedule-insns2' +MACH=ARMa9 OS=ALL LVL=1100 COMPS=smc,skc,dkc + 'gcc' '-O2 -mcpu=cortex-a9 -fschedule-insns' +# +# ARM64 XGENE1 defaults +# +MACH=ARM64xgene1 OS=ALL LVL=1000 COMPS=xcc + 'gcc' '-O2' +MACH=ARM64xgene1 OS=ALL LVL=1000 COMPS=icc + 'gcc' ' -mcpu=xgene1 -mtune=xgene1 -O2' +MACH=ARM64xgene1 OS=ALL LVL=1000 COMPS=skc,gcc,dkc + 'gcc' ' -mcpu=xgene1 -mtune=xgene1 -O3 -ftree-vect-loop-version -ftree-vectorize -fprefetch-loop-arrays' +MACH=ARM64xgene1 OS=ALL LVL=1000 COMPS=smc + 'gcc' '-O2 -mcpu=xgene1 -mtune=xgene1 -fprefetch-loop-arrays' +MACH=ARM64xgene1 OS=ALL LVL=1000 COMPS=dmc + 'gcc' '-O1 -mcpu=xgene1 -mtune=xgene1 -fexpensive-optimizations -fprefetch-loop-arrays' +# +# ARM64 cortex-a57 defaults (untested) +# +MACH=ARM64a57 OS=ALL LVL=1000 COMPS=f77 + 'gfortran' '-mcpu=cortex-a57 -O2' +MACH=ARM64a57 OS=ALL LVL=1000 COMPS=xcc + 'gcc' '-mcpu=cortex-a57 -O2' +MACH=ARM64a57 OS=ALL LVL=1000 COMPS=icc + 'gcc' '-mcpu=cortex-a57 -mtune=cortex-a57 -O2' +MACH=ARM64a57 OS=ALL LVL=1000 COMPS=skc,gcc,dkc + 'gcc' ' -mcpu=cortex-a57 -mtune=cortex-a57 -O3' +MACH=ARM64a57 OS=ALL LVL=1100 COMPS=dmc,smc + 'gcc' '-O1 -mcpu=cortex-a57 -mtune=cortex-a57' +MACH=ARM64a57 OS=ALL LVL=1000 COMPS=f77 + 'gfortran' '-O2 -mcpu=cortex-a57' +# +# ARM64 cortex-a53 defaults (untested) +# +MACH=ARM64a53 OS=ALL LVL=1000 COMPS=f77 + 'gfortran' '-march=native -O2' +MACH=ARM64a53 OS=ALL LVL=1000 COMPS=xcc + 'gcc' '-march=native -O2' +MACH=ARM64a53 OS=ALL LVL=1000 COMPS=icc + 'gcc' '-mcpu=cortex-a53 -mtune=cortex-a53 -O2' +MACH=ARM64a53 OS=ALL LVL=1000 COMPS=skc,gcc,dkc + 'gcc' ' -mcpu=cortex-a53 -mtune=cortex-a53 -O3 -ftree-vect-loop-version -ftree-vectorize' +MACH=ARM64a53 OS=ALL LVL=1000 COMPS=smc + 'gcc' '-O2 -mcpu=cortex-a53 -mtune=cortex-a53' +MACH=ARM64a53 OS=ALL LVL=1000 COMPS=dmc + 'gcc' '-O1 -mcpu=cortex-a53 -mtune=cortex-a53 -fexpensive-optimizations' +MACH=ARM64a53 OS=ALL LVL=1000 COMPS=f77 + 'gfortran' '-O2 -mcpu=cortex-a53' # # Generic defaults # 
+MACH=ALL OS=ALL LVL=4 COMPS=smc,dmc,skc,dkc,icc,gcc,xcc + 'clang' '-O2' MACH=ALL OS=ALL LVL=5 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc 'gcc' '-O -fomit-frame-pointer' MACH=ALL OS=ALL LVL=5 COMPS=f77 diff -Nru atlas-3.10.2/CONFIG/src/atlconf_misc.c atlas-3.10.3/CONFIG/src/atlconf_misc.c --- atlas-3.10.2/CONFIG/src/atlconf_misc.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/atlconf_misc.c 2016-07-28 19:43:00.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1998 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -348,21 +348,30 @@ { int i; long iret=0; - for (i=0; ln[i]; i++); - if (i > 0) + double d; + + for (i=0; ln[i]; i++); /* find end of string */ + if (!i) /* empty string */ + return(0); /* returns 0 */ +/* + * Now skip any non-digit info at end of string (eg., MHz) + */ + for(i--; i > 0 && !isdigit(ln[i]); i--); + if (!isdigit(ln[i])) /* no digits in string */ + return(0); /* returns 0 */ +/* + * Go backwards until we find a non-digit to possibly end the number + */ + for (i--; i > 0 && isdigit(ln[i]); i--); + if (i > 0) /* may be more to this number */ { - for (i--; i > 0 && !isdigit(ln[i]); i--); - if (i > 0 && ln[i] == '.') /* allow skip of 1 decimal point */ - for (i--; i > 0 && !isdigit(ln[i]); i--); - } - if (i > 0 || (i == 0 && (isdigit(ln[0]) || ln[0] == '.'))) - { - double d; - while(isdigit(ln[i]) && i > 0) i--; - if (!isdigit(ln[i])) i++; - sscanf(ln+i, "%lf", &d); - iret = (int)(d+0.5); + if (ln[i] == '.') + for(i--; i > 0 && isdigit(ln[i]); i--); } + else if (ln[0] != '.' 
&& !isdigit(ln[0])) + i = 1; + sscanf(ln+i, "%lf", &d); + iret = (int)(d+0.5); return(iret); } @@ -443,7 +452,7 @@ { char ln[128]; fprintf(fpout, "---------- PRESS ENTER TO CONTINUE ---------- "); - fgets(ln, 128, stdin); + assert(fgets(ln, 128, stdin)); } int DisplayFile(char *fnam, FILE *fpout, int nlines) @@ -563,6 +572,7 @@ else if (strstr(res, "ia64")) fam = AFIA64; else if (strstr(res, "mips")) fam = AFMIPS; else if (strstr(res, "arm")) fam = AFARM; + else if (strstr(res, "aarch64")) fam = AFARM; else if (strstr(res, "s390")) fam = AFS390; else if ( strstr(res, "i686") || strstr(res, "i586") || strstr(res, "i486") || strstr(res, "i386") || @@ -588,6 +598,7 @@ strstr(res, "x86_64") ) fam = AFX86; else if (strstr(res, "mips")) fam = AFMIPS; else if (strstr(res, "arm")) fam = AFARM; + else if (strstr(res, "aarch64")) fam = AFARM; else if (strstr(res, "s390")) fam = AFS390; free(res); } @@ -729,7 +740,7 @@ void GetGccVers(char *gcc, int *comp, int *major, int *minor, int *patch) /* - * comp: 0: gcc; 1: egcs; 2: pgcc, 3: apple's gcc + * comp: 0: gcc; 1: egcs; 2: pgcc, 3: apple's gcc, 4: clang */ { char *cmnd, *res; @@ -744,7 +755,9 @@ free(cmnd); if (res) { - if (strstr(res, "Apple Computer") || strstr(res, "Apple Inc")) + if (strstr(res, "clang") || strstr(res, "LLVM")) + *comp = 4; + else if (strstr(res, "Apple Computer") || strstr(res, "Apple Inc")) *comp = 3; /* * Skip compiler name, which may have digits in it @@ -845,6 +858,29 @@ return(0); } +int CompIsClang(char *comp) +/* + * Tries to detect if compiler is clang w/o scoping name of compiler + */ +{ + char *cmnd, *res; + char *cmpname; + int i, iret=0; + + i = strlen(comp) + 16; + cmnd = malloc(i*sizeof(char)); + assert(cmnd); + sprintf(cmnd, "%s --version", comp); + res = atlsys_1L(NULL, cmnd, 0, 0); + free(cmnd); + if (res) + { + iret = (strstr(res, "clang") || strstr(res, "LLVM")); + free(res); + } + return(iret); +} + int CompIsMinGW(char *comp) /* * Tries to detect if compiler is MinGW compiler diff 
-Nru atlas-3.10.2/CONFIG/src/backend/archinfo_aix.c atlas-3.10.3/CONFIG/src/backend/archinfo_aix.c --- atlas-3.10.2/CONFIG/src/backend/archinfo_aix.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/archinfo_aix.c 2016-07-28 19:43:01.000000000 +0000 @@ -69,6 +69,8 @@ mach = IbmPwr5; else if (strstr(res, "PowerPC_POWER7")) mach = IbmPwr7; + else if (strstr(res, "PowerPC_POWER8")) + mach = IbmPwr8; else if (strstr(res, "PowerPC_POWER6")) mach = IbmPwr6; else if (strstr(res, "PowerPC_POWER4")) diff -Nru atlas-3.10.2/CONFIG/src/backend/archinfo_linux.c atlas-3.10.3/CONFIG/src/backend/archinfo_linux.c --- atlas-3.10.2/CONFIG/src/backend/archinfo_linux.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/archinfo_linux.c 2016-07-28 19:43:01.000000000 +0000 @@ -77,6 +77,7 @@ else if (strstr(res, "7455")) mach = PPCG4; else if (strstr(res, "PPC970FX")) mach = PPCG5; else if (strstr(res, "PPC970MP")) mach = PPCG5; + else if (strstr(res, "POWER8")) mach = IbmPwr8; else if (strstr(res, "POWER7")) mach = IbmPwr7; else if (strstr(res, "POWER6")) mach = IbmPwr6; else if (strstr(res, "POWER5")) mach = IbmPwr5; @@ -106,14 +107,82 @@ break; case AFARM: res = atlsys_1L(NULL, "fgrep 'Processor' /proc/cpuinfo", 0, 0); + if (!res) + res = atlsys_1L(NULL, "fgrep cpu /proc/cpuinfo", 0, 0); + if (!res) + res = atlsys_1L(NULL, "uname -a", 0, 0); if (res) { - if (strstr(res, "ARMv7") || strstr(res,"v7l")) mach = ARMv7; - free(res); - } - else if ( res=atlsys_1L(NULL, "fgrep cpu /proc/cpuinfo", 0, 0) ) - { - if (strstr(res, "ARMv7") || strstr(res,"v7l")) mach = ARMv7; + if (strstr(res, "ARMv7") || strstr(res,"v7l")) + { + free(res); + res = atlsys_1L(NULL, "fgrep 'CPU part' /proc/cpuinfo", 0, 0); + if (res) + { + char *sp; + sp = strstr(res, "0x"); + if (sp) + { + int i; + if (sscanf(sp, "%x", &i) == 1) + { + i &= 0xFF; + switch(i) + { + case 7: + mach = ARM7; + break; + case 9: + mach = ARM9; + break; + case 15: + mach = ARM15; + break; + case 17: 
+ mach = ARM17; + break; + default:; + } + } + } + } + } + else if (strstr(res, "AArch64") || strstr(res, "aarch64")) + { + free(res); + res = atlsys_1L(NULL, "fgrep 'Hardware' /proc/cpuinfo", 0, 0); + if (res && strstr(res, "X-Gene ")) + mach = ARM64xg; + else + { + if(res) + free(res); + res = atlsys_1L(NULL, "fgrep 'CPU part' /proc/cpuinfo", 0, 0); + if (res) + { + char *sp; + sp = strstr(res, "0x"); + if (sp) + { + int i; + if (sscanf(sp, "%x", &i) == 1) + { + i &= 0xFF; + switch(i) + { + case 3: + mach = ARM64a53; + break; + case 7: + mach = ARM64a57; + break; + default:; + } + } + } + } + } + } free(res); } break; @@ -139,8 +208,7 @@ { if (res[0] != '\0') { - if (strstr(res, "Itanium 2") || strstr(res, "McKinley") || - strstr(res, "IA-64")) + if (strstr(res, "Itanium 2") || strstr(res, "McKinley")) mach = IA64Itan2; else if (strstr(res, "Itanium")) mach = IA64Itan; } @@ -193,6 +261,8 @@ mach = IntCorei3; else if (strstr(res, "2600") || strstr(res, "3770")) mach = IntCorei2; + else if (strstr(res, "6700")) + mach = IntCorei4; else mach = IntCorei1; } @@ -263,7 +333,10 @@ if (strstr(res, "2094") || strstr(res, "2096")) mach = IbmZ9; else if (strstr(res, "2097") || strstr(res, "2098")) mach = IbmZ10; /* we consider anything else to be a z196 or later */ - else mach = IbmZ196; /* looks risky to me, but IBM folks did it */ + else if (strstr(res, "2817") || strstr(res, "2818")) mach = IbmZ196; + else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12; + else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13; + else mach = IbmZ13; /* looks risky to me, but IBM folks did it */ free(res); } break; @@ -390,6 +463,67 @@ free(res); } } + if (!mhz) + { + res = atlsys_1L(NULL, "cpufreq-info -f -m", 0, 0); + if (res) + { + char *sp; + sp = strstr(res, "Hz"); + if (sp) + { + double dmhz; + dmhz = GetFirstDouble(res); + if (dmhz != 0.0) + { + switch(sp[-1]) + { + case 'G': + mhz = dmhz / 1000; + break; + case ' ': + mhz = dmhz / 1000000; + break; + case 
'K': + mhz = dmhz / 1000; + break; + case 'M': + default: + mhz = dmhz; + break; + } + } + } + free(res); + } + } + if (!mhz) + { + res = atlsys_1L(NULL, "cpufreq-info -f", 0, 0); + if (res) + { + mhz = GetLastInt(res); /* assumes clock speed given in KHz */ + free(res); + if (mhz) + mhz = mhz / 1000; + } + } +/* + * RCW: Patch supplied by IBM. I have no idea why it is written this way. + * S390 CPU speed is in bogomips rather than mhz + */ + if (!mhz) + { + res = atlsys_1L(NULL, "cat /proc/cpuinfo | fgrep bogomips",0,0); + if (res) + { + double result = GetFirstDouble(res); + result = (result*100.0)/88.0; /* RCW: WTF? */ + /*if (result > 5000) result = 5000;*/ + mhz = result; + free(res); + } + } return(mhz); } diff -Nru atlas-3.10.2/CONFIG/src/backend/archinfo_win.c atlas-3.10.3/CONFIG/src/backend/archinfo_win.c --- atlas-3.10.2/CONFIG/src/backend/archinfo_win.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/archinfo_win.c 2016-07-28 19:43:01.000000000 +0000 @@ -145,6 +145,67 @@ free(res); } } + if (!mhz) + { + res = atlsys_1L(NULL, "cpufreq-info -f -m", 0, 0); + if (res) + { + char *sp; + sp = strstr(res, "Hz"); + if (sp) + { + double dmhz; + dmhz = GetFirstDouble(res); + if (dmhz != 0.0) + { + switch(sp[-1]) + { + case 'G': + mhz = dmhz / 1000; + break; + case ' ': + mhz = dmhz / 1000000; + break; + case 'K': + mhz = dmhz / 1000; + break; + case 'M': + default: + mhz = dmhz; + break; + } + } + } + free(res); + } + } + if (!mhz) + { + res = atlsys_1L(NULL, "cpufreq-info -f", 0, 0); + if (res) + { + mhz = GetLastInt(res); /* assumes clock speed given in KHz */ + free(res); + if (mhz) + mhz = mhz / 1000; + } + } +/* + * RCW: Patch supplied by IBM. I have no idea why it is written this way. + * S390 CPU speed is in bogomips rather than mhz + */ + if (!mhz) + { + res = atlsys_1L(NULL, "cat /proc/cpuinfo | fgrep bogomips",0,0); + if (res) + { + double result = GetFirstDouble(res); + result = (result*100.0)/88.0; /* RCW: WTF? 
*/ + /*if (result > 5000) result = 5000;*/ + mhz = result; + free(res); + } + } return(mhz); } diff -Nru atlas-3.10.2/CONFIG/src/backend/archinfo_x86.c atlas-3.10.3/CONFIG/src/backend/archinfo_x86.c --- atlas-3.10.2/CONFIG/src/backend/archinfo_x86.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/archinfo_x86.c 2016-07-28 19:43:01.000000000 +0000 @@ -335,12 +335,17 @@ case 0x2D: case 0x3A: case 0x2A: + case 0x3E: iret = IntCorei2; break; case 0x45: case 0x3C: + case 0x3F: /* really i3EP, but same for stable */ iret = IntCorei3; break; + case 94: + iret = IntCorei4; + break; default: iret = MACHOther; } diff -Nru atlas-3.10.2/CONFIG/src/backend/cpuid.S atlas-3.10.3/CONFIG/src/backend/cpuid.S --- atlas-3.10.2/CONFIG/src/backend/cpuid.S 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/cpuid.S 2016-07-28 19:43:01.000000000 +0000 @@ -25,6 +25,26 @@ movq -8(%rsp), %rbx ret +#elif defined(ATL_GAS_WOW64) + sub $16, %rsp + movq %rbx, (%rsp) + movq %rcx, %r8 /* r8=out */ +# +# Load input reg, and call cpuid +# + movl %edx, %eax + cpuid +# +# Fill in array (out) entries +# + movl %eax, (%r8) + movl %ebx, 4(%r8) + movl %ecx, 8(%r8) + movl %edx, 12(%r8) + + movq (%rsp), %rbx + add $16, %rsp + ret #else # # Prologue diff -Nru atlas-3.10.2/CONFIG/src/backend/hardfptst.S atlas-3.10.3/CONFIG/src/backend/hardfptst.S --- atlas-3.10.2/CONFIG/src/backend/hardfptst.S 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/hardfptst.S 2016-07-28 19:43:01.000000000 +0000 @@ -0,0 +1,17 @@ +#define ATL_GAS_ARM +#include "atlas_asm.h" +.code 32 +.text +.align 2 +.globl ATL_asmdecor(hardfptst) +.type ATL_asmdecor(hardfptst), %function +/* + * float hardfptst(float f0, float f1, float f2); + * RETURNS: f0 + f1*f2, assuming HARDFP ABI + */ +ATL_asmdecor(hardfptst): + fmuls s1, s1, s2 + fadds s0, s0, s1 + bx lr +.size ATL_asmdecor(hardfptst),.-ATL_asmdecor(hardfptst) + diff -Nru atlas-3.10.2/CONFIG/src/backend/Make.ext 
atlas-3.10.3/CONFIG/src/backend/Make.ext --- atlas-3.10.2/CONFIG/src/backend/Make.ext 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/Make.ext 2016-07-28 19:43:01.000000000 +0000 @@ -27,20 +27,28 @@ archinfo_sfu.c archinfo_sunos.c archinfo_win.c archinfo_x86.c \ c2cmaster.c c2cslave.c comptestC.c comptestF.f cpuid.S f2cintC.c \ f2cintF.f f2cnameC.c f2cnameF.f f2cstrC.c f2cstrF.f flibchkC.c \ - flibchkF.f probe_3DNow.S probe_AVX.S probe_AVXFMA4.S probe_AVXMAC.S \ - probe_AltiVec.S probe_NEON.S probe_SSE1.S probe_SSE2.S probe_SSE3.S \ - probe_VSX.S probe_aff_BINDP.c probe_aff_CPUSET.c probe_aff_PBIND.c \ + flibchkF.f hardfptst.S probe_3DNow.S probe_AVX.S probe_AVXFMA4.S \ + probe_AVXMAC.S probe_AltiVec.S probe_FPV3D16MAC.S \ + probe_FPV3D16MACNEON.S probe_FPV3D32MAC.S probe_FPV3D32MACNEON.S \ + probe_NEON.S probe_SSE1.S probe_SSE2.S probe_SSE3.S probe_VSX.S \ + probe_aff_BINDP.c probe_aff_CPUSET.c probe_aff_PBIND.c \ probe_aff_PLPA.c probe_aff_RUNON.c probe_aff_SCHED.c \ probe_aff_SETAFFNP.c probe_aff_SETPROCNP.c probe_aff_WIN.c \ - probe_aff_WIN64.c probe_dAVX.c probe_dSSE3.c probe_dvec.c \ - probe_gas_arm.S probe_gas_mips.S probe_gas_parisc.S probe_gas_ppc.S \ - probe_gas_s390.S probe_gas_sparc.S probe_gas_x8632.S \ - probe_gas_x8664.S probe_svec.c probe_this_asm.c + probe_aff_WIN64.c probe_arm32_FPABI.c probe_dAVX.c probe_dSSE3.c \ + probe_dmac.c probe_dvec.c probe_gas_arm.S probe_gas_arm64.S \ + probe_gas_mips.S probe_gas_parisc.S probe_gas_ppc.S probe_gas_s390.S \ + probe_gas_sparc.S probe_gas_wow64.S probe_gas_x8632.S \ + probe_gas_x8664.S probe_smac.c probe_svec.c probe_this_asm.c \ + probe_vxz.c all : $(files) Make.ext : $(topd)/make.base $(extM) -b $(topd)/make.base -o Make.ext rout=atlas/config/src/backend +probe_gas_wow64.S : $(basf) + $(extC) -b $(basf) -o probe_gas_wow64.S rout=probe_gas_wow64.S +hardfptst.S : $(basf) + $(extC) -b $(basf) -o hardfptst.S rout=hardfptst.S probe_gas_x8664.S : $(basf) $(extC) -b $(basf) -o 
probe_gas_x8664.S rout=probe_gas_x8664.S probe_gas_x8632.S : $(basf) @@ -55,6 +63,8 @@ $(extC) -b $(basf) -o probe_gas_mips.S rout=probe_gas_mips.S probe_gas_arm.S : $(basf) $(extC) -b $(basf) -o probe_gas_arm.S rout=probe_gas_arm.S +probe_gas_arm64.S : $(basf) + $(extC) -b $(basf) -o probe_gas_arm64.S rout=probe_gas_arm64.S probe_gas_s390.S : $(basf) $(extC) -b $(basf) -o probe_gas_s390.S rout=probe_gas_s390.S probe_AVXMAC.S : $(basf) @@ -77,6 +87,14 @@ $(extC) -b $(basf) -o probe_VSX.S rout=probe_VSX.S probe_NEON.S : $(basf) $(extC) -b $(basf) -o probe_NEON.S rout=probe_NEON.S +probe_FPV3D32MACNEON.S : $(basf) + $(extC) -b $(basf) -o probe_FPV3D32MACNEON.S rout=probe_FPV3D32MACNEON.S +probe_FPV3D16MACNEON.S : $(basf) + $(extC) -b $(basf) -o probe_FPV3D16MACNEON.S rout=probe_FPV3D16MACNEON.S +probe_FPV3D32MAC.S : $(basf) + $(extC) -b $(basf) -o probe_FPV3D32MAC.S rout=probe_FPV3D32MAC.S +probe_FPV3D16MAC.S : $(basf) + $(extC) -b $(basf) -o probe_FPV3D16MAC.S rout=probe_FPV3D16MAC.S f2cnameF.f : $(basf) $(extF) -b $(basf) -o f2cnameF.f rout=f2cnameF.f f2cintF.f : $(basf) @@ -87,6 +105,10 @@ $(extF) -b $(basf) -o comptestF.f rout=comptestF.f flibchkF.f : $(basf) $(extF) -b $(basf) -o flibchkF.f rout=flibchkF.f +probe_arm32_FPABI.c : $(basf) + $(extC) -b $(basf) -o probe_arm32_FPABI.c rout=probe_arm32_FPABI +probe_vxz.c : $(basf) + $(extC) -b $(basf) -o probe_vxz.c rout=probe_vxz probe_aff_SETAFFNP.c : $(basf) $(extC) -b $(basf) -o probe_aff_SETAFFNP.c rout=probe_aff_SETAFFNP probe_aff_SETPROCNP.c : $(basf) @@ -113,6 +135,10 @@ $(extC) -b $(basf) -o probe_svec.c rout=probe_svec probe_dvec.c : $(basf) $(extC) -b $(basf) -o probe_dvec.c rout=probe_dvec +probe_dmac.c : $(basf) + $(extC) -b $(basf) -o probe_dmac.c rout=probe_dmac +probe_smac.c : $(basf) + $(extC) -b $(basf) -o probe_smac.c rout=probe_smac probe_dSSE3.c : $(basf) $(extC) -b $(basf) -o probe_dSSE3.c rout=probe_dSSE3 probe_dAVX.c : $(basf) diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_aff_SETAFFNP.c 
atlas-3.10.3/CONFIG/src/backend/probe_aff_SETAFFNP.c --- atlas-3.10.2/CONFIG/src/backend/probe_aff_SETAFFNP.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_aff_SETAFFNP.c 2016-07-28 19:43:01.000000000 +0000 @@ -16,7 +16,7 @@ { pthread_attr_t attr; pthread_t thr; - cpu_set_t cpuset; + cpu_set_t *cpuset; void *vp, *vpret; if (nargs > 1) @@ -30,9 +30,15 @@ assert(!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE)); #endif pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); /* OK to fail */ - CPU_ZERO(&cpuset); - CPU_SET(rank, &cpuset); - assert(!pthread_attr_setaffinity_np(&attr, sizeof(cpuset), &cpuset)); +/* + * On POWER8/Linux, pthread_attr_setaffinity_np sometimes reallocs() the + * cpuset variable, thus it must be malloced and not taken from stack! + */ + cpuset = malloc(sizeof(cpu_set_t)); + CPU_ZERO(cpuset); + CPU_SET(rank, cpuset); + assert(!pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t),cpuset)); + free(cpuset); assert(!pthread_create(&thr, &attr, DumbTest, vp)); assert(!pthread_attr_destroy(&attr)); assert(!pthread_join(thr, &vpret)); diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_AltiVec.S atlas-3.10.3/CONFIG/src/backend/probe_AltiVec.S --- atlas-3.10.2/CONFIG/src/backend/probe_AltiVec.S 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_AltiVec.S 2016-07-28 19:43:01.000000000 +0000 @@ -5,7 +5,9 @@ * RETURNS: z = x + y * */ +#ifndef ATL_OS_AIX /* pwr8/AIX doesn't know .text anymore */ .text +#endif #if defined(ATL_USE64BITS) && defined (ATL_OS_Linux) && _CALL_ELF != 2 .align 2 .globl ATL_asmdecor(do_vsum) diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_arm32_FPABI.c atlas-3.10.3/CONFIG/src/backend/probe_arm32_FPABI.c --- atlas-3.10.2/CONFIG/src/backend/probe_arm32_FPABI.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_arm32_FPABI.c 2016-07-28 19:43:01.000000000 +0000 @@ -0,0 +1,17 @@ +#include +#define STOMP_FPREGS __asm__ __volatile__ ( \ + "mov r0, 
#0 ; fmsr s0, r0 ; fcpys s1, s0 ; fcpys s2, s0" \ + : /* no output */ \ + : /* no input */ \ + : "r0", "s0", "s1", "s2" /* clobbered */ \ +) +int main(int nargs, char **args) +{ + float hardfptst(float f0, float f1, float f2); + STOMP_FPREGS; + if (hardfptst(-2.0, 2.0, 4.0) == 6.0) + printf("FPABI='HARDFP'\n"); + else + printf("FPABI='SOFTFP'\n"); + return(0); +} diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_AVXFMA4.S atlas-3.10.3/CONFIG/src/backend/probe_AVXFMA4.S --- atlas-3.10.2/CONFIG/src/backend/probe_AVXFMA4.S 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_AVXFMA4.S 2016-07-28 19:43:01.000000000 +0000 @@ -7,12 +7,21 @@ .text .globl ATL_asmdecor(do_vmacc) ATL_asmdecor(do_vmacc): -#ifdef ATL_GAS_x8664 - vmovupd 0(%rdi), %ymm0 /* load z */ - vmovupd 0(%rsi), %ymm1 /* load x */ - vmovupd 0(%rdx), %ymm2 /* load y */ +#if defined(ATL_GAS_x8664) || defined(ATL_GAS_WOW64) + #ifdef ATL_GAS_x8664 + #define Z %rdi + #define X %rsi + #define Y %rdx + #else + #define Z %rcx + #define X %rdx + #define Y %r8 + #endif + vmovupd (Z), %ymm0 /* load z */ + vmovupd (X), %ymm1 /* load x */ + vmovupd (Y), %ymm2 /* load y */ vfmaddpd %ymm0, %ymm1, %ymm2, %ymm0 /* z += x*y */ - vmovupd %ymm0, 0(%rdi) /* store z */ + vmovupd %ymm0, (Z) /* store z */ ret #else movl 12(%esp), %eax diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_AVXMAC.S atlas-3.10.3/CONFIG/src/backend/probe_AVXMAC.S --- atlas-3.10.2/CONFIG/src/backend/probe_AVXMAC.S 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_AVXMAC.S 2016-07-28 19:43:01.000000000 +0000 @@ -7,12 +7,21 @@ .text .globl ATL_asmdecor(do_vmacc) ATL_asmdecor(do_vmacc): -#ifdef ATL_GAS_x8664 - vmovupd 0(%rdi), %ymm0 /* load z */ - vmovupd 0(%rsi), %ymm1 /* load x */ - vmovupd 0(%rdx), %ymm2 /* load y */ +#if defined(ATL_GAS_x8664) || defined(ATL_GAS_WOW64) + #ifdef ATL_GAS_x8664 + #define Z %rdi + #define X %rsi + #define Y %rdx + #else + #define Z %rcx + #define X %rdx + #define Y %r8 + 
#endif + vmovupd (Z), %ymm0 /* load z */ + vmovupd (X), %ymm1 /* load x */ + vmovupd (Y), %ymm2 /* load y */ vfmadd231pd %ymm2, %ymm1, %ymm0 /* z += x*y */ - vmovupd %ymm0, 0(%rdi) /* store z */ + vmovupd %ymm0, (Z) /* store z */ ret #else movl 12(%esp), %eax diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_AVX.S atlas-3.10.3/CONFIG/src/backend/probe_AVX.S --- atlas-3.10.2/CONFIG/src/backend/probe_AVX.S 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_AVX.S 2016-07-28 19:43:01.000000000 +0000 @@ -7,13 +7,22 @@ .text .globl ATL_asmdecor(do_vmacc) ATL_asmdecor(do_vmacc): -#ifdef ATL_GAS_x8664 - vmovupd 0(%rdi), %ymm0 /* load z */ - vmovupd 0(%rsi), %ymm1 /* load x */ - vmovupd 0(%rdx), %ymm2 /* load y */ +#if defined(ATL_GAS_x8664) || defined(ATL_GAS_WOW64) + #ifdef ATL_GAS_x8664 + #define Z %rdi + #define X %rsi + #define Y %rdx + #else + #define Z %rcx + #define X %rdx + #define Y %r8 + #endif + vmovupd (Z), %ymm0 /* load z */ + vmovupd (X), %ymm1 /* load x */ + vmovupd (Y), %ymm2 /* load y */ vmulpd %ymm1, %ymm2, %ymm3 vaddpd %ymm3, %ymm0, %ymm0 - vmovupd %ymm0, 0(%rdi) /* store z */ + vmovupd %ymm0, (Z) /* store z */ ret #else movl 12(%esp), %eax diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_dmac.c atlas-3.10.3/CONFIG/src/backend/probe_dmac.c --- atlas-3.10.2/CONFIG/src/backend/probe_dmac.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_dmac.c 2016-07-28 19:43:01.000000000 +0000 @@ -0,0 +1,18 @@ +#include +#include +int main(int nargs, char **args) +{ + double x=2.0, y=4.0, z=(-8.0), ans; + void do_fmac(double*, double*, double*); + + ans = z + x*y; + do_fmac(&z, &x, &y); + if (z != ans) + { + fprintf(stderr, "wanted=%.2f, got=%.2f\n", ans, z); + printf("FAILURE\n"); + exit(1); + } + printf("SUCCESS\n"); + return(0); +} diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_FPV3D16MACNEON.S atlas-3.10.3/CONFIG/src/backend/probe_FPV3D16MACNEON.S --- atlas-3.10.2/CONFIG/src/backend/probe_FPV3D16MACNEON.S 
1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_FPV3D16MACNEON.S 2016-07-28 19:43:01.000000000 +0000 @@ -0,0 +1,21 @@ +#include "atlas_asm.h" +/* + ARM NEON assembler for: + r0 r1 r2 + void do_vsum(float* z, float* x,float* y) + where x, y, and z are vectors of length 4 + RETURNS: z = x + y +*/ +.code 32 +.fpu neon +.text +.align 2 +.globl ATL_asmdecor(do_vsum) +.type ATL_asmdecor(do_vsum), %function +ATL_asmdecor(do_vsum): + vldm r1, {d0-d1} + vldm r2, {d2-d3} + vadd.f32 d0, d0, d2 + vadd.f32 d1, d1, d3 + vstm r0, {d0-d1} +.size ATL_asmdecor(do_vsum),.-ATL_asmdecor(do_vsum) diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_FPV3D16MAC.S atlas-3.10.3/CONFIG/src/backend/probe_FPV3D16MAC.S --- atlas-3.10.2/CONFIG/src/backend/probe_FPV3D16MAC.S 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_FPV3D16MAC.S 2016-07-28 19:43:01.000000000 +0000 @@ -0,0 +1,21 @@ +#include "atlas_asm.h" +/* + ARM vfpv3 assembler for: + r0 r1 r2 + void do_fmac(double* z, double* x,double* y) + RETURNS: *z += *x * *y +*/ +.code 32 +.fpu vfpv3 +.text +.align 2 +.globl ATL_asmdecor(do_fmac) +.type ATL_asmdecor(do_fmac), %function +ATL_asmdecor(do_fmac): + fldd d0, [r0] + fldd d1, [r1] + fldd d2, [r2] + fmacd d0, d1, d2 + fstd d0, [r0] + bx lr +.size ATL_asmdecor(do_fmac),.-ATL_asmdecor(do_fmac) diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_FPV3D32MACNEON.S atlas-3.10.3/CONFIG/src/backend/probe_FPV3D32MACNEON.S --- atlas-3.10.2/CONFIG/src/backend/probe_FPV3D32MACNEON.S 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_FPV3D32MACNEON.S 2016-07-28 19:43:01.000000000 +0000 @@ -0,0 +1,21 @@ +#include "atlas_asm.h" +/* + ARM NEON assembler for: + r0 r1 r2 + void do_vsum(float* z, float* x,float* y) + where x, y, and z are vectors of length 4 + RETURNS: z = x + y +*/ +.code 32 +.fpu neon +.text +.align 2 +.globl ATL_asmdecor(do_vsum) +.type ATL_asmdecor(do_vsum), %function +ATL_asmdecor(do_vsum): + vldm r1, {d16-d17} 
+ vldm r2, {d18-d19} + vadd.f32 d16, d16, d18 + vadd.f32 d17, d17, d19 + vstm r0, {d16-d17} +.size ATL_asmdecor(do_vsum),.-ATL_asmdecor(do_vsum) diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_FPV3D32MAC.S atlas-3.10.3/CONFIG/src/backend/probe_FPV3D32MAC.S --- atlas-3.10.2/CONFIG/src/backend/probe_FPV3D32MAC.S 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_FPV3D32MAC.S 2016-07-28 19:43:01.000000000 +0000 @@ -0,0 +1,21 @@ +#include "atlas_asm.h" +/* + ARM vfpv3 assembler for: + r0 r1 r2 + void do_fmac(double* z, double* x,double* y) + RETURNS: *z += *x * *y +*/ +.code 32 +.fpu vfpv3 +.text +.align 2 +.globl ATL_asmdecor(do_fmac) +.type ATL_asmdecor(do_fmac), %function +ATL_asmdecor(do_fmac): + fldd d31, [r0] + fldd d30, [r1] + fldd d29, [r2] + fmacd d31, d30, d29 + fstd d31, [r0] + bx lr +.size ATL_asmdecor(do_fmac),.-ATL_asmdecor(do_fmac) diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_gas_arm64.S atlas-3.10.3/CONFIG/src/backend/probe_gas_arm64.S --- atlas-3.10.2/CONFIG/src/backend/probe_gas_arm64.S 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_gas_arm64.S 2016-07-28 19:43:01.000000000 +0000 @@ -0,0 +1,14 @@ +#include "atlas_asm.h" +# +# Linux ARM assembly for: +# int asm_probe(int i) +# RETURNS: i*3 +# + +.text +.globl ATL_asmdecor(asm_probe) +.type ATL_asmdecor(asm_probe), %function +ATL_asmdecor(asm_probe): + add x0, x0, x0, LSL #1 + ret +.size ATL_asmdecor(asm_probe),.-ATL_asmdecor(asm_probe) diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_gas_wow64.S atlas-3.10.3/CONFIG/src/backend/probe_gas_wow64.S --- atlas-3.10.2/CONFIG/src/backend/probe_gas_wow64.S 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_gas_wow64.S 2016-07-28 19:43:01.000000000 +0000 @@ -0,0 +1,11 @@ +#include "atlas_asm.h" +# WOW64 assembler for: +# int asm_probe(int i) +# RETURNS: i*3 +# +.text +.global ATL_asmdecor(asm_probe) +ATL_asmdecor(asm_probe): + xor %rax, %rax + lea (%ecx, %ecx,2), %eax + ret 
diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_smac.c atlas-3.10.3/CONFIG/src/backend/probe_smac.c --- atlas-3.10.2/CONFIG/src/backend/probe_smac.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_smac.c 2016-07-28 19:43:01.000000000 +0000 @@ -0,0 +1,18 @@ +#include +#include +int main(int nargs, char **args) +{ + float x=2.0, y=4.0, z=(-8.0), ans; + void do_fmac(float*, float*, float*); + + ans = z + x*y; + do_fmac(&z, &x, &y); + if (z != ans) + { + fprintf(stderr, "wanted=%.2f, got=%.2f\n", ans, z); + printf("FAILURE\n"); + exit(1); + } + printf("SUCCESS\n"); + return(0); +} diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_SSE1.S atlas-3.10.3/CONFIG/src/backend/probe_SSE1.S --- atlas-3.10.2/CONFIG/src/backend/probe_SSE1.S 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_SSE1.S 2016-07-28 19:43:01.000000000 +0000 @@ -13,6 +13,12 @@ addps %xmm1, %xmm0 movups %xmm0, 0(%rdi) ret +#elif defined(ATL_GAS_WOW64) + movups 0(%rdx), %xmm0 + movups 0(%r8), %xmm1 + addps %xmm1, %xmm0 + movups %xmm0, 0(%rcx) + ret #else movl 12(%esp), %eax movups 0(%eax), %xmm0 diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_SSE2.S atlas-3.10.3/CONFIG/src/backend/probe_SSE2.S --- atlas-3.10.2/CONFIG/src/backend/probe_SSE2.S 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_SSE2.S 2016-07-28 19:43:01.000000000 +0000 @@ -13,6 +13,12 @@ addpd %xmm1, %xmm0 movupd %xmm0, 0(%rdi) ret +#elif defined(ATL_GAS_WOW64) + movupd 0(%rdx), %xmm0 + movupd 0(%r8), %xmm1 + addpd %xmm1, %xmm0 + movupd %xmm0, 0(%rcx) + ret #else movl 12(%esp), %eax movupd 0(%eax), %xmm0 diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_SSE3.S atlas-3.10.3/CONFIG/src/backend/probe_SSE3.S --- atlas-3.10.2/CONFIG/src/backend/probe_SSE3.S 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_SSE3.S 2016-07-28 19:43:01.000000000 +0000 @@ -13,6 +13,12 @@ haddpd %xmm1, %xmm0 movupd %xmm0, 0(%rdi) ret +#elif defined(ATL_GAS_WOW64) 
+ movupd 0(%rdx), %xmm0 + movupd 0(%r8), %xmm1 + haddpd %xmm1, %xmm0 + movupd %xmm0, 0(%rcx) + ret #else movl 12(%esp), %eax movupd 0(%eax), %xmm0 diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_VSX.S atlas-3.10.3/CONFIG/src/backend/probe_VSX.S --- atlas-3.10.2/CONFIG/src/backend/probe_VSX.S 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_VSX.S 2016-07-28 19:43:01.000000000 +0000 @@ -5,7 +5,9 @@ * RETURNS: z = x + y * */ +#ifndef ATL_OS_AIX /* pwr8/AIX doesn't know .text anymore */ .text +#endif #if defined(ATL_USE64BITS) && defined (ATL_OS_Linux) && _CALL_ELF != 2 .align 2 .globl ATL_asmdecor(do_vsum) diff -Nru atlas-3.10.2/CONFIG/src/backend/probe_vxz.c atlas-3.10.3/CONFIG/src/backend/probe_vxz.c --- atlas-3.10.2/CONFIG/src/backend/probe_vxz.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/backend/probe_vxz.c 2016-07-28 19:43:01.000000000 +0000 @@ -0,0 +1,12 @@ +#include +void do_vsum(double *z, double *x, double *y) // RETURNS: z = x + y +{ + vector double vx, vy; + vx[0] = x[0]; + vx[1] = x[1]; + vy[0] = y[0]; + vy[1] = y[1]; + vy += vx; + z[0] = vy[0]; + z[1] = vy[1]; +} diff -Nru atlas-3.10.2/CONFIG/src/config.c atlas-3.10.3/CONFIG/src/config.c --- atlas-3.10.2/CONFIG/src/config.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/config.c 2016-07-28 19:43:00.000000000 +0000 @@ -826,7 +826,7 @@ assert(!system("make xspew")); compsflags = Comps2Flags(comps); /* Xlate comp/flag array to xspew flags */ - i = strlen(frm) + 11*13 + strlen(srcdir) + strlen(bindir); + i = strlen(frm) + 11*14 + strlen(srcdir) + strlen(bindir); if (cdefs) i += strlen(cdefs); if (f2cdefs) @@ -1019,7 +1019,6 @@ *flush = 0; *ptrbits = 0; *mhz = 0; - *mach = 0; *vec = 0; *asmb = 0; *OS = 0; @@ -1053,6 +1052,19 @@ } } break; + case 'A': + if (++i >= nargs) + PrintUsage(args[0], i, "out of arguments"); + if(args[i][0] >= '0' && args[i][0] <= '9') /* giving a # */ + *mach = atoi(args[i]); + else /* giving a architecture name */ + { 
+ for (k=1; k < NMACH; k++) + if (!strcmp(args[i], machnam[k])) + break; + *mach = (k == NMACH) ? 0 : k; + } + break; case 'f': if (++i >= nargs) PrintUsage(args[0], i, "out of arguments"); @@ -1068,11 +1080,6 @@ PrintUsage(args[0], i, "out of arguments"); *mhz = atoi(args[i]); break; - case 'A': - if (++i >= nargs) - PrintUsage(args[0], i, "out of arguments"); - *mach = atoi(args[i]); - break; case 'V': if (++i >= nargs) PrintUsage(args[0], i, "out of arguments"); @@ -1129,6 +1136,8 @@ *NoF77 = k; else if (!strcmp(sp0, "nocygwin")) *NoCygwin = k; + else if (!strcmp(sp0, "cripple-perf")) + *ThrChk = !k; else if (!strcmp(sp0, "kern")) gcc3 = sp; else if (!strcmp(sp0, "ADdir") || !strcmp(sp0, "addir")) @@ -1219,7 +1228,10 @@ { if (++i >= nargs) PrintUsage(args[0], i, "out of arguments"); - *gccflags = args[i]; + if (args[i-2][1] == 'C') + comps[GCC_] = args[i]; + else + *gccflags = args[i]; } else { @@ -1277,7 +1289,7 @@ int main(int nargs, char **args) { enum OSTYPE OS; - enum MACHTYPE mach; + enum MACHTYPE mach=MACHOther; int i, verb, asmb, f2cname, f2cint, f2cstr, ncpu, nof77, nocygwin; int thrchk, mhz, omp, AntThr, lapackref; int j, k, h, vecexts; @@ -1303,6 +1315,20 @@ asmb = ProbeAsm(verb, targarg, OS); else if (asmb < 0) asmb = 0; + if (asmb == gas_arm) + { + char defs[32], *sp; + sprintf(defs, "-DATL_OS_%s", osnam[OS]); + sp = GetStrProbe(verb, defs, "ARM_HARDFP", "FPABI"); + if (sp) + { + if (!strcmp(sp, "HARDFP")) + cdefs = NewAppendedString(cdefs, "-DATL_ARM_HARDFP=1"); + else if (!strcmp(sp, "SOFTFP")) + cdefs = NewAppendedString(cdefs, "-DATL_ARM_SOFTFP=1"); + free(sp); + } + } if (!vecexts) vecexts = ProbeVecs(verb, targarg, OS, asmb); else if (vecexts < 0) @@ -1317,36 +1343,51 @@ pmake = ProbePmake(verb, OS, ncpu); if (ptrbits == 0) { - if (asmb == gas_x86_64) + if (asmb == gas_x86_64 || asmb == gas_wow64) ptrbits = 64; else ptrbits = ProbePtrbits(verb, targarg, OS, asmb); } if (ProbeCPUThrottle(verb, targarg, OS, asmb)) { + char ln[64]; 
fprintf(stderr, "It appears you have cpu throttling enabled, which makes timings\n"); fprintf(stderr, - "unreliable and an ATLAS install nonsensical. Aborting.\n"); - fprintf(stderr, - "See ATLAS/INSTALL.txt for further information\n"); - if (thrchk) exit(1); - else fprintf(stderr, "Ignoring CPU throttling by user override!\n\n"); + "unreliable and an ATLAS install nonsensical.\n\n"); + + fprintf + (stderr, +"OS-controlled CPU throttling is so course grained, that timings become\n" +"essentially random. What this means for an ATLAS install is that ATLAS\n" +"cannot tell the difference between a good and bad kernel, and so the\n" +"tuning step may result in arbitrarily bad performance. If you don't care\n" +"about performance, you are usually better off just using the reference BLAS.\n" +"\nIf you fear overheating, setting clock speed to some lower, constant\n" +"value should give you a decent install.\n\n" +"Hardware-controlled throttling is usually much finer grained, and therefore\n" +"may result in mediocre tuning, but this will depend quite bit on luck.\n\n" +"If your machine has OS throttling enabled, it is critical that you disable\n" +"it (with something like cpufreq-set). 
See INSTALL.txt for details.\n\n" +"If you you do not care at all about performance, you can rerun configure\n" +"with --cripple-atlas-performance to proceed in the face of throttling.\n" +"Do not do this unless you really don't care about performance.\n" +"If you are able to turn off throttling, rerun configure as normal\n" +"once you have done so.\n\n" + ); + if (thrchk) + { + fprintf(stderr, "Aborting due to throttling\n"); + exit(1); + } } /* * Override 32/64 bit assembler if asked */ - if (asmb == gas_x86_64 && ptrbits == 32) + if ((asmb == gas_x86_64 || asmb == gas_wow64) && ptrbits == 32) asmb = gas_x86_32; else if (asmb == gas_x86_32 && ptrbits == 64) asmb = gas_x86_64; -/* - * Now that we've detected architecture stuff, kill assembly dialect for - * 64-bit install of Win64, since we can't use our normal AMD64 assembly - * there due to incompatable ABI - */ - if (OS == OSWin64 && ptrbits == 64) - asmb = ASM_None; sp = ProbeComp(verb, targarg, OS, mach, comps, nof77, nocygwin, ptrbits, vecexts); diff -Nru atlas-3.10.2/CONFIG/src/IsGcc.c atlas-3.10.3/CONFIG/src/IsGcc.c --- atlas-3.10.2/CONFIG/src/IsGcc.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/IsGcc.c 2016-07-28 19:43:00.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/CONFIG/src/Makefile atlas-3.10.3/CONFIG/src/Makefile --- atlas-3.10.2/CONFIG/src/Makefile 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/Makefile 2016-07-28 19:43:00.000000000 +0000 @@ -145,12 +145,45 @@ # Machine-level probes run on machine lib being compiled for # These probes do not call system, but actually do something # +IRun_ARM_HARDFP : + $(CC) $(CCFLAGS) -o xprobe_hardfp \ + $(SRCdir)/backend/probe_arm32_FPABI.c \ + $(SRCdir)/backend/hardfptst.S + $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_hardfp args="$(args)" \ + redir=config0.out + - cat config0.out IRun_NEON : $(CC) $(CCFLAGS) -mfpu=neon -o xprobe_neon \ $(SRCdir)/backend/probe_svec.c $(SRCdir)/backend/probe_NEON.S $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_neon args="$(args)" \ redir=config0.out - cat config0.out +IRun_VXZ : + $(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz \ + $(SRCdir)/backend/probe_dvec.c $(SRCdir)/backend/probe_vxz.c + $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_vxz args="$(args)" \ + redir=config0.out + - cat config0.out +IRun_FPV3D16MAC : + $(CC) $(CCFLAGS) -mfpu=vfpv3-d16 -o xprobe_fpv3d16mac $(SRCdir)/backend/probe_dmac.c $(SRCdir)/backend/probe_FPV3D16MAC.S + $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_fpv3d16mac args="$(args)" \ + redir=config0.out + - cat config0.out +IRun_FPV3D32MAC : + $(CC) $(CCFLAGS) -mfpu=vfpv3 -o xprobe_fpv3d32mac $(SRCdir)/backend/probe_dmac.c $(SRCdir)/backend/probe_FPV3D32MAC.S + $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_fpv3d32mac args="$(args)" \ + redir=config0.out + - cat config0.out +IRun_FPV3D16MACNEON : + $(CC) $(CCFLAGS) -mfpu=neon-fp16 -o xprobe_fpv3d16macneon $(SRCdir)/backend/probe_svec.c $(SRCdir)/backend/probe_FPV3D16MACNEON.S + $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_fpv3d16macneon args="$(args)" \ + redir=config0.out + - cat config0.out +IRun_FPV3D32MACNEON : + $(CC) $(CCFLAGS) 
-mfpu=neon -o xprobe_fpv3d32macneon $(SRCdir)/backend/probe_svec.c $(SRCdir)/backend/probe_FPV3D32MACNEON.S + $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_fpv3d32macneon args="$(args)" \ + redir=config0.out + - cat config0.out IRun_3DNow : $(CC) $(CCFLAGS) -o xprobe_3dnow $(SRCdir)/backend/probe_svec.c $(SRCdir)/backend/probe_3DNow.S $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_3dnow args="$(args)" \ @@ -265,6 +298,11 @@ $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_gas_s390 args="$(args)" \ redir=config0.out - cat config0.out +IRun_GAS_ARM64 : + $(CC) $(CCFLAGS) -o xprobe_gas_arm64 $(SRCdir)/backend/probe_this_asm.c $(SRCdir)/backend/probe_gas_arm64.S + $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_gas_arm64 args="$(args)" \ + redir=config0.out + - cat config0.out IRun_GAS_ARM : $(CC) $(CCFLAGS) -o xprobe_gas_arm $(SRCdir)/backend/probe_this_asm.c $(SRCdir)/backend/probe_gas_arm.S $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_gas_arm args="$(args)" \ @@ -296,6 +334,13 @@ redir=config0.out - cat config0.out +IRun_GAS_WOW64 : + $(CC) $(CCFLAGS) -DATL_USE64BITS=1 -o xprobe_gas_wow64 \ + $(SRCdir)/backend/probe_this_asm.c $(SRCdir)/backend/probe_gas_wow64.S + $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_gas_wow64 args="$(args)" \ + redir=config0.out + - cat config0.out + IRunC2C : - rm -f config0.out xc2c c2cslave.o $(CC1) $(CC1FLAGS) -c $(SRCdir)/backend/c2cslave.c @@ -430,6 +475,9 @@ xprobe_x8684 : $(SRCdir)/backend/probe_this_asm.c $(SRCdir)/backend/probe_gas_x8684.S $(CC) $(CCFLAGS) -o xprobe_x8684 $(SRCdir)/backend/probe_this_asm.c \ $(SRCdir)/backend/probe_gas_x8684.S +xprobe_wow64 : $(SRCdir)/backend/probe_this_asm.c $(SRCdir)/backend/probe_gas_wow64.S + $(CC) $(CCFLAGS) -o xprobe_wow64 $(SRCdir)/backend/probe_this_asm.c \ + $(SRCdir)/backend/probe_gas_wow64.S DoNothing: diff -Nru atlas-3.10.2/CONFIG/src/mgwcmp.c atlas-3.10.3/CONFIG/src/mgwcmp.c --- atlas-3.10.2/CONFIG/src/mgwcmp.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/mgwcmp.c 2016-07-28 
19:43:01.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/CONFIG/src/probe_aff.c atlas-3.10.3/CONFIG/src/probe_aff.c --- atlas-3.10.2/CONFIG/src/probe_aff.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/probe_aff.c 2016-07-28 19:43:01.000000000 +0000 @@ -213,7 +213,7 @@ * handle these non-power2 cases. */ maxID = (1 << (maxlog2+1)); - IDs = malloc(sizeof(int)*maxID); + IDs = malloc(sizeof(int)*(maxID+1)); assert(IDs); /* * Now simply try all IDs between these regions, and record the good ones diff -Nru atlas-3.10.2/CONFIG/src/probe_arch.c atlas-3.10.3/CONFIG/src/probe_arch.c --- atlas-3.10.2/CONFIG/src/probe_arch.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/probe_arch.c 2016-07-28 19:43:01.000000000 +0000 @@ -121,7 +121,7 @@ /* * If Assembler right or unspecified, try x86 probe */ - if (asmd == gas_x86_32 || asmd == gas_x86_64) + if (asmd == gas_x86_32 || asmd == gas_x86_64 || asmd == gas_wow64) { frm = "make IRunArchInfo_x86 MYFLAGS=\"-DATL_OS_%s -DATL_%s\" args=\"%s\" %s | fgrep '%s'"; i = strlen(frm) + strlen(osnam[OS]) + strlen(ASMNAM[asmd]) + strlen(flag) @@ -227,7 +227,8 @@ if (flags & Parch) { arch = ProbeOneInt(OS, asmd, targ, "-a", "MACHTYPE=", &sure); - if (arch == MACHOther && (asmd == gas_x86_32 || asmd == gas_x86_64)) + if (arch == MACHOther && + (asmd == gas_x86_32 || asmd == gas_x86_64 || asmd == gas_wow64)) arch = x86X; if (flags & Pverb) printf("Architecture detected as %s.\n", machnam[arch]); @@ -243,7 +244,7 @@ ProbeOneInt(OS, asmd, targ, "-t", "CPU THROTTLE=", &sure)); if (flags & P64) { - if (asmd == gas_x86_64) + if (asmd == gas_x86_64 || asmd == gas_wow64) { sure = 1; bits = 64; diff -Nru atlas-3.10.2/CONFIG/src/probe_comp.c atlas-3.10.3/CONFIG/src/probe_comp.c --- 
atlas-3.10.2/CONFIG/src/probe_comp.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/probe_comp.c 2016-07-28 19:43:01.000000000 +0000 @@ -452,6 +452,8 @@ vp = "-mavx2 -mfma"; else if (vecexts & (1< 1) @@ -499,7 +509,8 @@ else if (strstr(p->comp, "gcc")) p->flags = NewAppendedString(p->flags, vp); #else - if (strstr(p->comp, "gcc") || strstr(p->comp, "gfortran")) + if (strstr(p->comp, "gcc") || strstr(p->comp, "gfortran") || + strstr(p->comp, "clang")) p->flags = NewAppendedString(p->flags, vp); #endif } @@ -588,6 +599,9 @@ return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32"); if (MachIsS390(arch)) return((ptrbits == 64) ? "-m64" : "-m31"); + if (OS == OSAIX) + return((ptrbits == 64) ? "-maix64" : "-maix32"); + if (!CompIsGcc(comp)) { /* @@ -621,7 +635,7 @@ } return(sp); } -char *GetStandardCompName(char *comp) +char *GetStandardCompName(char *comp, int f77) { int i, j, k; char *ucomp; @@ -632,29 +646,24 @@ { GetGccVers(comp, &k, &j, &k, &k); if (j < 4) - { - if (i == F77_) - ucomp = "g77"; - else - ucomp = "gcc"; - } - else if (i == F77_) - ucomp = "gfortran"; + ucomp = (f77) ? "g77" : "gcc"; else - ucomp = "gcc"; + ucomp = (f77) ? "gfortran" : "gcc"; } + else if (CompIsClang(comp)) + ucomp = (f77) ? 
"gfortran" : "clang"; else ucomp = NameWithoutPath(comp); return(ucomp); } -char *GetWinComp(enum OSTYPE OS, char *comp, char *bindir) +char *GetWinComp(enum OSTYPE OS, char *comp, char *bindir, int if77) { char *ln; char *ucomp; int i; if (!OSIsWin(OS)) return(NULL); - ucomp = GetStandardCompName(comp); + ucomp = GetStandardCompName(comp, if77); if (!strcmp(ucomp, "icc") || !strcmp(ucomp, "icl")) ucomp = "ATLwin_icc"; else if (!strcmp(ucomp, "ifort") || !strcmp(ucomp, "ivf")) @@ -841,6 +850,7 @@ char *ucomp, *dcomp, *flg, *sp, *sp2, *mgwd=NULL, *mgwc=NULL, *mgwf=NULL; char *cmnd, *res; int i, j, k, h; + const int WIN64_32 = (OS == OSWin64 && ptrbits == 32); /* * Look through input compilers; any of them that is simply "gcc" gets replaced * with goodgcc; If user has overridden with path or specific name (gcc-4) @@ -859,10 +869,9 @@ } /* - * If the user requests MinGW install, or is on 64-bit platform, build - * wrapper code + * If the user requests MinGW install build wrapper code */ - if (nocygwin || (OS == OSWin64 && ptrbits == 64)) + if (nocygwin) { int fndit; mgwd = SetupMinGW(verb, ptrbits, nof77); @@ -912,7 +921,7 @@ flg = NewStringCopy(usrcomps[NCOMP+i]?usrcomps[NCOMP+i]:p->flags); if (usrcomps[NCOMP*2+i]) flg = NewAppendedString(flg, usrcomps[NCOMP*2+i]); - freeme = sp = GetWinComp(OS, p->comp, bindir); + freeme = sp = GetWinComp(OS, p->comp, bindir, (i==F77_)); if (!sp) { /* @@ -934,13 +943,10 @@ else sp = p->comp; } - if (ptrbits) + if (ptrbits && (!WIN64_32 || i != XCC_)) { - if (OS == OSWin64 && ptrbits == 64 && i == XCC_) - flg = NewAppendedString(flg, "-m32"); - else - flg = NewAppendedString(flg, - GetPtrbitsFlag(OS, arch, ptrbits, sp)); + flg = NewAppendedString(flg, + GetPtrbitsFlag(OS, arch, ptrbits, sp)); } if (sp == mgwf) flg = NewAppendedString(flg, "-static"); @@ -975,25 +981,7 @@ else if (!usrcomps[NCOMP+i]) { p = comps[i]; - ucomp = NameWithoutPath(usrcomps[i]); -/* - * Recognize gnu compiler regardless of name string (eg. 
ev6-gcc-3.2) - */ - if (CompIsGcc(usrcomps[i])) - { - GetGccVers(usrcomps[i], &k, &j, &k, &k); - if (j < 4) - { - if (i == F77_) - ucomp = "g77"; - else - ucomp = "gcc"; - } - else if (i == F77_) - ucomp = "gfortran"; - else - ucomp = goodgcc; - } + ucomp = GetStandardCompName(usrcomps[i], (i==F77_)); for (p=comps[i]; p; p = p->next) { dcomp = NameWithoutPath(p->comp); @@ -1014,7 +1002,7 @@ /* * On windows, build compiler wrapper for MSVC++ or Intel compilers */ - sp = GetWinComp(OS, usrcomps[i], bindir); + sp = GetWinComp(OS, usrcomps[i], bindir, (i==F77_)); if (sp) { free(usrcomps[i]); @@ -1026,13 +1014,10 @@ flg = NewStringCopy(usrcomps[NCOMP+i]?usrcomps[NCOMP+i]:p->flags); if (usrcomps[NCOMP*2+i]) flg = NewAppendedString(flg, usrcomps[NCOMP*2+i]); - if (ptrbits) + if (ptrbits && (!WIN64_32 || i != XCC_)) { - if (OS == OSWin64 && ptrbits == 64 && i == XCC_) - flg = NewAppendedString(flg, "-m32"); - else - flg = NewAppendedString(flg, - GetPtrbitsFlag(OS, arch, ptrbits,usrcomps[i])); + flg = NewAppendedString(flg, + GetPtrbitsFlag(OS, arch, ptrbits,usrcomps[i])); } if (strstr(usrcomps[i], "mgwgfortran")) flg = NewAppendedString(flg, "-static"); @@ -1068,9 +1053,6 @@ for (i=0; i < NCOMP; i++) { sp = GetPtrbitsFlag(OS, arch, ptrbits, usrcomps[i]); - if (OS == OSWin64 && ptrbits == 64 && i == XCC_) - usrcomps[i+NCOMP] = NewAppendedString(usrcomps[i+NCOMP], "-m32"); - else usrcomps[i+NCOMP] = NewAppendedString(usrcomps[i+NCOMP], sp); } } @@ -1139,7 +1121,8 @@ char *targ, int GMAJOR, int GMINOR, - char **gccs /* NULL-terminated list of possible gcc compilers */ + char **gccs, /* NULL-terminated list of possible gcc compilers */ + int USEMINGW /* 0/1: reject/accept MINGW compilers */ ) /* * RETURNS: 0 if no gcc compiler, @@ -1154,6 +1137,8 @@ for (i=0; gccs[i]; i++) { int icmp, major, minor, patch; + if (!USEMINGW && CompIsMinGW(gccs[i])) + continue; GetGccVers(gccs[i], &icmp, &major, &minor, &patch); if (verb) printf(" icmp=%d, maj=%d, min=%d, pat=%d: %s\n", @@ 
-1220,6 +1205,45 @@ *GMAJOR = 4; switch(arch) { + case IbmZ12: + case IbmZ13: + case IntCorei3: + case IntCorei4: + case IntCorei2: + *GMAJOR = 6; + *GMINOR = 1; + *GPATCH = 0; + break; + case IbmPwr7: + *GMAJOR = 4; + *GMINOR = 8; + *GPATCH = 3; + break; + case IbmPwr8: + *GMAJOR = 5; + *GMINOR = 3; + *GPATCH = 1; + break; + case ARM64xg: + *GMAJOR = 5; + *GMINOR = 3; + *GPATCH = 0; + break; + case ARM7: + *GMAJOR = 4; + *GMINOR = 9; + *GPATCH = 2; + break; + case ARM9: + *GMAJOR = 4; + *GMINOR = 7; + *GPATCH = 2; + break; + case ARM15: + *GMAJOR = 4; + *GMINOR = 8; + *GPATCH = 4; + break; case PPCG4: /* cannot install 4.7.0 */ case PPCG5: /* cannot install 4.7.0 */ case SunUSII: /* cannot install 4.7.0 */ @@ -1232,20 +1256,17 @@ case x86SSE2: case x86SSE3: case x86x87: - case IbmPwr7: case IntCore2: case IntCorei1: - case IntCorei2: - case IntCorei3: case Amd64K10h: - case ARMv7: default: *GMINOR = 7; *GPATCH = 0; break; } } -char *FindGoodGcc(enum OSTYPE OS, enum MACHTYPE arch, int verb, char *targ) +char *FindGoodGcc(enum OSTYPE OS, enum MACHTYPE arch, int verb, char *targ, + int USEMINGW) { char *OSpaths=NULL, *sp, **gccs=NULL; char *ln; @@ -1273,7 +1294,7 @@ sprintf(ln, "find %s -maxdepth 1 -name '*gcc*' -exec ./xisgcc '{}' \\;", OSpaths); gccs = GetLinesFromFile(atlsys(NULL, ln, verb, 1), gccs); - i = SelectBestGcc(verb, targ, GMAJOR, GMINOR, gccs); + i = SelectBestGcc(verb, targ, GMAJOR, GMINOR, gccs, USEMINGW); if (i > 1) { free(ln); @@ -1303,7 +1324,7 @@ if (fp) { gccs = GetLinesFromFile(fp, gccs); - i = SelectBestGcc(verb, targ, GMAJOR, GMINOR, gccs); + i = SelectBestGcc(verb, targ, GMAJOR, GMINOR, gccs, USEMINGW); if (i > 0) { free(ln); @@ -1327,7 +1348,7 @@ if (fp) { gccs = GetLinesFromFile(fp, gccs); - i = SelectBestGcc(verb, targ, GMAJOR, GMINOR, gccs); + i = SelectBestGcc(verb, targ, GMAJOR, GMINOR, gccs, USEMINGW); if (i > 0) { free(ln); @@ -1353,7 +1374,7 @@ i = sprintf(ln, "find %s -name '*gcc*' -exec ./xisgcc '{}' \\;", stdpaths); gccs = 
GetLinesFromFile(atlsys(NULL, ln, verb, 1), gccs); - i = SelectBestGcc(verb, targ, GMAJOR, GMINOR, gccs); + i = SelectBestGcc(verb, targ, GMAJOR, GMINOR, gccs, USEMINGW); if (i > 1) { free(ln); @@ -1372,7 +1393,7 @@ } char *FindGoodGfortran(enum OSTYPE OS, enum MACHTYPE arch, int verb, - char *targ, char *gcc) + char *targ, char *gcc, int USEMINGW) { char *OSpaths=NULL, *sp, **gccs=NULL; char *ln; @@ -1397,7 +1418,7 @@ sprintf(ln, "find %s -maxdepth 1 -name '*gfortran*' -exec ./xisgcc '{}' \\;", sp); gccs = GetLinesFromFile(atlsys(NULL, ln, verb, 1), gccs); - i = SelectBestGcc(verb, targ, cmaj, cmin, gccs); + i = SelectBestGcc(verb, targ, cmaj, cmin, gccs, USEMINGW); if (i > 1) { free(ln); @@ -1430,7 +1451,7 @@ "find %s -maxdepth 1 -name '*gfortran*' -exec ./xisgcc '{}' \\;", OSpaths); gccs = GetLinesFromFile(atlsys(NULL, ln, verb, 1), gccs); - i = SelectBestGcc(verb, targ, cmaj, cmin, gccs); + i = SelectBestGcc(verb, targ, cmaj, cmin, gccs, USEMINGW); if (i > 1) { free(ln); @@ -1457,7 +1478,7 @@ "find %s -maxdepth 1 -name '*gfortran*' -exec ./xisgcc '{}' \\;", sp); free(sp); gccs = GetLinesFromFile(atlsys(NULL, ln, verb, 1), gccs); - i = SelectBestGcc(verb, targ, cmaj, cmin, gccs); + i = SelectBestGcc(verb, targ, cmaj, cmin, gccs, USEMINGW); if (i > 0) { free(ln); @@ -1480,7 +1501,7 @@ if (fp) { gccs = GetLinesFromFile(fp, gccs); - i = SelectBestGcc(verb, targ, cmaj, cmin, gccs); + i = SelectBestGcc(verb, targ, cmaj, cmin, gccs, USEMINGW); if (i > 0) { free(ln); @@ -1508,7 +1529,7 @@ i = sprintf(ln, "find %s -name '*gfortran*' -exec ./xisgcc '{}' \\;", stdpaths); gccs = GetLinesFromFile(atlsys(NULL, ln, verb, 1), gccs); - i = SelectBestGcc(verb, targ, cmaj, cmin, gccs); + i = SelectBestGcc(verb, targ, cmaj, cmin, gccs, USEMINGW); if (i > 1) { free(ln); @@ -1755,7 +1776,6 @@ comps[k] = NULL; *ptrbits = 0; - *mach = 0; *vec = 0; *OS = 0; *verb = 0; @@ -1767,15 +1787,23 @@ PrintUsage(args[0], i, args[i]); switch(args[i][1]) { - case 'b': + case 'A': if (++i >= nargs) 
PrintUsage(args[0], i, "out of arguments"); - *ptrbits = atoi(args[i]); + if(args[i][0] >= '0' && args[i][0] <= '9') /* giving a # */ + *mach = atoi(args[i]); + else /* giving a architecture name */ + { + for (k=1; k < NMACH; k++) + if (!strcmp(args[i], machnam[k])) + break; + *mach = (k == NMACH) ? 0 : k; + } break; - case 'A': + case 'b': if (++i >= nargs) PrintUsage(args[0], i, "out of arguments"); - *mach = atoi(args[i]); + *ptrbits = atoi(args[i]); break; case 'V': if (++i >= nargs) @@ -1967,28 +1995,10 @@ } else targarg = ""; - #if defined(ATL_GCCCLANG) || defined(ATL_GCC3P) - { - char *topd; - int dlen; - topd = atlsys_1L(NULL, "pwd", verb, 1); - assert(topd); - dlen = strlen(topd); - goodgcc = malloc(dlen + 10); - assert(goodgcc); - #ifdef ATL_GCCCLANG - sprintf(goodgcc, "%s/gccclang", topd); - #else - sprintf(goodgcc, "%s/gcc3p", topd); - #endif - free(topd); - } - #else - if (usrcomps[GCC_]) - goodgcc = NewStringCopy(usrcomps[GCC_]); - else - goodgcc = FindGoodGcc(OS, mach, verb, targ); - #endif + if (usrcomps[GCC_]) + goodgcc = NewStringCopy(usrcomps[GCC_]); + else + goodgcc = FindGoodGcc(OS, mach, verb, targ, nocygwin); GetComps(OS, mach, verb, targ, ptrbits, usrcomps, nof77, nocygwin, vecexts, goodgcc, bindir); /* @@ -1997,7 +2007,7 @@ if (!nof77 && usrcomps[F77_] && !strcmp(usrcomps[F77_], "gfortran")) { free(usrcomps[F77_]); - usrcomps[F77_] = FindGoodGfortran(OS, mach, verb, targ, goodgcc); + usrcomps[F77_] = FindGoodGfortran(OS, mach, verb, targ, goodgcc,nocygwin); if (!usrcomps[F77_]) usrcomps[F77_] = NewStringCopy("gfortran"); } diff -Nru atlas-3.10.2/CONFIG/src/probe_OS.c atlas-3.10.3/CONFIG/src/probe_OS.c --- atlas-3.10.2/CONFIG/src/probe_OS.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/probe_OS.c 2016-07-28 19:43:01.000000000 +0000 @@ -44,6 +44,17 @@ * Need to confirm what is returned under cygwin for XP, etc. 
*/ else if (strstr(res, "WOW64")) OS =OSWin64; + else if (strstr(res, "CYGWIN")) + { + free(res); + sprintf(cmnd, "%s -m", unam); + res = atlsys_1L(targ, cmnd, verb, 0); + if (strstr(res, "x86_64") || strstr(res, "AMD64") || + strstr(res, "Intel64")) + OS = OSWin64; + else + OS = OSWinNT; + } else if (strstr(res, "NT")) OS = OSWinNT; else ierr = 1; } diff -Nru atlas-3.10.2/CONFIG/src/probe_vec.c atlas-3.10.3/CONFIG/src/probe_vec.c --- atlas-3.10.2/CONFIG/src/probe_vec.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/probe_vec.c 2016-07-28 19:43:01.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2006 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -85,8 +85,10 @@ } sprintf(ln, "-DATL_OS_%s -DATL_%s", osnam[OS], ASMNAM[asmb]); for (i=1; i < NISA; i++) + { if (RunISAProbe(ISAXNAM[i], verb, targ, ln)) iret |= (1<= nargs) + PrintUsage(args[0], i, "out of arguments"); + if(args[i][0] >= '0' && args[i][0] <= '9') /* giving a # */ + *mach = atoi(args[i]); + else /* giving a architecture name */ + { + for (k=1; k < NMACH; k++) + if (!strcmp(args[i], machnam[k])) + break; + *mach = (k == NMACH) ? 0 : k; + } + break; case 'f': if (++i >= nargs) PrintUsage(args[0], i, "out of arguments"); @@ -177,11 +189,6 @@ PrintUsage(args[0], i, "out of arguments"); *mhz = atoi(args[i]); break; - case 'A': - if (++i >= nargs) - PrintUsage(args[0], i, "out of arguments"); - *mach = atoi(args[i]); - break; case 'V': if (++i >= nargs) PrintUsage(args[0], i, "out of arguments"); @@ -326,7 +333,10 @@ { if (++i >= nargs) PrintUsage(args[0], i, "out of arguments"); - *gccflags = args[i]; + if (args[i-2][1] == 'C') + comps[GCC_] = args[i]; + else + *gccflags = args[i]; } else { @@ -395,6 +405,9 @@ return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32"); if (MachIsS390(arch)) return((ptrbits == 64) ? 
"-m64" : "-m31"); + if (OS == OSAIX) + return((ptrbits == 64) ? "-maix64" : "-maix32"); + if (!CompIsGcc(comp)) { /* @@ -466,8 +479,10 @@ /* * Update l2size, and set f2cdefs/cdefs if they are null */ - if (!l2size) l2size = 4*1024*1024; - else l2size *= 1024; + if (!l2size) + l2size = ((ptrbits == 64) ? 32 : 4) * 1024*1024; + else + l2size *= 1024; if (!f2cdefs) f2cdefs = ""; /* * Append any appended flags, and then we have just compilers and flags @@ -529,7 +544,7 @@ ISAX = i; fprintf(fpout, "# ----------------------------\n"); - fprintf(fpout, "# Make.inc for ATLAS3.10.2\n"); + fprintf(fpout, "# Make.inc for ATLAS3.10.3\n"); fprintf(fpout, "# ----------------------------\n\n"); fprintf(fpout, "# ----------------------------------\n"); @@ -542,6 +557,9 @@ fprintf(fpout, "# -------------------------------------------------\n"); fprintf(fpout, " ARCH = %s", machnam[mach]); fprintf(fpout, "%d", ptrbits); + #if defined(__powerpc64__) && defined(__ORDER_LITTLE_ENDIAN__) + fprintf(fpout, "LE"); + #endif if (ISAX) fprintf(fpout, "%s", ISAXNAM[ISAX]); if (!USEIEEE) @@ -576,6 +594,8 @@ fprintf(fpout, " FLAdir = $(BLDdir)/src/lapack/reference\n"); if (ADd) fprintf(fpout, " ADdir = %s\n", ADd); + else if (OS == OSWin64 && ptrbits == 64) + fprintf(fpout, " ADdir = $(SRCdir)/CONFIG/ARCHS/WIN64\n"); else fprintf(fpout, " ADdir = $(SRCdir)/CONFIG/ARCHS\n"); fprintf(fpout, "\n"); @@ -660,7 +680,7 @@ fprintf(fpout, " %d", tids[k]); } else - fprintf(fpout, "TIDLIST="); + fprintf(fpout, " TIDLIST="); fprintf(fpout, "\n\n"); fprintf(fpout, @@ -691,6 +711,10 @@ fprintf(fpout, " -m32"); else if (strstr(comps[NCOMP+DKC_], " -m64")) fprintf(fpout, " -m64"); + else if (strstr(comps[NCOMP+DKC_], " -maix64")) + fprintf(fpout, " -maix64"); + else if (strstr(comps[NCOMP+DKC_], " -maix32")) + fprintf(fpout, " -maix32"); if (cdefs) fprintf(fpout, " %s", cdefs); if (THREADS) { @@ -739,7 +763,10 @@ comps[i][j-2] == 'c' && comps[i][j-1] == 'c') break; } - goodgcc = (i < NCOMP) ? 
comps[i] : "gcc"; + if (i < NCOMP) + goodgcc = comps[i]; + else + goodgcc = comps[GCC_] ? comps[GCC_] : "gcc"; fprintf(fpout, " GOODGCC = %s", goodgcc); if (gccflags) fprintf(fpout, " %s", gccflags); @@ -824,8 +851,10 @@ fprintf(fpout, " SSLAPACKlib = # set to serial system lapack\n"); if (THREADS) { - if (OSIsWin(OS)) + if (USEMINGW || (ptrbits == 32 && OSIsWin(OS))) fprintf(fpout, " LIBS = -lkernel32 -lm\n\n"); + else if (OSIsWin(OS)) + fprintf(fpout, " LIBS = -lm\n\n"); else fprintf(fpout, " LIBS = -lpthread -lm\n\n"); } @@ -860,6 +889,8 @@ } else sp = NewStringCopy("gcc"); } + else if (CompIsClang(comps[i])) + sp = NewStringCopy((i==F77_) ? "gfortran" : "clang"); else { sp = NameWithoutPath(comps[i]); diff -Nru atlas-3.10.2/CONFIG/src/wincc.c atlas-3.10.3/CONFIG/src/wincc.c --- atlas-3.10.2/CONFIG/src/wincc.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/wincc.c 2016-07-28 19:43:01.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/CONFIG/src/winf77.c atlas-3.10.3/CONFIG/src/winf77.c --- atlas-3.10.2/CONFIG/src/winf77.c 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/CONFIG/src/winf77.c 2016-07-28 19:43:01.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/configure atlas-3.10.3/configure --- atlas-3.10.2/configure 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/configure 2016-07-28 19:42:59.000000000 +0000 @@ -13,6 +13,8 @@ flapacktar=ATL_NoOverride f77=1 fulllapack=0 +usetids=0 +tidlist= # # path is configure path without trailing configure :) # @@ -77,6 +79,7 @@ echo "ATLAS config includes this script, and probes written in C." echo "Therefore, configure flags are union of script and probe flags." echo "This configure script accepts the following flags:" + echo " --force-clang=/path/to/clang : use clang not gcc all C comps" echo " --cc= : compiler to compile configure probes" echo " --cflags='' : flags for above" echo " --prefix= : Toplevel installation directory." @@ -147,9 +150,24 @@ incinstdir="$flag" handled=1 fi + flag=`echo "$arg" | sed -e "s/--force-clang=//"` + if test "$flag" != "$arg" + then + cc=$flag + pass="$pass -C ac $flag -C gc $flag" + handled=1 + fi + flag=`echo "$arg" | sed -e "s/--cripple-atlas-performance//"` + if test "$flag" != "$arg" + then + pass="$pass -Si cripple-perf 1" + handled=1 + fi flag=`echo "$arg" | sed -e "s/--force-tids=//"` if test "$flag" != "$arg" then + usetids=1 + tidlist=$flag pass="$pass -tl $flag" handled=1 fi @@ -288,6 +306,20 @@ libinstdir='$(DESTDIR)/lib' fi # +# Copy the correct ATLrun.sh over +# +if test $usetids -eq 1 +then + echo "#!/bin/sh" > ATLrun.sh + echo "atldir=\$1" >> ATLrun.sh + echo "shift" >> ATLrun.sh + flag=`echo "$tidlist" | sed -e "s/[0-9]* // ; s/\([0-9]\) /\1,/g ; s/, *$//"` + echo "taskset -c $flag \$atldir/\$*" >> ATLrun.sh + chmod u+x ATLrun.sh +else + cp "$path"/CONFIG/src/ATLrun.sh ATLrun.sh +fi +# # Create the Makefile and copy the compiler info file # echo BLDdir="$blddir" > Makefile Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/arm/ARMv732NEON.tar.bz2 and 
/tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/arm/ARMv732NEON.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/arm/ARMv732.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/arm/ARMv732.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/arm/GENERIC32FPV3D16MAC.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/arm/GENERIC32FPV3D16MAC.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/arm64/GENERIC64.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/arm64/GENERIC64.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/ia64/IA64Itan64.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/ia64/IA64Itan64.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/ppc64/GENERIC64.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/ppc64/GENERIC64.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/ppc64el/GENERIC64LE.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/ppc64el/GENERIC64LE.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/ppc64el/POWER864LE.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/ppc64el/POWER864LE.tar.bz2 differ diff -Nru atlas-3.10.2/debian/archdefs/README atlas-3.10.3/debian/archdefs/README --- atlas-3.10.2/debian/archdefs/README 2015-11-23 18:17:46.000000000 +0000 +++ atlas-3.10.3/debian/archdefs/README 2018-04-29 17:01:59.000000000 +0000 @@ -8,15 +8,18 @@ - amd64: ATLAS 3.10.1 / karaba.cepremap.org / wheezy / 2013-06-04 - arm: + GENERIC32.tar.bz2 (for armel): ATLAS 3.10.1 / vamana.villemot.name (Raspberry Pi) / sid / 2013-07-01 - + ARMv732 (for armhf): from ATLAS 3.10.1 tarball, modified for removing all - the "-mfloat-abi=armhf" and "-mfpu=vfpv3" flags - + ARMv732NEON (for armhf): same than ARMv732; note that 
this one is not used - for the generic package, but only for the custom one -- arm64: ATLAS 3.10.2 / asachi.debian.org / sid / 2014-10-25 + + GENERIC32FPV3D16MAC.tar.bz2 (for armhf): ATLAS 3.10.3 / asachi.debian.org / sid / 2017-08-10 +- arm64: ATLAS 3.10.3 / asachi.debian.org / sid / 2016-08-05 - i386: from ATLAS 3.10.1 tarball +- ia64: ATLAS 3.10.1 / merulo.debian.org / sid / 2013-06-08 - mips: ATLAS 3.10.1 / gabrielli.debian.org / sid / 2013-07-27 - mipsel: ATLAS 3.10.1 / eder.debian.org / sid / 2013-06-07 - mips64el: ATLAS 3.10.2 / etler.debian.org / sid / 2015-11-08 - powerpc: ATLAS 3.10.1 / partch.debian.org / sid / 2013-06-06 -- ppc64el: ATLAS 3.10.2 / pastel.debian.net / sid / 2014-10-24 -- s390x: ATLAS 3.10.1 / zelenka.debian.org / sid / 2013-06-06 +- ppc64: ATLAS 3.10.3 / pizzetti.debian.org / sid / 2017-08-10 +- ppc64el: ATLAS 3.10.3 / plummer.debian.org / sid / 2016-08-05 +- riscv64: created by Manuel A. Fernandez Montecelo (see https://bugs.debian.org/897128) +- s390x: + + IBMz964.tar.bz2: ATLAS 3.10.1 / zelenka.debian.org / sid / 2013-06-06 + + IBMz1264.tar.bz2: created by Dimitri John Ledkov for Ubuntu +- sparc64: ATLAS 3.10.3 / notker.debian.net / sid / 2017-08-13 Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/riscv64/GENERIC64.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/riscv64/GENERIC64.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/s390x/IBMz1264.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/s390x/IBMz1264.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/s390x/IBMz1364VXZ.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/s390x/IBMz1364VXZ.tar.bz2 differ Binary files /tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/s390x/IBMz1464VXZ2.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/s390x/IBMz1464VXZ2.tar.bz2 differ Binary files 
/tmp/tmpxzWZAr/SuCtZgXJf9/atlas-3.10.2/debian/archdefs/sparc64/GENERIC64.tar.bz2 and /tmp/tmpxzWZAr/seFmZIKEWC/atlas-3.10.3/debian/archdefs/sparc64/GENERIC64.tar.bz2 differ diff -Nru atlas-3.10.2/debian/blas-atlas.pc.in atlas-3.10.3/debian/blas-atlas.pc.in --- atlas-3.10.2/debian/blas-atlas.pc.in 2014-10-15 19:34:27.000000000 +0000 +++ atlas-3.10.3/debian/blas-atlas.pc.in 2017-09-09 11:19:09.000000000 +0000 @@ -1,9 +1,9 @@ prefix=/usr -libdir=${prefix}/lib/atlas-base/atlas -includedir=${prefix}/include +libdir=${prefix}/lib/@DEB_HOST_MULTIARCH@/atlas +includedir=${prefix}/include/@DEB_HOST_MULTIARCH@ Name: atlas-blas Description: Automatically Tuned Linear Algebra Software, BLAS -Version: @DEB_UPSTREAM_VERSION@ +Version: @DEB_VERSION_UPSTREAM@ URL: http://math-atlas.sourceforge.net/ Libs: -L${libdir} -lblas Libs.private: -L${libdir} -latlas -lm diff -Nru atlas-3.10.2/debian/changelog atlas-3.10.3/debian/changelog --- atlas-3.10.2/debian/changelog 2015-11-23 18:22:03.000000000 +0000 +++ atlas-3.10.3/debian/changelog 2020-08-23 00:29:03.000000000 +0000 @@ -1,3 +1,178 @@ +atlas (3.10.3-8ubuntu5~16.04.sav0) xenial; urgency=medium + + * Backport to Xenial + * debian/control: Set debhelper-compat (= 10) BD (LP highest for Xenial) + + -- Rob Savoury Sat, 22 Aug 2020 17:29:03 -0700 + +atlas (3.10.3-8ubuntu5) eoan; urgency=medium + + * Enable z14 build at -march=z14, with cross-compile engine at + -march=z13. LP: #1837577 + + -- Dimitri John Ledkov Thu, 15 Aug 2019 15:34:03 +0100 + +atlas (3.10.3-8ubuntu4) eoan; urgency=medium + + * Fix missing / typo in /vx install paths. + * Add Enable-cross-compile.patch from IBM. + * Update archdefs tarballs for z13 and z14, with additional tuning. + * Configure z13/z14 builds without build-time tuning. + + -- Dimitri John Ledkov Mon, 12 Aug 2019 13:18:13 +0100 + +atlas (3.10.3-8ubuntu3) eoan; urgency=medium + + * Import cherrypicked upstream patches to correct z13, z14 SIMD vector + instructions.
+ * Add tarballs for z13 and z14 tuning. + * Attempt to build for z13 and z14, install into vx and vxe directories. + * Whilst neither z14 hw is available, nor cross-compilation, only vx + optimized binaries are provided on s390x. LP: #1814796 + + -- Dimitri John Ledkov Tue, 12 Mar 2019 17:48:48 +0000 + +atlas (3.10.3-8) unstable; urgency=medium + + * Drop armel-is-v4t.patch. + No longer needed now that armel baseline is v5te (see #882174). + * Fix build of custom package (.pc file was generated too late) + * Bump to debhelper compat level 12 + * Add Rules-Requires-Root: no + * Bump to S-V 4.3.0 + + -- Sébastien Villemot Tue, 22 Jan 2019 11:31:26 +0100 + +atlas (3.10.3-7) unstable; urgency=medium + + * libatlas-doc: add Breaks+Replaces libatlas-base-dev (<< 3.10.3-6~) + Thanks to Andreas Beckmann (Closes: #901099) + + -- Sébastien Villemot Sat, 09 Jun 2018 19:07:12 +0200 + +atlas (3.10.3-6) unstable; urgency=medium + + * Resurrect architectural defaults for ia64. + * Add architectural defaults for riscv64. + Thanks to Manuel A. Fernandez Montecelo (Closes: #897128) + * Bump to debhelper compat level 11. + * d/rules: remove get-orig-source target. + * Remove Built-Using field. + Since both LAPACK and ATLAS are under BSD-3-clause, there is no reason to + use this field (which is now restricted to license compliance issues). + * Bump S-V to 4.1.4. + * Update Vcs-* fields for move to salsa. + * Install doc under /u/s/d/libatlas-base-dev, per Policy §12.3. + + -- Sébastien Villemot Sun, 29 Apr 2018 19:37:01 +0200 + +atlas (3.10.3-5) unstable; urgency=low + + * libatlas3-base Conflicts+Replaces libcblas3. (Closes: #874802) + * libatlas-base-dev Conflicts+Replaces libcblas-dev. (Closes: #874803) + + -- Sébastien Villemot Wed, 13 Sep 2017 22:29:27 +0200 + +atlas (3.10.3-4) unstable; urgency=low + + * Multi-archify the package. + Incidentally, drop the dependency of libatlas-base-dev on libblas-dev, since + cblas.h is now managed by the alternative.
+ * kfreebsd.patch: update using the fix applied upstream. + * Bump Standards-Version to 4.1.0. + + -- Sébastien Villemot Sat, 09 Sep 2017 10:27:50 +0200 + +atlas (3.10.3-3) unstable; urgency=medium + + * d/control: libatlas-base-dev now directly depends on libblas-dev. + This is necessary for having cblas.h under /usr/include (as was + the case before libatlas-dev was dropped). + * Add archdef for sparc64. + * d/control: mark libatlas-doc as M-A foreign. + * d/orig-tar.{sh,exclude}: fetch TexDoc subdir from git instead of CVS. + + -- Sébastien Villemot Wed, 16 Aug 2017 22:30:36 +0200 + +atlas (3.10.3-2) unstable; urgency=medium + + * “The frantic post-release package overhaul” + + [ Sébastien Villemot ] + * d/copyright: complete rewrite using machine-readable format 1.0. + * Merge libatlas-dev into libatlas-base-dev, drop the former. + * Drop useless patches: + + d/p/10_s390.diff + + d/p/17_hppa.diff + + d/p/18_alpha.diff + + d/p/20_armel.diff + + d/p/22_sh.diff + + d/p/16_warning-removed.diff + * d/p/ppc64-endianness.patch: new patch, fix machine detection on ppc64. + * d/p/fix-typos.patch: new patch. + * d/p/missing-cflags.patch: new patch. + * Drop patches for creating full BLAS and LAPACK libs, and shared libs. + Replace them by rules in debian/rules. + Also no longer embed lib(pt)?cblas.a into liblapack.a, not needed. + * d/rules: + + Rewrite and simplify using dh (drop cdbs). + + Support parallel=N in DEB_BUILD_OPTIONS. + + Support nodoc in DEB_BUILD_OPTIONS. + + Inject flags from dpkg-buildflags into the generic package. + Incidentally, this means that the noopt and nostrip flags of + DEB_BUILD_OPTIONS are now supported. (Closes: #697203) + + Do not use "-Ss flapack" option of configure, it does not work. + * d/watch: + + Bump to format version 4. Update d/orig-tar.sh to make it compatible with + the new invocation syntax. + + Add +ds suffix to orig tarball, since we're repacking it. + * d/control: + + Use canonical URL in Vcs-Browser. 
+ + Drop Suggests on libblas-doc, superseded by liblapack-doc. + + Remove several unnecessary build-dependencies. + + Bump Standards-Version to 4.0.1. + + libatlas-test now marked priority optional, per Policy 4.0.1. + * Bump debhelper compat level to 10. + * Remove obsolete TODO. + * d/lapack-atlas.in: add Requires.private on blas. + * Custom package: + + Use DEB_BUILD_OPTIONS=custom as new interface. + By the way, no longer automatically append a changelog entry. + (Closes: #854781) + + Improve the instructions in README.Debian. + + Add -lpthread to dynamic libs and .pc file when relevant. + + Ship libpt{c,f77}blas.so when relevant. (Closes: #737834) + * armhf generic package: stop using Cortex-a7 machtype, enable VFPv3-D16. + * Add archdef for ppc64. + + [ Dimitri John Ledkov ] + * d/rules: on Ubuntu s390x set ARCHS to z12. + * d/archdefs/s390x: add IBMz12 tarball + + -- Sébastien Villemot Thu, 10 Aug 2017 18:39:08 +0200 + +atlas (3.10.3-1) unstable; urgency=low + + [ Sébastien Villemot ] + * Imported Upstream version 3.10.3. (Closes: #833139) + * Drop d/p/cpu-throttling-check.diff, no longer needed. + Replace it with new --cripple-atlas-performance option. + * Drop d/p/ppc64el-new-archdef-name.patch, applied upstream. + * Drop d/p/armhf.diff, no longer needed. + * d/rules: update architecture/ISA names/enums. + * Update archdefs for ppc64el, arm64 and armhf. + * d/control: + + drop useless Section field for binary package libatlas-test. + + bump Standards-Version to 3.9.8, no changes needed. + + use secure URLs for Vcs-* fields. + * Improve shell syntax in preinst scripts. + Thanks to Wolfgang Karall-Ahlborn (Closes: #805406) + + [ Dimitri John Ledkov ] + * d/rules: on ppc64el set ARCHES to POWER8. 
+ + -- Sébastien Villemot Sat, 06 Aug 2016 20:27:27 +0200 + atlas (3.10.2-9) unstable; urgency=medium [ Sébastien Villemot ] diff -Nru atlas-3.10.2/debian/compat atlas-3.10.3/debian/compat --- atlas-3.10.2/debian/compat 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/compat 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -9 \ No newline at end of file diff -Nru atlas-3.10.2/debian/control atlas-3.10.3/debian/control --- atlas-3.10.2/debian/control 2015-11-01 09:19:03.000000000 +0000 +++ atlas-3.10.3/debian/control 2020-08-23 00:29:03.000000000 +0000 @@ -1,23 +1,35 @@ Source: atlas +Maintainer: Ubuntu Developers +XSBC-Original-Maintainer: Debian Science Team +Uploaders: Sébastien Villemot Section: devel Priority: optional -Maintainer: Debian Science Team -Uploaders: Sébastien Villemot -Standards-Version: 3.9.6 -Build-Depends: debhelper (>= 9), patch, gfortran, cdbs, - libblas-dev (>= 1.2.20110419-3), - liblapack-dev (>= 3.4.1), liblapack-pic (>= 3.4.1), libblas-test, - texlive-latex-base, ghostscript, cpufrequtils +Build-Depends: debhelper-compat (= 10), + dh-exec, + gfortran, + liblapack-pic (>= 3.7.1-2~), + texlive-latex-base, + ghostscript +Standards-Version: 4.3.0 +Vcs-Browser: https://salsa.debian.org/science-team/atlas +Vcs-Git: https://salsa.debian.org/science-team/atlas.git Homepage: http://math-atlas.sourceforge.net/ -Vcs-Git: git://anonscm.debian.org/debian-science/packages/atlas.git -Vcs-Browser: http://anonscm.debian.org/gitweb/?p=debian-science/packages/atlas.git +Rules-Requires-Root: no Package: libatlas3-base -Section: libs Architecture: any -Depends: ${shlibs:Depends}, ${misc:Depends}, libblas-common -Provides: libblas.so.3, liblapack.so.3 -Built-Using: ${Built-Using} +Multi-Arch: same +Section: libs +Depends: ${shlibs:Depends}, + ${misc:Depends} +Breaks: libblas3 (<< 3.7.1-2~), + liblapack3 (<< 3.7.1-2~), + libopenblas-base (<< 0.2.20+ds-3~), + libatlas-base-dev (<< 3.10.3-4~) +Conflicts: libcblas3 +Provides: libblas.so.3, + 
liblapack.so.3 +Replaces: libcblas3 Description: Automatically Tuned Linear Algebra Software, generic shared ATLAS is an approach for the automatic generation and optimization of numerical software. Currently ATLAS supplies optimized versions for the @@ -33,13 +45,23 @@ section: "Building Optimized Atlas Packages on your ARCH" in README.Debian Package: libatlas-base-dev -Section: libdevel Architecture: any -Depends: libatlas3-base (= ${binary:Version}), libatlas-dev, ${misc:Depends}, - ${shlibs:Depends} -Provides: libblas.so, liblapack.so -Suggests: libblas-doc, liblapack-doc -Built-Using: ${Built-Using} +Multi-Arch: same +Section: libdevel +Depends: libatlas3-base (= ${binary:Version}), + ${misc:Depends}, + ${shlibs:Depends} +Suggests: libatlas-doc, + liblapack-doc +Breaks: libblas-dev (<< 3.7.1-2~), + liblapack-dev (<< 3.7.1-2~), + libopenblas-dev (<< 0.2.20+ds-3~), + libatlas-dev (<< 3.10.3-2) +Conflicts: libcblas-dev +Provides: libblas.so, + liblapack.so +Replaces: libatlas-dev (<< 3.10.3-2), + libcblas-dev Description: Automatically Tuned Linear Algebra Software, generic static ATLAS is an approach for the automatic generation and optimization of numerical software. Currently ATLAS supplies optimized versions for the @@ -47,29 +69,14 @@ Subroutines (BLAS), and a subset of the linear algebra routines in the LAPACK library. . - This package includes the static libraries and symbolic links + This package includes the headers, the static libraries and symbolic links needed for program development. -Package: libatlas-dev -Section: libdevel -Architecture: any -Depends: libc6-dev, libblas-dev, ${misc:Depends} -Suggests: liblapack-dev, liblapack-doc -Description: Automatically Tuned Linear Algebra Software, C header files - ATLAS is an approach for the automatic generation and optimization of - numerical software. 
Currently ATLAS supplies optimized versions for the - complete set of linear algebra kernels known as the Basic Linear Algebra - Subroutines (BLAS), and a subset of the linear algebra routines in the - LAPACK library. - . - This package provides the headers needed to compile against the libraries - provided by ATLAS. - Package: libatlas-test Architecture: any -Section: devel -Priority: extra -Depends: ${shlibs:Depends}, ${misc:Depends} +Multi-Arch: same +Depends: ${shlibs:Depends}, + ${misc:Depends} Description: Automatically Tuned Linear Algebra Software, test programs ATLAS is an approach for the automatic generation and optimization of numerical software. Currently ATLAS supplies optimized versions for the @@ -81,9 +88,12 @@ binaries to compare the BLAS and ATLAS libraries on your system. Package: libatlas-doc +Architecture: all +Multi-Arch: foreign Section: doc Depends: ${misc:Depends} -Architecture: all +Breaks: libatlas-base-dev (<< 3.10.3-6~) +Replaces: libatlas-base-dev (<< 3.10.3-6~) Description: Automatically Tuned Linear Algebra Software, documentation ATLAS is an approach for the automatic generation and optimization of numerical software. Currently ATLAS supplies optimized versions for the diff -Nru atlas-3.10.2/debian/copyright atlas-3.10.3/debian/copyright --- atlas-3.10.2/debian/copyright 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/copyright 2017-09-09 11:19:09.000000000 +0000 @@ -1,62 +1,129 @@ -This package was debianized by Camm Maguire on -Tue, 14 Sep 1999 09:28:44 -0400 and rewritten by Sylvestre Ledru - on September 2009. - -It was downloaded from http://math-atlas.sourceforge.net/ - -Upstream Authors: - R. Clint Whaley - Jack Dongarra - Jeff Horner - Peter Soendergaard - Antoine P. Petitet - Julian Ruhe - Tim Mattox - Hank Dietz - Camm Maguire - -Copyright: - (C) Copyright 1997-2008 All Rights Reserved - (C) 1996-2011 R. Clint Whaley - (C) 1998 Jeff Horner - (C) 2001 Peter Soendergaard - (C) 1999 Antoine P. 
Petitet - (C) 2001 Julian Ruhe - (C) 2003 Tim Mattox - (C) 2003 Hank Dietz - (C) 2003 Camm Maguire - (C) 1999 The Australian National University - - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: ATLAS +Upstream-Contact: R. Clint Whaley +Source: http://math-atlas.sourceforge.net/ + +Files: * +Copyright: 1997-2016 R. Clint Whaley + 1999-2000, 2004 Antoine P. Petitet + 2000-2001 Peter Soendergaard + 2009, 2012 Siju Samuel + 2001 Julian Ruhe + 2010 IBM Corporation + 1998 Jeff Horner + 2011 Md. Rakib Hasan + 2010-2011 Vesperix Corporation + 2009 Chad Zalkin + 2011 Md. Majedul Haque Sujon + 1999 The Australian National University +License: BSD-3-clause-ATLAS + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the ATLAS group or the names of its contributers may - not be used to endorse or promote products derived from this - software without specific written permission. + . + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in + the documentation and/or other materials provided with the distri- + bution. + 3. 
The name of the University, the ATLAS group, or the names of its + contributors may not be used to endorse or promote products deri- + ved from this software without specific written permission. + . + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO- + RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (IN- + CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Files: interfaces/blas/F77/src/lsame.f + src/blas/f77reference/* +Copyright: 1992-2017 The University of Tennessee and The University of Tennessee Research Foundation + 2000-2017 The University of California Berkeley + 2006-2017 The University of Colorado Denver +License: BSD-3-clause + +Files: bin/extract.c +Copyright: 1994, 2015, R. Clint Whaley (rwhaley@cs.utk.edu) +License: GPL-2-modified + This program is distributed under the terms of the Gnu + General Public License (GPL), with the following two exceptions: + (1) Clause (9), dealing with updating the GPL automatically, is + specifically disallowed by the author. The author will + determine if a newer GPL version is still appropriate. + (2) The basefiles extract accepts as input, and the extracted + files it produces as output, are specifically designated + as outside the scope if this license (i.e. they are *not* + required by this license to be GPL). + The full, unaltered, text of the GPL is included at the end of + the program source listing.
+ . + On Debian systems, the complete text of the GNU General Public + License, version 2, can be found in the file + `/usr/share/common-licenses/GPL-2'. + +Files: tune/blas/gemm/CASES/ATL_smm_3dnow_90.c +Copyright: none +License: public-domain + The authors and University of Kentucky make this software freely + available as a PUBLIC DOMAIN release. None of the authors nor + University of Kentucky can be held responsible for any problems + deriving from use of this software. + . + The primary author is: + . + Tim Mattox + Department of Electrical and Computer Engineering + University of Kentucky + Lexington, KY 40506-0046 + email: tmattox@engr.uky.edu + URL: http://aggregate.org/ + +Files: debian/* +Copyright: 1999-2007 Camm Maguire + 2008-2013 Sylvestre Ledru + 2013-2017 Sébastien Villemot +License: BSD-3-clause +License: BSD-3-clause + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + . + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + . + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer listed + in this license in the documentation and/or other materials + provided with the distribution. + . + - Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + . + The copyright holders provide no reassurances that the source code + provided does not infringe any patent, copyright, or any other + intellectual property rights of third parties. The copyright holders + disclaim any liability to any recipient for claims brought against + recipient by any third party for infringement of that parties + intellectual property rights. + . 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -`AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS - BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - -On alpha architectures, the standard builds use K. GOTO's kernels -contributed to the ATLAS project under the LGPL. Therefore on these -platforms, the distributed prebuilt libraries, as well as any user -built library using this kernel, is licensed under the LGPL. - -On Debian GNU/Linux systems, the complete text of the GNU Lesser General -Public License can be found in `/usr/share/common-licenses/LGPL-2'. - + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff -Nru atlas-3.10.2/debian/docs atlas-3.10.3/debian/docs --- atlas-3.10.2/debian/docs 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/docs 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -README diff -Nru atlas-3.10.2/debian/lapack-atlas.pc.in atlas-3.10.3/debian/lapack-atlas.pc.in --- atlas-3.10.2/debian/lapack-atlas.pc.in 2014-10-15 19:34:35.000000000 +0000 +++ atlas-3.10.3/debian/lapack-atlas.pc.in 2017-09-09 11:19:09.000000000 +0000 @@ -1,10 +1,11 @@ prefix=/usr -libdir=${prefix}/lib/atlas-base/atlas -includedir=${prefix}/include +libdir=${prefix}/lib/@DEB_HOST_MULTIARCH@/atlas +includedir=${prefix}/include/@DEB_HOST_MULTIARCH@ Name: atlas-lapack Description: Automatically Tuned Linear Algebra Software, LAPACK -Version: @DEB_UPSTREAM_VERSION@ +Version: @DEB_VERSION_UPSTREAM@ URL: http://math-atlas.sourceforge.net/ Libs: -L${libdir} -llapack Libs.private: -L${libdir} -latlas -lm +Requires.private: blas Cflags: -I${includedir} diff -Nru atlas-3.10.2/debian/libatlas3-base.install atlas-3.10.3/debian/libatlas3-base.install --- atlas-3.10.2/debian/libatlas3-base.install 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/libatlas3-base.install 2019-08-12 12:18:13.000000000 +0000 @@ -1,3 +1,22 @@ -debian/tmp/usr/lib/atlas-base/*.so.* -debian/tmp/usr/lib/atlas-base/atlas/*.so.* +#!/usr/bin/dh-exec +build/libblas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/atlas/ +build/liblapack.so.* /usr/lib/${DEB_HOST_MULTIARCH}/atlas/ +build/libatlas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/ +build/lib*cblas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/ +build/lib*f77blas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/ +build/liblapack_atlas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/ + +[s390x] ${Z13} build-z13/libblas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/atlas/vx +[s390x] ${Z13} build-z13/liblapack.so.* /usr/lib/${DEB_HOST_MULTIARCH}/atlas/vx +[s390x] ${Z13} build-z13/libatlas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/vx +[s390x] ${Z13} build-z13/lib*cblas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/vx 
+[s390x] ${Z13} build-z13/lib*f77blas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/vx +[s390x] ${Z13} build-z13/liblapack_atlas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/vx + +[s390x] ${Z14} build-z14/libblas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/atlas/vxe +[s390x] ${Z14} build-z14/liblapack.so.* /usr/lib/${DEB_HOST_MULTIARCH}/atlas/vxe +[s390x] ${Z14} build-z14/libatlas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/vxe +[s390x] ${Z14} build-z14/lib*cblas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/vxe +[s390x] ${Z14} build-z14/lib*f77blas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/vxe +[s390x] ${Z14} build-z14/liblapack_atlas.so.* /usr/lib/${DEB_HOST_MULTIARCH}/vxe diff -Nru atlas-3.10.2/debian/libatlas3-base.links atlas-3.10.3/debian/libatlas3-base.links --- atlas-3.10.2/debian/libatlas3-base.links 2015-11-01 09:19:00.000000000 +0000 +++ atlas-3.10.3/debian/libatlas3-base.links 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -usr/lib/atlas-base/libcblas.so.3 usr/lib/libcblas.so.3 -usr/lib/atlas-base/libf77blas.so.3 usr/lib/libf77blas.so.3 -usr/lib/atlas-base/libatlas.so.3 usr/lib/libatlas.so.3 -usr/lib/atlas-base/liblapack_atlas.so.3 usr/lib/liblapack_atlas.so.3 diff -Nru atlas-3.10.2/debian/libatlas3-base.postinst atlas-3.10.3/debian/libatlas3-base.postinst --- atlas-3.10.2/debian/libatlas3-base.postinst 2015-11-01 09:11:46.000000000 +0000 +++ atlas-3.10.3/debian/libatlas3-base.postinst 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -#! /bin/sh - -set -e - - -update-alternatives --install /usr/lib/libblas.so.3 libblas.so.3 \ - /usr/lib/atlas-base/atlas/libblas.so.3 35 - -update-alternatives --install /usr/lib/liblapack.so.3 liblapack.so.3 \ - /usr/lib/atlas-base/atlas/liblapack.so.3 35 - -# dh_installdeb will replace this with shell code automatically -# generated by other debhelper scripts. 
- -#DEBHELPER# - -exit 0 diff -Nru atlas-3.10.2/debian/libatlas3-base.postinst.in atlas-3.10.3/debian/libatlas3-base.postinst.in --- atlas-3.10.2/debian/libatlas3-base.postinst.in 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/libatlas3-base.postinst.in 2017-09-09 11:19:09.000000000 +0000 @@ -0,0 +1,15 @@ +#!/bin/sh + +set -e + +update-alternatives --install /usr/lib/@DEB_HOST_MULTIARCH@/libblas.so.3 \ + libblas.so.3-@DEB_HOST_MULTIARCH@ \ + /usr/lib/@DEB_HOST_MULTIARCH@/atlas/libblas.so.3 35 + +update-alternatives --install /usr/lib/@DEB_HOST_MULTIARCH@/liblapack.so.3 \ + liblapack.so.3-@DEB_HOST_MULTIARCH@ \ + /usr/lib/@DEB_HOST_MULTIARCH@/atlas/liblapack.so.3 35 + +#DEBHELPER# + +exit 0 diff -Nru atlas-3.10.2/debian/libatlas3-base.preinst atlas-3.10.3/debian/libatlas3-base.preinst --- atlas-3.10.2/debian/libatlas3-base.preinst 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/libatlas3-base.preinst 2017-09-09 11:19:09.000000000 +0000 @@ -1,17 +1,14 @@ -#! /bin/sh +#!/bin/sh set -e -# In 3.10.1-1, some symlinks were moved out of the alternatives system -# The following ensures that they are freed by the alternatives system before unpacking -if [ $1 = "upgrade" -a `dpkg --compare-versions "$2" ge 3.8.4-4~exp1` -a `dpkg --compare-versions "$2" lt 3.10.1-1` ] +# Cleanup pre-multiarch alternative +if [ "$1" = "upgrade" ] && dpkg --compare-versions "$2" lt 3.10.3-4~ then update-alternatives --remove libblas.so.3 /usr/lib/atlas-base/atlas/libblas.so.3 + update-alternatives --remove liblapack.so.3 /usr/lib/atlas-base/atlas/liblapack.so.3 fi -# dh_installdeb will replace this with shell code automatically -# generated by other debhelper scripts. 
- #DEBHELPER# exit 0 diff -Nru atlas-3.10.2/debian/libatlas3-base.prerm atlas-3.10.3/debian/libatlas3-base.prerm --- atlas-3.10.2/debian/libatlas3-base.prerm 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/libatlas3-base.prerm 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ -#!/bin/sh - -set -e - -if [ "$1" != "upgrade" ] -then - update-alternatives --remove libblas.so.3 \ - /usr/lib/atlas-base/atlas/libblas.so.3 - - update-alternatives --remove liblapack.so.3 \ - /usr/lib/atlas-base/atlas/liblapack.so.3 -fi - -#DEBHELPER# - -exit 0 diff -Nru atlas-3.10.2/debian/libatlas3-base.prerm.in atlas-3.10.3/debian/libatlas3-base.prerm.in --- atlas-3.10.2/debian/libatlas3-base.prerm.in 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/libatlas3-base.prerm.in 2017-09-09 11:19:09.000000000 +0000 @@ -0,0 +1,16 @@ +#!/bin/sh + +set -e + +if [ "$1" != "upgrade" ] +then + update-alternatives --remove libblas.so.3-@DEB_HOST_MULTIARCH@ \ + /usr/lib/@DEB_HOST_MULTIARCH@/atlas/libblas.so.3 + + update-alternatives --remove liblapack.so.3-@DEB_HOST_MULTIARCH@ \ + /usr/lib/@DEB_HOST_MULTIARCH@/atlas/liblapack.so.3 +fi + +#DEBHELPER# + +exit 0 diff -Nru atlas-3.10.2/debian/libatlas-base-dev.install atlas-3.10.3/debian/libatlas-base-dev.install --- atlas-3.10.2/debian/libatlas-base-dev.install 2014-10-15 19:27:15.000000000 +0000 +++ atlas-3.10.3/debian/libatlas-base-dev.install 2019-08-12 12:18:13.000000000 +0000 @@ -1,6 +1,32 @@ -debian/tmp/usr/lib/atlas-base/*.so -debian/tmp/usr/lib/atlas-base/*.a -debian/tmp/usr/lib/atlas-base/atlas/*.so -debian/tmp/usr/lib/atlas-base/atlas/*.a -debian/blas-atlas.pc /usr/lib/pkgconfig -debian/lapack-atlas.pc /usr/lib/pkgconfig +#!/usr/bin/dh-exec + +usr/include/ +usr/lib/*/*.a +debian/blas-atlas.pc /usr/lib/${DEB_HOST_MULTIARCH}/pkgconfig/ +debian/lapack-atlas.pc /usr/lib/${DEB_HOST_MULTIARCH}/pkgconfig/ +build/libblas.a /usr/lib/${DEB_HOST_MULTIARCH}/atlas/ +build/libblas.so /usr/lib/${DEB_HOST_MULTIARCH}/atlas/ 
+build/liblapack.a /usr/lib/${DEB_HOST_MULTIARCH}/atlas/ +build/liblapack.so /usr/lib/${DEB_HOST_MULTIARCH}/atlas/ +build/libatlas.so /usr/lib/${DEB_HOST_MULTIARCH}/ +build/lib*cblas.so /usr/lib/${DEB_HOST_MULTIARCH}/ +build/lib*f77blas.so /usr/lib/${DEB_HOST_MULTIARCH}/ +build/liblapack_atlas.so /usr/lib/${DEB_HOST_MULTIARCH}/ + +[s390x] ${Z13} build/libblas.a /usr/lib/${DEB_HOST_MULTIARCH}/atlas/vx +[s390x] ${Z13} build/libblas.so /usr/lib/${DEB_HOST_MULTIARCH}/atlas/vx +[s390x] ${Z13} build/liblapack.a /usr/lib/${DEB_HOST_MULTIARCH}/atlas/vx +[s390x] ${Z13} build/liblapack.so /usr/lib/${DEB_HOST_MULTIARCH}/atlas/vx +[s390x] ${Z13} build/libatlas.so /usr/lib/${DEB_HOST_MULTIARCH}/vx +[s390x] ${Z13} build/lib*cblas.so /usr/lib/${DEB_HOST_MULTIARCH}/vx +[s390x] ${Z13} build/lib*f77blas.so /usr/lib/${DEB_HOST_MULTIARCH}/vx +[s390x] ${Z13} build/liblapack_atlas.so /usr/lib/${DEB_HOST_MULTIARCH}/vx + +[s390x] ${Z14} build/libblas.a /usr/lib/${DEB_HOST_MULTIARCH}/atlas/vxe +[s390x] ${Z14} build/libblas.so /usr/lib/${DEB_HOST_MULTIARCH}/atlas/vxe +[s390x] ${Z14} build/liblapack.a /usr/lib/${DEB_HOST_MULTIARCH}/atlas/vxe +[s390x] ${Z14} build/liblapack.so /usr/lib/${DEB_HOST_MULTIARCH}/atlas/vxe +[s390x] ${Z14} build/libatlas.so /usr/lib/${DEB_HOST_MULTIARCH}/vxe +[s390x] ${Z14} build/lib*cblas.so /usr/lib/${DEB_HOST_MULTIARCH}/vxe +[s390x] ${Z14} build/lib*f77blas.so /usr/lib/${DEB_HOST_MULTIARCH}/vxe +[s390x] ${Z14} build/liblapack_atlas.so /usr/lib/${DEB_HOST_MULTIARCH}/vxe diff -Nru atlas-3.10.2/debian/libatlas-base-dev.links atlas-3.10.3/debian/libatlas-base-dev.links --- atlas-3.10.2/debian/libatlas-base-dev.links 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/libatlas-base-dev.links 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -usr/lib/atlas-base/libcblas.so usr/lib/libcblas.so -usr/lib/atlas-base/libcblas.a usr/lib/libcblas.a -usr/lib/atlas-base/libf77blas.so usr/lib/libf77blas.so -usr/lib/atlas-base/libf77blas.a usr/lib/libf77blas.a 
-usr/lib/atlas-base/libatlas.so usr/lib/libatlas.so -usr/lib/atlas-base/libatlas.a usr/lib/libatlas.a -usr/lib/atlas-base/liblapack_atlas.so usr/lib/liblapack_atlas.so -usr/lib/atlas-base/liblapack_atlas.a usr/lib/liblapack_atlas.a diff -Nru atlas-3.10.2/debian/libatlas-base-dev.postinst atlas-3.10.3/debian/libatlas-base-dev.postinst --- atlas-3.10.2/debian/libatlas-base-dev.postinst 2014-10-15 19:27:15.000000000 +0000 +++ atlas-3.10.3/debian/libatlas-base-dev.postinst 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -#! /bin/sh - -set -e - -update-alternatives --install /usr/lib/libblas.so libblas.so \ - /usr/lib/atlas-base/atlas/libblas.so 35 \ - --slave /usr/lib/libblas.a libblas.a \ - /usr/lib/atlas-base/atlas/libblas.a \ - --slave /usr/lib/pkgconfig/blas.pc blas.pc \ - /usr/lib/pkgconfig/blas-atlas.pc - -update-alternatives --install /usr/lib/liblapack.so liblapack.so \ - /usr/lib/atlas-base/atlas/liblapack.so 35 \ - --slave /usr/lib/liblapack.a liblapack.a \ - /usr/lib/atlas-base/atlas/liblapack.a \ - --slave /usr/lib/pkgconfig/lapack.pc lapack.pc \ - /usr/lib/pkgconfig/lapack-atlas.pc - -# dh_installdeb will replace this with shell code automatically -# generated by other debhelper scripts. 
- -#DEBHELPER# - -exit 0 diff -Nru atlas-3.10.2/debian/libatlas-base-dev.postinst.in atlas-3.10.3/debian/libatlas-base-dev.postinst.in --- atlas-3.10.2/debian/libatlas-base-dev.postinst.in 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/libatlas-base-dev.postinst.in 2017-09-09 11:19:09.000000000 +0000 @@ -0,0 +1,25 @@ +#!/bin/sh + +set -e + +update-alternatives --install /usr/lib/@DEB_HOST_MULTIARCH@/libblas.so libblas.so-@DEB_HOST_MULTIARCH@ \ + /usr/lib/@DEB_HOST_MULTIARCH@/atlas/libblas.so 35 \ + --slave /usr/lib/@DEB_HOST_MULTIARCH@/libblas.a libblas.a-@DEB_HOST_MULTIARCH@ \ + /usr/lib/@DEB_HOST_MULTIARCH@/atlas/libblas.a \ + --slave /usr/include/@DEB_HOST_MULTIARCH@/cblas.h cblas.h-@DEB_HOST_MULTIARCH@ \ + /usr/include/@DEB_HOST_MULTIARCH@/cblas-atlas.h \ + --slave /usr/lib/@DEB_HOST_MULTIARCH@/pkgconfig/blas.pc \ + blas.pc-@DEB_HOST_MULTIARCH@ /usr/lib/@DEB_HOST_MULTIARCH@/pkgconfig/blas-atlas.pc + +update-alternatives --install /usr/lib/@DEB_HOST_MULTIARCH@/liblapack.so \ + liblapack.so-@DEB_HOST_MULTIARCH@ \ + /usr/lib/@DEB_HOST_MULTIARCH@/atlas/liblapack.so 35 \ + --slave /usr/lib/@DEB_HOST_MULTIARCH@/liblapack.a liblapack.a-@DEB_HOST_MULTIARCH@ \ + /usr/lib/@DEB_HOST_MULTIARCH@/atlas/liblapack.a \ + --slave /usr/lib/@DEB_HOST_MULTIARCH@/pkgconfig/lapack.pc \ + lapack.pc-@DEB_HOST_MULTIARCH@ \ + /usr/lib/@DEB_HOST_MULTIARCH@/pkgconfig/lapack-atlas.pc + +#DEBHELPER# + +exit 0 diff -Nru atlas-3.10.2/debian/libatlas-base-dev.preinst atlas-3.10.3/debian/libatlas-base-dev.preinst --- atlas-3.10.2/debian/libatlas-base-dev.preinst 2015-01-29 19:50:18.000000000 +0000 +++ atlas-3.10.3/debian/libatlas-base-dev.preinst 2017-09-09 11:19:09.000000000 +0000 @@ -1,24 +1,14 @@ -#! 
/bin/sh +#!/bin/sh set -e -# In 3.10.1-1, some symlinks were moved out of the alternatives system -# The following ensures that they are freed by the alternatives system before unpacking -if [ $1 = "upgrade" -a `dpkg --compare-versions "$2" ge 3.8.4-4~exp1` -a `dpkg --compare-versions "$2" lt 3.10.1-1` ] +# Cleanup pre-multiarch alternative +if [ "$1" = "upgrade" ] && dpkg --compare-versions "$2" lt 3.10.3-4~ then - update-alternatives --remove libblas.so /usr/lib/atlas-base/atlas/libblas.so + update-alternatives --remove libblas.so /usr/lib/atlas-base/atlas/libblas.so + update-alternatives --remove liblapack.so /usr/lib/atlas-base/atlas/liblapack.so fi -# Cleanup obsolete alternatives dating back to lenny -if [ "$1" = "install" ] || [ "$1" = "upgrade" ] -then - update-alternatives --remove libblas-3.so /usr/lib/atlas/libblas.so - update-alternatives --remove liblapack-3.so /usr/lib/atlas/liblapack.so -fi - -# dh_installdeb will replace this with shell code automatically -# generated by other debhelper scripts. 
- #DEBHELPER# exit 0 diff -Nru atlas-3.10.2/debian/libatlas-base-dev.prerm atlas-3.10.3/debian/libatlas-base-dev.prerm --- atlas-3.10.2/debian/libatlas-base-dev.prerm 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/libatlas-base-dev.prerm 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -#!/bin/sh - -set -e - -if [ "$1" != "upgrade" ] -then - update-alternatives --remove libblas.so \ - /usr/lib/atlas-base/atlas/libblas.so - - update-alternatives --remove liblapack.so \ - /usr/lib/atlas-base/atlas/liblapack.so - -fi - -#DEBHELPER# - -exit 0 diff -Nru atlas-3.10.2/debian/libatlas-base-dev.prerm.in atlas-3.10.3/debian/libatlas-base-dev.prerm.in --- atlas-3.10.2/debian/libatlas-base-dev.prerm.in 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/libatlas-base-dev.prerm.in 2017-09-09 11:19:09.000000000 +0000 @@ -0,0 +1,16 @@ +#!/bin/sh + +set -e + +if [ "$1" != "upgrade" ] +then + update-alternatives --remove libblas.so-@DEB_HOST_MULTIARCH@ \ + /usr/lib/@DEB_HOST_MULTIARCH@/atlas/libblas.so + + update-alternatives --remove liblapack.so-@DEB_HOST_MULTIARCH@ \ + /usr/lib/@DEB_HOST_MULTIARCH@/atlas/liblapack.so +fi + +#DEBHELPER# + +exit 0 diff -Nru atlas-3.10.2/debian/libatlas-doc.doc-base.contrib atlas-3.10.3/debian/libatlas-doc.doc-base.contrib --- atlas-3.10.2/debian/libatlas-doc.doc-base.contrib 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/libatlas-doc.doc-base.contrib 2018-04-30 08:48:11.000000000 +0000 @@ -11,4 +11,4 @@ Section: Programming Format: PDF -Files: /usr/share/doc/libatlas-doc/atlas_contrib.pdf.gz +Files: /usr/share/doc/libatlas-base-dev/atlas_contrib.pdf.gz diff -Nru atlas-3.10.2/debian/libatlas-doc.doc-base.devel atlas-3.10.3/debian/libatlas-doc.doc-base.devel --- atlas-3.10.2/debian/libatlas-doc.doc-base.devel 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/libatlas-doc.doc-base.devel 2018-04-30 08:48:17.000000000 +0000 @@ -7,4 +7,4 @@ Section: Programming Format: PDF -Files: 
/usr/share/doc/libatlas-doc/atlas_devel.pdf.gz +Files: /usr/share/doc/libatlas-base-dev/atlas_devel.pdf.gz diff -Nru atlas-3.10.2/debian/libatlas-doc.doc-base.install atlas-3.10.3/debian/libatlas-doc.doc-base.install --- atlas-3.10.2/debian/libatlas-doc.doc-base.install 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/libatlas-doc.doc-base.install 2018-04-30 08:48:22.000000000 +0000 @@ -11,4 +11,4 @@ Section: Programming Format: PDF -Files: /usr/share/doc/libatlas-doc/atlas_install.pdf.gz +Files: /usr/share/doc/libatlas-base-dev/atlas_install.pdf.gz diff -Nru atlas-3.10.2/debian/libatlas-doc.docs atlas-3.10.3/debian/libatlas-doc.docs --- atlas-3.10.2/debian/libatlas-doc.docs 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/libatlas-doc.docs 2017-09-09 11:19:09.000000000 +0000 @@ -0,0 +1,3 @@ +README +doc/*.txt +TexDoc/*.pdf diff -Nru atlas-3.10.2/debian/libatlas-test.install atlas-3.10.3/debian/libatlas-test.install --- atlas-3.10.2/debian/libatlas-test.install 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/libatlas-test.install 2017-09-09 11:19:09.000000000 +0000 @@ -1,9 +1,11 @@ -build/atlas-base/bin/*tst usr/lib/libatlas-test -build/atlas-base/tune/blas/level1/x* usr/lib/libatlas-test -build/atlas-base/tune/blas/gemv/x* usr/lib/libatlas-test -build/atlas-base/tune/blas/gemm/x* usr/lib/libatlas-test -build/atlas-base/tune/blas/ger/x* usr/lib/libatlas-test -build/atlas-base/interfaces/blas/C/testing/x* usr/lib/libatlas-test -build/atlas-base/interfaces/blas/F77/testing/x* usr/lib/libatlas-test -interfaces/blas/C/testing/*.dat usr/lib/libatlas-test -interfaces/blas/F77/testing/*.dat usr/lib/libatlas-test +#!/usr/bin/dh-exec + +build/bin/*tst usr/lib/${DEB_HOST_MULTIARCH}/atlas/ +build/tune/blas/level1/x* usr/lib/${DEB_HOST_MULTIARCH}/atlas/ +build/tune/blas/gemv/x* usr/lib/${DEB_HOST_MULTIARCH}/atlas/ +build/tune/blas/gemm/x* usr/lib/${DEB_HOST_MULTIARCH}/atlas/ +build/tune/blas/ger/x* usr/lib/${DEB_HOST_MULTIARCH}/atlas/ 
+build/interfaces/blas/C/testing/x* usr/lib/${DEB_HOST_MULTIARCH}/atlas/ +build/interfaces/blas/F77/testing/x* usr/lib/${DEB_HOST_MULTIARCH}/atlas/ +interfaces/blas/C/testing/*.dat usr/lib/${DEB_HOST_MULTIARCH}/atlas/ +interfaces/blas/F77/testing/*.dat usr/lib/${DEB_HOST_MULTIARCH}/atlas/ diff -Nru atlas-3.10.2/debian/orig-tar.exclude atlas-3.10.3/debian/orig-tar.exclude --- atlas-3.10.2/debian/orig-tar.exclude 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/orig-tar.exclude 2017-09-09 11:19:09.000000000 +0000 @@ -1,2 +1,3 @@ -CVS +.gitignore +gitwash *.pdf diff -Nru atlas-3.10.2/debian/orig-tar.sh atlas-3.10.3/debian/orig-tar.sh --- atlas-3.10.2/debian/orig-tar.sh 2014-07-12 10:43:37.000000000 +0000 +++ atlas-3.10.3/debian/orig-tar.sh 2017-09-09 11:19:09.000000000 +0000 @@ -1,19 +1,16 @@ #!/bin/sh -e -# called by uscan with '--upstream-version' +# called by uscan with '--upstream-version' DIR=ATLAS DIRTARGET=atlas-$2 TAR=../atlas_$2.orig.tar.bz2 -# clean up the upstream tarball -tar jxvf $3 +# add TexDoc to tarball +tar -xf $TAR mv $DIR $DIRTARGET -# Before -# cvs -d:pserver:anonymous@math-atlas.cvs.sourceforge.net:/cvsroot/math-atlas login -cvs -z3 -d:pserver:anonymous@math-atlas.cvs.sourceforge.net:/cvsroot/math-atlas co -P AtlasBase/TexDoc -mv AtlasBase/TexDoc/ $DIRTARGET -rm -rf AtlasBase -tar -j -c -f $TAR -X debian/orig-tar.exclude $DIRTARGET +git clone https://github.com/math-atlas/math-atlas.git +mv math-atlas/AtlasBase/TexDoc/ $DIRTARGET +rm -rf math-atlas +rm $TAR +tar -caf $TAR -X debian/orig-tar.exclude $DIRTARGET rm -rf $DIRTARGET - -exit 0 diff -Nru atlas-3.10.2/debian/patches/0001-Avoid-c99-standard-compiler.patch atlas-3.10.3/debian/patches/0001-Avoid-c99-standard-compiler.patch --- atlas-3.10.2/debian/patches/0001-Avoid-c99-standard-compiler.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/0001-Avoid-c99-standard-compiler.patch 2019-02-26 15:53:14.000000000 +0000 @@ -0,0 +1,30 @@ +From 
52b54788779a0a4c3bd98f8e6c7608a87341aa2e Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 19 Feb 2019 19:03:52 +0100 +Subject: [PATCH 1/6] Avoid c99 standard compiler + +When probing for a usable GCC, the existing code already dropped path +names that contained "c89" or "c90", because these compilers don't have +the GCC extensions enabled. This patch also drops names with "c99" in +them. +--- + CONFIG/src/atlconf_misc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/CONFIG/src/atlconf_misc.c b/CONFIG/src/atlconf_misc.c +index 63cb1ef..fb62214 100644 +--- a/CONFIG/src/atlconf_misc.c ++++ b/CONFIG/src/atlconf_misc.c +@@ -824,7 +824,8 @@ int CompIsGcc(char *comp) + int i; + + cmpname = NameWithoutPath(comp); +- if (strstr(cmpname, "c89") || strstr(cmpname, "c90")) ++ if (strstr(cmpname, "c89") || strstr(cmpname, "c90") || ++ strstr(cmpname, "c99")) + { + free(cmpname); + return(0); +-- +2.17.0 + diff -Nru atlas-3.10.2/debian/patches/0001-Enable-cross-compile.patch atlas-3.10.3/debian/patches/0001-Enable-cross-compile.patch --- atlas-3.10.2/debian/patches/0001-Enable-cross-compile.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/0001-Enable-cross-compile.patch 2019-08-12 12:11:18.000000000 +0000 @@ -0,0 +1,265 @@ +From 35af7ba68e2b7998a690845f95a5c60ddc39d2a5 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 29 May 2019 17:51:34 +0200 +Subject: [PATCH] Enable "cross-compile" + +This adds support for building ATLAS without running any target code. In +order for this to work, the archdefs must contain some additional files +that would otherwise be built during various tuning steps; see the new +targets extra_get and extra_put in "CONFIG/ARCHS/Makefile". + +Even if the archdefs contain these additional files, cross compilation +is *not* automatically enabled. To activate it and disable tuning at +build time, add the option "-Si archdef 2" when running "configure". 
+--- + CONFIG/ARCHS/Makefile | 24 ++++++++++++++++++++++++ + bin/atlas_install.c | 2 ++ + makes/Make.aux | 10 +++++----- + makes/Make.bin | 22 ++++++++++++++++++++++ + makes/Make.l3tune | 6 ++++++ + makes/Make.sysinfo | 8 +++++++- + 6 files changed, 66 insertions(+), 6 deletions(-) + +diff --git a/CONFIG/ARCHS/Makefile b/CONFIG/ARCHS/Makefile +index 321e05c..e61b5a0 100644 +--- a/CONFIG/ARCHS/Makefile ++++ b/CONFIG/ARCHS/Makefile +@@ -211,3 +211,27 @@ ArchNew : $(mach) xnegflt + - cp $(BLDdir)/bin/INSTALL_LOG/?PerfSumm.txt $(adefd)/. + rm -f xnegflt + archput : sys_put kern_put gemm_put la_put ++ ++ifdef ATL_NOTUNE ++ ++# To avoid tuning, some extra files are needed. ++ ++extra_get : ++ - cp $(INCAdir)/atlas_type.h $(adefd)/kern/ ++ - cp $(INCAdir)/atlas_[sdcz]sysinfo.h $(adefd)/kern/ ++ - cp $(INCAdir)/atlas_[sd]lamch.h $(adefd)/kern/ ++ - cp $(INCAdir)/atlas_[sdcz]trsmXover.h $(adefd)/kern/ ++ - cp $(INCAdir)/atlas_[sdcz]syr*NX.h $(adefd)/kern/ ++ ++extra_put : ++ - cp $(adefd)/kern/atlas_type.h $(INCAdir)/. ++ - cp $(adefd)/kern/atlas_[sdcz]sysinfo.h $(INCAdir)/. ++ - cp $(adefd)/kern/atlas_[sd]lamch.h $(INCAdir)/. ++ - cp $(adefd)/kern/atlas_[sdcz]trsmXover.h $(INCAdir)/. ++ - cp $(adefd)/kern/atlas_[sdcz]syr*NX.h $(INCAdir)/. 
++ ++ArchNew : extra_get ++ ++archput : extra_put ++ ++endif +diff --git a/bin/atlas_install.c b/bin/atlas_install.c +index de3eb3a..3c811e6 100644 +--- a/bin/atlas_install.c ++++ b/bin/atlas_install.c +@@ -697,6 +697,8 @@ void GoToTown(int ARCHDEF, int L1DEF, int TuneLA) + ATL_Cassert(system("make IBozoL1.grd\n")==0, + "USING BOZO L1 DEFAULTS", NULL); + } ++ if (ARCHDEF >= 2) ++ setenv("ATL_NOTUNE", "1", 1); + if (ARCHDEF) + DefInstall = !system("make IArchDef.grd\n"); + +diff --git a/makes/Make.aux b/makes/Make.aux +index 1f769c8..c793028 100644 +--- a/makes/Make.aux ++++ b/makes/Make.aux +@@ -113,23 +113,23 @@ clean : + + $(ATLFWAIT) : + cd $(BINdir) ; $(MAKE) xatlas_waitfile +-$(INCAdir)/atlas_type.h : $(ATLFWAIT) ++$(INCAdir)/atlas_type.h : | $(ATLFWAIT) + cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_type.h + $(ATLFWAIT) -f $(INCAdir)/atlas_type.h + sINCdep = $(INCAdir)/atlas_ssysinfo.h $(INCAdir)/atlas_type.h +-$(INCAdir)/atlas_ssysinfo.h : $(ATLFWAIT) ++$(INCAdir)/atlas_ssysinfo.h : | $(ATLFWAIT) + cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_ssysinfo.h + $(ATLFWAIT) -f $(INCAdir)/atlas_ssysinfo.h + dINCdep = $(INCAdir)/atlas_dsysinfo.h $(INCAdir)/atlas_type.h +-$(INCAdir)/atlas_dsysinfo.h : $(ATLFWAIT) ++$(INCAdir)/atlas_dsysinfo.h : | $(ATLFWAIT) + cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_dsysinfo.h + $(ATLFWAIT) -f $(INCAdir)/atlas_dsysinfo.h + cINCdep = $(INCAdir)/atlas_csysinfo.h $(INCAdir)/atlas_type.h +-$(INCAdir)/atlas_csysinfo.h : $(ATLFWAIT) ++$(INCAdir)/atlas_csysinfo.h : | $(ATLFWAIT) + cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_csysinfo.h + $(ATLFWAIT) -f $(INCAdir)/atlas_csysinfo.h + zINCdep = $(INCAdir)/atlas_zsysinfo.h $(INCAdir)/atlas_type.h +-$(INCAdir)/atlas_zsysinfo.h : $(ATLFWAIT) ++$(INCAdir)/atlas_zsysinfo.h : | $(ATLFWAIT) + cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_zsysinfo.h + $(ATLFWAIT) -f $(INCAdir)/atlas_zsysinfo.h + +diff --git a/makes/Make.bin b/makes/Make.bin +index 1035cb9..acad578 100644 +--- a/makes/Make.bin ++++ b/makes/Make.bin +@@ 
-163,7 +163,9 @@ IRunMADef : + cd $(SYSdir) ; $(MAKE) RunMADef pre=$(pre) + + IRunMMDef : ++ifndef ATL_NOTUNE + cd $(MMTdir) ; $(MAKE) RunMMDef pre=$(pre) ++endif + cd $(MMTdir) ; ./xemit_mm -p $(pre) -R -2 + cd $(MMTdir) ; $(MAKE) install pre=$(pre) + IKillL1 : force_build +@@ -303,22 +305,42 @@ INSTALL_LOG/$(pre)bestTT_$(nb)x$(nb)x$(nb) : \ + cp $(MMTdir)/res/$(pre)bestTT_$(nb)x$(nb)x$(nb) INSTALL_LOG/. + + $(R1Tdir)/res/$(pre)R2K.sum : $(R1Tdir)/res/$(pre)R1K.sum force_build ++ifdef ATL_NOTUNE ++ cd $(R1Tdir) ; $(MAKE) $(pre)r2install ++else + cd $(R1Tdir) ; $(MAKE) res/$(pre)R2K.sum pre=$(pre) ++endif + $(R1Tdir)/res/$(pre)R1K.sum : force_build ++ifdef ATL_NOTUNE ++ cd $(R1Tdir) ; $(MAKE) $(pre)r1install ++else + cd $(R1Tdir) ; $(MAKE) res/$(pre)R1K.sum pre=$(pre) ++endif + INSTALL_LOG/$(pre)R1K.sum : $(R1Tdir)/res/$(pre)R1K.sum + cp $(R1Tdir)/res/$(pre)R1K.sum INSTALL_LOG/. + INSTALL_LOG/$(pre)R2K.sum : INSTALL_LOG/$(pre)R1K.sum \ + $(R1Tdir)/res/$(pre)R2K.sum + cp $(R1Tdir)/res/$(pre)R2K.sum INSTALL_LOG/. ++ifndef ATL_NOTUNE + cd $(R1Tdir) ; $(MAKE) $(pre)nxtune ++else ++ cd $(BLDdir)/src/blas/reference/level2 ; make $(pre)lib ++endif + + $(MVTdir)/res/$(pre)MVNK.sum : force_build ++ifdef ATL_NOTUNE ++ cd $(MVTdir) ; $(MAKE) $(pre)mvninstall ++else + cd $(MVTdir) ; $(MAKE) res/$(pre)MVNK.sum pre=$(pre) ++endif + INSTALL_LOG/$(pre)MVNK.sum : $(MVTdir)/res/$(pre)MVNK.sum + cp $(MVTdir)/res/$(pre)MVNK.sum INSTALL_LOG/. + $(MVTdir)/res/$(pre)MVTK.sum : force_build ++ifdef ATL_NOTUNE ++ cd $(MVTdir) ; $(MAKE) $(pre)mvtinstall ++else + cd $(MVTdir) ; $(MAKE) res/$(pre)MVTK.sum pre=$(pre) ++endif + INSTALL_LOG/$(pre)MVTK.sum : $(MVTdir)/res/$(pre)MVTK.sum + cp $(MVTdir)/res/$(pre)MVTK.sum INSTALL_LOG/. + +diff --git a/makes/Make.l3tune b/makes/Make.l3tune +index eaf7d7d..cd7f5f1 100644 +--- a/makes/Make.l3tune ++++ b/makes/Make.l3tune +@@ -118,6 +118,7 @@ res/atlas_strsmXover.h : + cp $(strsmXover) res/. 
+ + stsmfc : ++ifndef ATL_NOTUNE + rm -f $(strsmXover) + cd $(L3Bdir) ; $(MAKE) slib + $(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Upper_ \ +@@ -128,6 +129,7 @@ stsmfc : + tran=NoTranspose_ diag=$(diag) + $(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Lower_ \ + tran=Transpose_ diag=$(diag) ++endif + cd $(L3Bdir) ; $(MAKE) slib + dtrsmXover = $(INCAdir)/atlas_dtrsmXover.h + +@@ -138,6 +140,7 @@ res/atlas_dtrsmXover.h : + cp $(dtrsmXover) res/. + + dtsmfc : ++ifndef ATL_NOTUNE + rm -f $(dtrsmXover) + cd $(L3Bdir) ; $(MAKE) dlib + $(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Upper_ \ +@@ -148,6 +151,7 @@ dtsmfc : + tran=NoTranspose_ diag=$(diag) + $(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Lower_ \ + tran=Transpose_ diag=$(diag) ++endif + cd $(L3Bdir) ; $(MAKE) dlib + qtrsmXover = $(INCAdir)/atlas_qtrsmXover.h + +@@ -158,6 +162,7 @@ res/atlas_qtrsmXover.h : + cp $(qtrsmXover) res/. + + qtsmfc : ++ifndef ATL_NOTUNE + rm -f $(qtrsmXover) + cd $(L3Bdir) ; $(MAKE) qlib + $(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Upper_ \ +@@ -168,6 +173,7 @@ qtsmfc : + tran=NoTranspose_ diag=$(diag) + $(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Lower_ \ + tran=Transpose_ diag=$(diag) ++endif + cd $(L3Bdir) ; $(MAKE) qlib + + $(pre)tsmfc.o : force_build +diff --git a/makes/Make.sysinfo b/makes/Make.sysinfo +index 2b7dfdc..8e5dab2 100644 +--- a/makes/Make.sysinfo ++++ b/makes/Make.sysinfo +@@ -5,6 +5,7 @@ maxlat=6 + mflop=200 + flags= + ++ifndef ATL_NOTUNE + sTestFlags : force_build + $(MAKE) srbob `cat res/sBEST` pre='s' type=float + +@@ -85,12 +86,14 @@ RunLamch : xemit_lamch + cp res/atlas_?lamch.h $(INCAdir)/. 
+ RunTyp: xemit_typ + $(ATLRUN) $(SYSdir) xemit_typ > $(INCAdir)/atlas_type.h ++endif + + xemit_buildinfo : emit_buildinfo.o + $(XCC) $(XCCFLAGS) -o $@ emit_buildinfo.o + xsyssum : GetSysSum.o + $(XCC) $(XCCFLAGS) -o $@ GetSysSum.o + ++ifndef ATL_NOTUNE + xL1 : time.o L1CacheSize.o + $(KC) $(KCFLAGS) -o $@ L1CacheSize.o time.o + +@@ -125,6 +128,7 @@ smatime.o : $(mySRCdir)/matime.c + $(KC) -c $(KCFLAGS) -DSREAL $(mySRCdir)/matime.c + xmasrch : $(mySRCdir)/masrch.c + $(XCC) $(XCCFLAGS) -o $@ $(mySRCdir)/masrch.c ++endif + + ATL_cputime.c : + cp $(mySRCdir)/ATL_cputime.c . +@@ -143,6 +147,8 @@ emit_buildinfo.o : $(mySRCdir)/emit_buildinfo.c + $(XCC) -c $(XCCFLAGS) $(mySRCdir)/emit_buildinfo.c + GetSysSum.o : $(INCAdir)/atlas_type.h $(mySRCdir)/GetSysSum.c + $(XCC) -c $(XCCFLAGS) $(mySRCdir)/GetSysSum.c ++ ++ifndef ATL_NOTUNE + time.o : $(mySRCdir)/time.c + $(KC) -c $(KCFLAGS) -I./ $(mySRCdir)/time.c + emit_lamch.o : $(mySRCdir)/emit_lamch.c +@@ -155,7 +161,7 @@ findNT.o : $(mySRCdir)/findNT.c + $(KC) -c $(KCFLAGS) $(mySRCdir)/findNT.c + tlb.o : $(mySRCdir)/tlb.c + $(KC) -c $(KCFLAGS) $(mySRCdir)/tlb.c +- ++endif + + + force_build : +-- +2.17.0 + diff -Nru atlas-3.10.2/debian/patches/0002-Fix-rpath-link-command-line-options.patch atlas-3.10.3/debian/patches/0002-Fix-rpath-link-command-line-options.patch --- atlas-3.10.2/debian/patches/0002-Fix-rpath-link-command-line-options.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/0002-Fix-rpath-link-command-line-options.patch 2019-02-26 15:53:14.000000000 +0000 @@ -0,0 +1,46 @@ +From eb2e2f7ddbbffabb78015c5e7d2625e54d6c2de1 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 19 Feb 2019 19:20:19 +0100 +Subject: [PATCH 2/6] Fix -rpath-link command line options + +The "-rpath-link" command line options were written in the wrong syntax, +causing errors in the build. This is fixed. 
+--- + makes/Make.lib | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/makes/Make.lib b/makes/Make.lib +index 4ceff02..b322a32 100644 +--- a/makes/Make.lib ++++ b/makes/Make.lib +@@ -47,11 +47,11 @@ cshared : fat_cshared + # + LDTRY_WIN: + $(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \ +- -rpath-link $(LIBINSTdir) --output-def=$(outdef) \ ++ -rpath-link=$(LIBINSTdir) --output-def=$(outdef) \ + --whole-archive $(libas) --no-whole-archive $(LIBS) + GCCTRY_WIN: + $(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \ +- -Wl,"-rpath-link $(LIBINSTdir)" \ ++ -Wl,"-rpath-link=$(LIBINSTdir)" \ + -Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS) + GCCTRY_norp_WIN: + $(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \ +@@ -113,11 +113,11 @@ TRYALL_WIN : + # + LDTRY: + $(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \ +- -rpath-link $(LIBINSTdir) \ ++ -rpath-link=$(LIBINSTdir) \ + --whole-archive $(libas) --no-whole-archive $(LIBS) + GCCTRY: + $(GOODGCC) -shared -o $(outso) \ +- -Wl,"-rpath-link $(LIBINSTdir)" \ ++ -Wl,"-rpath-link=$(LIBINSTdir)" \ + -Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS) + GCCTRY_norp: + $(GOODGCC) -shared -o $(outso) \ +-- +2.17.0 + diff -Nru atlas-3.10.2/debian/patches/0003-Fix-SIMD-support-on-IBM-z13.patch atlas-3.10.3/debian/patches/0003-Fix-SIMD-support-on-IBM-z13.patch --- atlas-3.10.2/debian/patches/0003-Fix-SIMD-support-on-IBM-z13.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/0003-Fix-SIMD-support-on-IBM-z13.patch 2019-02-26 15:53:14.000000000 +0000 @@ -0,0 +1,55 @@ +From f8affd6ecb4f1df3c58a372d235a24d269491f39 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 5 Dec 2018 18:59:15 +0100 +Subject: [PATCH 3/6] Fix SIMD support on IBM z13 + +The header file atlas_simd.h contained a syntax error and a few functional +errors that affected IBM z13. 
It prevented any SIMD kernels from being +compiled successfully for that platform. This is fixed. The macro +vec_madd is avoided, because some GCC versions don't implement it +correctly; the equivalent GCC builtin __builtin_s390_vec_madd is used +instead. +--- + include/atlas_simd.h | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/include/atlas_simd.h b/include/atlas_simd.h +index baee6b1..68daf79 100644 +--- a/include/atlas_simd.h ++++ b/include/atlas_simd.h +@@ -69,7 +69,7 @@ + #define ATL_FRCGNUVEC + #endif + #elif defined(ATL_VXZ) +- #if ATL_VLEN != 2; ++ #if ATL_VLEN != 2 + #define ATL_FRCGNUVEC + #endif + #elif defined(ATL_NEON) +@@ -390,19 +390,19 @@ + #define ATL_vld(v_, p_) v_ = vec_ld2f(p_); + #define ATL_vst(p_, v_) vec_st2f(v_, p_); + #endif +- #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0) ++ #define ATL_vzero(v_) v_ = vec_splats((double)0.0) + #define ATL_vcopy(d_, s_) d_ = s_ +- #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_))) ++ #define ATL_vbcast(v_, p_) v_ = vec_splats((double)*((TYPE*)(p_))) + #define ATL_vuld(v_, p_) ATL_vld(v_, p_) + #define ATL_vust(p_, v_) ATL_vst(p_, v_) + #define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_ + #define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_ + #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_ +- #define ATL_vmac(d_, s1_, s2_) d_ = vec_madd(s1_, s2_, d_) ++ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_) + #define ATL_vvrsum1(s0_) \ + { ATL_VTYPE t_;\ + t_ = vec_splat(s0_, 1); \ +- s0 += t_; \ ++ s0_ += t_; \ + } + #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0) + #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1) +-- +2.17.0 + diff -Nru atlas-3.10.2/debian/patches/0004-Read-L1-data-cache-size-from-sysconf-if-possible.patch atlas-3.10.3/debian/patches/0004-Read-L1-data-cache-size-from-sysconf-if-possible.patch --- atlas-3.10.2/debian/patches/0004-Read-L1-data-cache-size-from-sysconf-if-possible.patch 1970-01-01 00:00:00.000000000 +0000 +++ 
atlas-3.10.3/debian/patches/0004-Read-L1-data-cache-size-from-sysconf-if-possible.patch 2019-02-26 15:53:14.000000000 +0000 @@ -0,0 +1,46 @@ +From 6d4ea0752e0790e318f64618a87978e6590e3549 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 12 Dec 2018 19:44:32 +0100 +Subject: [PATCH 4/6] Read L1 data cache size from sysconf if possible + +The probing of the L1 data cache size is sometimes not reliable. This can +cause the tuning to yield varying, sub-optimal results. But on Linux the +L1 data cache size can usually be retrieved with sysconf instead, which is +faster and more reliable. Do this whenever possible. +--- + tune/sysinfo/L1CacheSize.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/tune/sysinfo/L1CacheSize.c b/tune/sysinfo/L1CacheSize.c +index e62a273..dffa76e 100644 +--- a/tune/sysinfo/L1CacheSize.c ++++ b/tune/sysinfo/L1CacheSize.c +@@ -30,6 +30,7 @@ + + #include + #include ++#include + + #define REPS 4096 + +@@ -276,7 +277,16 @@ int main(int nargs, char *args[]) + exit(-1); + } + if (nargs > 1) MaxSize = atoi(args[1]); +- L1Size = GetL1Size(MaxSize, 1.08); ++ ++#ifdef _SC_LEVEL1_DCACHE_SIZE ++ { ++ long res = sysconf(_SC_LEVEL1_DCACHE_SIZE); ++ L1Size = res > 0 ?
(int) (res / 1024) : 0; ++ } ++#endif ++ ++ if (!L1Size) ++ L1Size = GetL1Size(MaxSize, 1.08); + if (!L1Size) + L1Size = GetL1Size(MaxSize, 1.08); + if (!L1Size) +-- +2.17.0 + diff -Nru atlas-3.10.2/debian/patches/0005-Optimizations-for-IBM-z13.patch atlas-3.10.3/debian/patches/0005-Optimizations-for-IBM-z13.patch --- atlas-3.10.2/debian/patches/0005-Optimizations-for-IBM-z13.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/0005-Optimizations-for-IBM-z13.patch 2019-02-26 15:53:14.000000000 +0000 @@ -0,0 +1,68 @@ +From 2cd0edf97d0a8ecf168519e1013d7674a8cc40eb Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 12 Dec 2018 20:06:27 +0100 +Subject: [PATCH 5/6] Optimizations for IBM z13 + +Perform some optimizations for IBM z13: +- Compile with -O2 instead of -O. +- Streamline vector loads/stores. +- Define the vvrsum2 macro. + +Also, use the compile option -march=z13 instead of -march=native. +--- + CONFIG/src/atlcomp.txt | 8 +++----- + include/atlas_simd.h | 11 +++++------ + 2 files changed, 8 insertions(+), 11 deletions(-) + +diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt +index aa31604..2ac71cf 100644 +--- a/CONFIG/src/atlcomp.txt ++++ b/CONFIG/src/atlcomp.txt +@@ -246,12 +246,10 @@ MACH=IBMz9,IBMz10,IBMz196 OS=ALL LVL=500 COMPS=f77 + 'gfortran' '-O3 -funroll-loops' + MACH=IBMz9,IBMz10,IBMz196,IBMz12 OS=ALL LVL=500 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc + 'gcc' '-O3 -funroll-loops' +-MACH=IBMz13 OS=ALL LVL=1000 COMPS=dmc,skc,dkc,icc,xcc,gcc +- 'gcc' '-march=native -O -mvx -mzvector' +-MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc +- 'gcc' '-march=native -O -mvx -mzvector -fno-peephole -fno-peephole2' ++MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc ++ 'gcc' '-march=z13 -mtune=z13 -O2' + MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77 +- 'gfortran' '-march=native -O -mvx -mzvector' ++ 'gfortran' '-march=z13 -mtune=z13 -O2' + # + # Windows defaults ; need to make SSE/SSE2 arch dep. 
+ # +diff --git a/include/atlas_simd.h b/include/atlas_simd.h +index 68daf79..f171933 100644 +--- a/include/atlas_simd.h ++++ b/include/atlas_simd.h +@@ -384,8 +384,8 @@ + #endif + #define ATL_VTYPE vector double + #if (defined(DREAL) || defined(DCPLX)) +- #define ATL_vld(v_, p_) {v_[0] = *(p_); v_[1] = (p_)[1]; } +- #define ATL_vst(p_, v_) {*(p_) = v_[0]; (p_)[1] = v_[1];} ++ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_) ++ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_ + #else + #define ATL_vld(v_, p_) v_ = vec_ld2f(p_); + #define ATL_vst(p_, v_) vec_st2f(v_, p_); +@@ -400,10 +400,9 @@ + #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_ + #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_) + #define ATL_vvrsum1(s0_) \ +- { ATL_VTYPE t_;\ +- t_ = vec_splat(s0_, 1); \ +- s0_ += t_; \ +- } ++ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); } ++ #define ATL_vvrsum2(s0_, s1_) \ ++ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); } + #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0) + #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1) + #elif defined(ATL_NEON) && (defined(SREAL) || defined(SCPLX)) +-- +2.17.0 + diff -Nru atlas-3.10.2/debian/patches/0006-Add-IBM-z14-support.patch atlas-3.10.3/debian/patches/0006-Add-IBM-z14-support.patch --- atlas-3.10.2/debian/patches/0006-Add-IBM-z14-support.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/0006-Add-IBM-z14-support.patch 2019-03-12 17:46:29.000000000 +0000 @@ -0,0 +1,272 @@ +From 6e15b2ed6c0a517a841914d42afed8ab35a0d70b Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Thu, 13 Dec 2018 17:32:49 +0100 +Subject: [PATCH 6/6] Add IBM z14 support + +Add general support for IBM z14. Also detect and handle the vector +enhancements facility 1, which specifically adds single-precision FP +arithmetic for vectors. 
+--- + CONFIG/include/atlconf.h | 14 ++++---- + CONFIG/src/Makefile | 6 ++++ + CONFIG/src/atlcomp.txt | 4 +++ + CONFIG/src/backend/Make.ext | 4 ++- + CONFIG/src/backend/archinfo_linux.c | 3 +- + CONFIG/src/backend/probe_vxz2.c | 12 +++++++ + CONFIG/src/probe_comp.c | 3 +- + include/atlas_prefetch.h | 3 +- + include/atlas_simd.h | 53 +++++++++++++++++++++++++++++ + 9 files changed, 91 insertions(+), 11 deletions(-) + create mode 100644 CONFIG/src/backend/probe_vxz2.c + +Index: atlas-3.10.3/CONFIG/include/atlconf.h +=================================================================== +--- atlas-3.10.3.orig/CONFIG/include/atlconf.h ++++ atlas-3.10.3/CONFIG/include/atlconf.h +@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, + * Corei3EP: v3 Haswell, E5-26XX + * Corei4: skylake + */ +-#define NMACH 63 ++#define NMACH 64 + static char *machnam[NMACH] = + {"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5", + "POWER6", "POWER7", "POWER8", "POWERe6500", +- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", ++ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14", + "x86x87", "x86SSE1", "x86SSE2", "x86SSE3", + "P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo", + "CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3", +@@ -42,7 +42,7 @@ static char *machnam[NMACH] = + "ARM64xgene1", "ARM64a53", "ARM64a57", "GENERIC"}; + enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5, + IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500, +- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, /* s390(x) in Linux */ ++ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */ + x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */ + IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS, + IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2, +@@ -83,7 +83,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, + #define MachIsARM64(mach_) \ + ( (mach_) >= ARM64xg && || (mach_) <= ARM64a57) + #define MachIsS390(mach_) \ +- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ13 ) 
++ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 ) + + + static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"}; +@@ -97,13 +97,13 @@ enum F2CNAME {f2c_NamErr=0, f2c_Add_, f2 + enum F2CINT {f2c_IntErr=0, FintCint, FintClong, FintClonglong, FintCshort}; + enum F2CSTRING {f2c_StrErr=0, fstrSun, fstrCray, fstrStructVal, fstrStructPtr}; + +-#define NISA 15 ++#define NISA 16 + static char *ISAXNAM[NISA] = +- {"", "VSX", "VXZ", "AltiVec", ++ {"", "VSX", "VXZ2", "VXZ", "AltiVec", + "AVXMAC", "AVXFMA4", "AVX", "SSE3", "SSE2", "SSE1", "3DNow", + "FPV3D2MACNEON", "FPV3D16MACNEON", "FPV3D32MAC", "FPV3D16MAC"}; + enum ISAEXT +- {ISA_None=0, ISA_VSX, ISA_VXZ, ISA_AV, ++ {ISA_None=0, ISA_VSX, ISA_VXZ2, ISA_VXZ, ISA_AV, + ISA_AVXMAC, ISA_AVXFMA4, ISA_AVX, ISA_SSE3, ISA_SSE2, ISA_SSE1, ISA_3DNow, + ISA_NEON, ISA_NEON16, ISA_VFP3D32MAC, ISA_VFP3D16MAC}; + +Index: atlas-3.10.3/CONFIG/src/Makefile +=================================================================== +--- atlas-3.10.3.orig/CONFIG/src/Makefile ++++ atlas-3.10.3/CONFIG/src/Makefile +@@ -158,6 +158,12 @@ IRun_NEON : + $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_neon args="$(args)" \ + redir=config0.out + - cat config0.out ++IRun_VXZ2 : ++ $(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz2 \ ++ $(SRCdir)/backend/probe_svec.c $(SRCdir)/backend/probe_vxz2.c ++ $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_vxz2 args="$(args)" \ ++ redir=config0.out ++ - cat config0.out + IRun_VXZ : + $(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz \ + $(SRCdir)/backend/probe_dvec.c $(SRCdir)/backend/probe_vxz.c +Index: atlas-3.10.3/CONFIG/src/atlcomp.txt +=================================================================== +--- atlas-3.10.3.orig/CONFIG/src/atlcomp.txt ++++ atlas-3.10.3/CONFIG/src/atlcomp.txt +@@ -250,6 +250,10 @@ MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dm + 'gcc' '-march=z13 -mtune=z13 -O2' + MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77 + 'gfortran' '-march=z13 -mtune=z13 -O2' ++MACH=IBMz14 
OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc ++ 'gcc' '-march=z14 -mtune=z14 -O2' ++MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77 ++ 'gfortran' '-march=z13 -mtune=z14 -O2' + # + # Windows defaults ; need to make SSE/SSE2 arch dep. + # +Index: atlas-3.10.3/CONFIG/src/backend/Make.ext +=================================================================== +--- atlas-3.10.3.orig/CONFIG/src/backend/Make.ext ++++ atlas-3.10.3/CONFIG/src/backend/Make.ext +@@ -39,7 +39,7 @@ files = archinfo_aix.c archinfo_freebsd. + probe_gas_mips.S probe_gas_parisc.S probe_gas_ppc.S probe_gas_s390.S \ + probe_gas_sparc.S probe_gas_wow64.S probe_gas_x8632.S \ + probe_gas_x8664.S probe_smac.c probe_svec.c probe_this_asm.c \ +- probe_vxz.c ++ probe_vxz2.c probe_vxz.c + + all : $(files) + +@@ -107,6 +107,8 @@ flibchkF.f : $(basf) + $(extF) -b $(basf) -o flibchkF.f rout=flibchkF.f + probe_arm32_FPABI.c : $(basf) + $(extC) -b $(basf) -o probe_arm32_FPABI.c rout=probe_arm32_FPABI ++probe_vxz2.c : $(basf) ++ $(extC) -b $(basf) -o probe_vxz2.c rout=probe_vxz2 + probe_vxz.c : $(basf) + $(extC) -b $(basf) -o probe_vxz.c rout=probe_vxz + probe_aff_SETAFFNP.c : $(basf) +Index: atlas-3.10.3/CONFIG/src/backend/archinfo_linux.c +=================================================================== +--- atlas-3.10.3.orig/CONFIG/src/backend/archinfo_linux.c ++++ atlas-3.10.3/CONFIG/src/backend/archinfo_linux.c +@@ -336,7 +336,8 @@ enum MACHTYPE ProbeArch() + else if (strstr(res, "2817") || strstr(res, "2818")) mach = IbmZ196; + else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12; + else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13; +- else mach = IbmZ13; /* looks risky to me, but IBM folks did it */ ++ else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14; ++ else mach = IbmZ14; /* looks risky to me, but IBM folks did it */ + free(res); + } + break; +Index: atlas-3.10.3/CONFIG/src/backend/probe_vxz2.c +=================================================================== 
+--- /dev/null ++++ atlas-3.10.3/CONFIG/src/backend/probe_vxz2.c +@@ -0,0 +1,12 @@ ++#include ++void do_vsum(float *z, float *x, float *y) // RETURNS: z = x + y ++{ ++ vector float vx, vy; ++ vx = (vector float) {x[0], x[1], x[2], x[3]}; ++ vy = (vector float) {y[0], y[1], y[2], y[3]}; ++ vy += vx; ++ z[0] = vy[0]; ++ z[1] = vy[1]; ++ z[2] = vy[2]; ++ z[3] = vy[3]; ++} +Index: atlas-3.10.3/CONFIG/src/probe_comp.c +=================================================================== +--- atlas-3.10.3.orig/CONFIG/src/probe_comp.c ++++ atlas-3.10.3/CONFIG/src/probe_comp.c +@@ -452,7 +452,7 @@ COMPNODE **GetDefaultComps(enum OSTYPE O + vp = "-mavx2 -mfma"; + else if (vecexts & (1< ++ ++ #define ATL_VPERMI(s_, t_, i_) \ ++ ((ATL_VTYPE) vec_permi((vector double) s_, (vector double) t_, i_)) ++ ++ #if defined(SREAL) || defined(SCPLX) ++ #define ATL_VTYPE vector float ++ #if ATL_VLEN != 4 ++ #error "VSXZ2 supports only VLEN = 4 for floats!" ++ #endif ++ #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \ ++ { ATL_VTYPE t0_, t1_; \ ++ t0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); \ ++ t1_ = vec_mergeh(s2_, s3_) + vec_mergel(s2_, s3_); \ ++ s0_ = ATL_VPERMI(t0_, t1_, 0) + ATL_VPERMI(t0_, t1_, 3); \ ++ } ++ #define ATL_vsplat2(d_, s_) d_ = vec_splat(s_, 2) ++ #define ATL_vsplat3(d_, s_) d_ = vec_splat(s_, 3) ++ #else /* double precision */ ++ #define ATL_VTYPE vector double ++ #if ATL_VLEN != 2 ++ #error "VSXZ2 supports only VLEN = 2 for doubles!" 
++ #endif ++ #define ATL_vvrsum1(s0_) \ ++ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); } ++ #define ATL_vvrsum2(s0_, s1_) \ ++ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); } ++ #endif ++ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_) ++ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_ ++ #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0) ++ #define ATL_vcopy(d_, s_) d_ = s_ ++ #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_))) ++ #define ATL_vuld(v_, p_) v_ = vec_xl(0, (TYPE *)(p_)) ++ #define ATL_vust(p_, v_) vec_xst(v_, 0, (TYPE *)(p_)) ++ #define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_ ++ #define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_ ++ #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_ ++ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_) ++ #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0) ++ #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1) + #elif defined(ATL_VXZ) + #include + diff -Nru atlas-3.10.2/debian/patches/02_rename_lapack_atlas.diff atlas-3.10.3/debian/patches/02_rename_lapack_atlas.diff --- atlas-3.10.2/debian/patches/02_rename_lapack_atlas.diff 2014-07-12 10:45:54.000000000 +0000 +++ atlas-3.10.3/debian/patches/02_rename_lapack_atlas.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,109 +0,0 @@ -#! /bin/sh /usr/share/dpatch/dpatch-run -## 02_rename_lapack_atlas.dpatch.dpatch by -## -## All lines beginning with `## DP:' are a description of the patch. -## DP: Rename ATLAS incomplete lapack to lapack_atlas. 
- -@DPATCH@ ---- a/CONFIG/src/SpewMakeInc.c -+++ b/CONFIG/src/SpewMakeInc.c -@@ -595,7 +595,7 @@ int main(int nargs, char **args) - fprintf(fpout, " ATLASlib = $(LIBdir)/libatlas.a\n"); - fprintf(fpout, " CBLASlib = $(LIBdir)/libcblas.a\n"); - fprintf(fpout, " F77BLASlib = $(LIBdir)/libf77blas.a\n"); -- fprintf(fpout, " LAPACKlib = $(LIBdir)/liblapack.a\n"); -+ fprintf(fpout, " LAPACKlib = $(LIBdir)/liblapack_atlas.a\n"); - if (THREADS) - { - fprintf(fpout, " PTCBLASlib = $(LIBdir)/libptcblas.a\n"); ---- a/Make.top -+++ b/Make.top -@@ -657,8 +657,8 @@ install_inc: - install_lib: - cp $(LIBdir)/libatlas.a $(INSTdir)/. - cp $(LIBdir)/libcblas.a $(INSTdir)/. -- cp $(LIBdir)/liblapack.a $(INSTdir)/. -- chmod 0644 $(INSTdir)/libatlas.a $(INSTdir)/liblapack.a \ -+ cp $(LIBdir)/liblapack_atlas.a $(INSTdir)/. -+ chmod 0644 $(INSTdir)/libatlas.a $(INSTdir)/liblapack_atlas.a \ - $(INSTdir)/libcblas.a - - cp $(LIBdir)/libf77blas.a $(INSTdir)/. - - chmod 0644 $(INSTdir)/libf77blas.a ---- a/makes/Make.lib -+++ b/makes/Make.lib -@@ -4,10 +4,10 @@ mySRCdir = $(SRCdir)/lib - # - # override with libatlas.so only when atlas is built to one lib - # --DYNlibs = liblapack.so libf77blas.so libcblas.so libatlas.so --PTDYNlibs = liblapack.so libptf77blas.so libptcblas.so libatlas.so --CDYNlibs = liblapack.so libcblas.so libatlas.so --CPTDYNlibs = liblapack.so libptcblas.so libatlas.so -+DYNlibs = liblapack_atlas.so libf77blas.so libcblas.so libatlas.so -+PTDYNlibs = liblapack_atlas.so libptf77blas.so libptcblas.so libatlas.so -+CDYNlibs = liblapack_atlas.so libcblas.so libatlas.so -+CPTDYNlibs = liblapack_atlas.so libptcblas.so libatlas.so - - VER=3.10.2 - tmpd = RCW_tMp -@@ -26,7 +26,7 @@ $(tarnam).tar.bz2 : - cp $(LIBdir)/libatlas.a $(ARCH)/lib/. - cp $(LIBdir)/libf77blas.a $(ARCH)/lib/. - cp $(LIBdir)/libcblas.a $(ARCH)/lib/. -- cp $(LIBdir)/liblapack.a $(ARCH)/lib/. -+ cp $(LIBdir)/liblapack_atlas.a $(ARCH)/lib/. - - cp $(LIBdir)/libptcblas.a $(ARCH)/lib/. 
- - cp $(LIBdir)/libptf77blas.a $(ARCH)/lib/. - $(TAR) cf $(tarnam).tar $(ARCH) -@@ -184,7 +184,7 @@ fat_ptshared : - LIBINSTdir="$(LIBINSTdir)" - fat_shared : # serial target - $(MAKE) TRYALL outso=libsatlas.so \ -- libas="liblapack.a libf77blas.a libcblas.a libatlas.a" \ -+ libas="liblapack_atlas.a libf77blas.a libcblas.a libatlas.a" \ - LIBINSTdir="$(LIBINSTdir)" - # - # Builds shared lib, not include fortran codes from LAPACK -@@ -198,10 +198,10 @@ fat_cshared : libclapack.a - libas="libclapack.a libcblas.a libatlas.a" \ - LIBINSTdir="$(LIBINSTdir)" - --libclapack.a : liblapack.a -+libclapack.a : liblapack_atlas.a - rm -rf clapack libclapack.a - mkdir clapack -- cd clapack ; ar x ../liblapack.a -+ cd clapack ; ar x ../liblapack_atlas.a - rm -f clapack/*f77wrap* clapack/*C2F* - ar r libclapack.a clapack/ATL_* clapack/clapack_* - rm -rf clapack -@@ -229,7 +229,7 @@ tdlls: # thread - LIBINSTdir="$(LIBINSTdir)" - sdlls: # serial target - $(MAKE) TRYALL_WIN outso=libsatlas.dll outdef=libsatlas.def \ -- libas="liblapack.a libf77blas.a libcblas.a libatlas.a" \ -+ libas="liblapack_atlas.a libf77blas.a libcblas.a libatlas.a" \ - LIBINSTdir="$(LIBINSTdir)" - cdlls: ctdlls csdlls - ctdlls: libptclapack.a # threaded target -@@ -246,7 +246,7 @@ csdlls: libclapack.a # serial - # ======================================================================= - dylib : - rm -rf $(tmpd) ; mkdir $(tmpd) -- cd $(tmpd) ; ar x ../liblapack.a -+ cd $(tmpd) ; ar x ../liblapack_atlas.a - cd $(tmpd) ; ar x ../libf77blas.a - cd $(tmpd) ; ar x ../libcblas.a - cd $(tmpd) ; ar x ../libatlas.a -@@ -283,9 +283,9 @@ ptcdylib : libptclapack.a - -compatibility_version $(VER) *.o $(LIBS) - rm -rf $(tmpd) - --libclapack.dylib : libcblas.dylib libatlas.dylib liblapack.a -+libclapack.dylib : libcblas.dylib libatlas.dylib liblapack_atlas.a - rm -rf $(tmpd) ; mkdir $(tmpd) -- cd $(tmpd) ; ar x ../liblapack.a -+ cd $(tmpd) ; ar x ../liblapack_atlas.a - rm -f $(tmpd)/*C2F $(tmpd)/*f77wrap* - cd $(tmpd) ; 
libtool -dynamic -o ../libclapack.dylib \ - -install_name $(LIBINSTdir)/libclapack.dylib \ diff -Nru atlas-3.10.2/debian/patches/10_s390.diff atlas-3.10.3/debian/patches/10_s390.diff --- atlas-3.10.2/debian/patches/10_s390.diff 2014-07-12 10:46:15.000000000 +0000 +++ atlas-3.10.3/debian/patches/10_s390.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ ---- a/CONFIG/src/SpewMakeInc.c -+++ b/CONFIG/src/SpewMakeInc.c -@@ -394,7 +394,7 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu - if (MachIsMIPS(arch)) - return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32"); - if (MachIsS390(arch)) -- return((ptrbits == 64) ? "-m64" : "-m31"); -+ return(sp); - if (!CompIsGcc(comp)) - { - /* -@@ -774,8 +774,6 @@ int main(int nargs, char **args) - fprintf(fpout, "_fbsd"); - } - } -- if (MachIsS390(mach)) -- fprintf(fpout, ptrbits == 32 ? "-m31" : "-m64"); - fprintf(fpout, "\n F77SYSLIB = %s\n", f77lib ? f77lib : ""); - fprintf(fpout, " BC = $(KC)\n"); - fprintf(fpout, " NCFLAGS = $(KCFLAGS)\n"); ---- a/CONFIG/src/probe_comp.c -+++ b/CONFIG/src/probe_comp.c -@@ -587,7 +587,7 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu - if (MachIsMIPS(arch)) - return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32"); - if (MachIsS390(arch)) -- return((ptrbits == 64) ? 
"-m64" : "-m31"); -+ return(sp); - if (!CompIsGcc(comp)) - { - /* diff -Nru atlas-3.10.2/debian/patches/16_warning-removed.diff atlas-3.10.3/debian/patches/16_warning-removed.diff --- atlas-3.10.2/debian/patches/16_warning-removed.diff 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/patches/16_warning-removed.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -Index: ATLAS/Make.top -=================================================================== ---- ATLAS.orig/Make.top 2013-01-09 17:42:11.000000000 +0100 -+++ ATLAS/Make.top 2013-01-09 17:42:11.000000000 +0100 -@@ -9,7 +9,7 @@ - $(BZIP) error_$(ARCH).tar - - leafstart: -- cd $(leaf) ; ln -s $(BLDdir)/Make.inc Make.inc -+ cd $(leaf) ; if test -h Make.inc; then rm Make.inc; fi; ln -s $(BLDdir)/Make.inc Make.inc - - build: - cd bin/ ; $(MAKE) xatlas_build diff -Nru atlas-3.10.2/debian/patches/17_hppa.diff atlas-3.10.3/debian/patches/17_hppa.diff --- atlas-3.10.2/debian/patches/17_hppa.diff 2014-07-12 10:46:32.000000000 +0000 +++ atlas-3.10.3/debian/patches/17_hppa.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,39 +0,0 @@ ---- a/CONFIG/include/atlconf.h -+++ b/CONFIG/include/atlconf.h -@@ -63,7 +63,13 @@ enum MACHTYPE {MACHOther, IbmPwr3, IbmPw - ( (mach_) == ARMv7 ) - #define MachIsS390(mach_) \ - ( (mach_) >= IbmZ9 && (mach_) <= IbmZ196 ) -- -+#ifdef __hppa__ -+#define MachIsHPPA(mach_) \ -+ ( __hppa__ ) -+#else -+#define MachIsHPPA(mach_) \ -+ ( 0 ) -+#endif - - static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"}; - static char *f2c_intstr[5] = ---- a/CONFIG/src/SpewMakeInc.c -+++ b/CONFIG/src/SpewMakeInc.c -@@ -391,6 +391,8 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu - - if (MachIsIA64(arch)) - return(sp); -+ if (MachIsHPPA(arch)) -+ return(sp); - if (MachIsMIPS(arch)) - return((ptrbits == 64) ? 
"-mabi=64" : "-mabi=n32"); - if (MachIsS390(arch)) ---- a/CONFIG/src/probe_comp.c -+++ b/CONFIG/src/probe_comp.c -@@ -584,6 +584,8 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu - - if (MachIsIA64(arch)) - return(sp); -+ if (MachIsHPPA(arch)) -+ return(sp); - if (MachIsMIPS(arch)) - return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32"); - if (MachIsS390(arch)) diff -Nru atlas-3.10.2/debian/patches/18_alpha.diff atlas-3.10.3/debian/patches/18_alpha.diff --- atlas-3.10.2/debian/patches/18_alpha.diff 2014-07-12 10:46:35.000000000 +0000 +++ atlas-3.10.3/debian/patches/18_alpha.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ ---- a/CONFIG/include/atlconf.h -+++ b/CONFIG/include/atlconf.h -@@ -70,6 +70,13 @@ enum MACHTYPE {MACHOther, IbmPwr3, IbmPw - #define MachIsHPPA(mach_) \ - ( 0 ) - #endif -+#ifdef __alpha__ -+#define MachIsALPHA(mach_) \ -+ ( __alpha__ ) -+#else -+#define MachIsALPHA(mach_) \ -+ ( 0 ) -+#endif - - static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"}; - static char *f2c_intstr[5] = ---- a/CONFIG/src/SpewMakeInc.c -+++ b/CONFIG/src/SpewMakeInc.c -@@ -393,6 +393,8 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu - return(sp); - if (MachIsHPPA(arch)) - return(sp); -+ if (MachIsALPHA(arch)) -+ return(sp); - if (MachIsMIPS(arch)) - return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32"); - if (MachIsS390(arch)) ---- a/CONFIG/src/probe_comp.c -+++ b/CONFIG/src/probe_comp.c -@@ -586,6 +586,8 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu - return(sp); - if (MachIsHPPA(arch)) - return(sp); -+ if (MachIsALPHA(arch)) -+ return(sp); - if (MachIsMIPS(arch)) - return((ptrbits == 64) ? 
"-mabi=64" : "-mabi=n32"); - if (MachIsS390(arch)) diff -Nru atlas-3.10.2/debian/patches/20_armel.diff atlas-3.10.3/debian/patches/20_armel.diff --- atlas-3.10.2/debian/patches/20_armel.diff 2014-07-12 10:46:38.000000000 +0000 +++ atlas-3.10.3/debian/patches/20_armel.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,39 +0,0 @@ ---- a/CONFIG/include/atlconf.h -+++ b/CONFIG/include/atlconf.h -@@ -77,6 +77,14 @@ enum MACHTYPE {MACHOther, IbmPwr3, IbmPw - #define MachIsALPHA(mach_) \ - ( 0 ) - #endif -+#ifdef __ARMEL__ -+#define MachIsARMEL(mach_) \ -+ ( __ARMEL__ ) -+#else -+#define MachIsARMEL(mach_) \ -+ ( 0 ) -+#endif -+ - - static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"}; - static char *f2c_intstr[5] = ---- a/CONFIG/src/SpewMakeInc.c -+++ b/CONFIG/src/SpewMakeInc.c -@@ -395,6 +395,8 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu - return(sp); - if (MachIsALPHA(arch)) - return(sp); -+ if (MachIsARMEL(arch)) -+ return(sp); - if (MachIsMIPS(arch)) - return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32"); - if (MachIsS390(arch)) ---- a/CONFIG/src/probe_comp.c -+++ b/CONFIG/src/probe_comp.c -@@ -588,6 +588,8 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu - return(sp); - if (MachIsALPHA(arch)) - return(sp); -+ if (MachIsARMEL(arch)) -+ return(sp); - if (MachIsMIPS(arch)) - return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32"); - if (MachIsS390(arch)) diff -Nru atlas-3.10.2/debian/patches/21_mips2.diff atlas-3.10.3/debian/patches/21_mips2.diff --- atlas-3.10.2/debian/patches/21_mips2.diff 2014-07-12 10:46:40.000000000 +0000 +++ atlas-3.10.3/debian/patches/21_mips2.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,22 +0,0 @@ ---- a/CONFIG/src/SpewMakeInc.c -+++ b/CONFIG/src/SpewMakeInc.c -@@ -398,7 +398,7 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu - if (MachIsARMEL(arch)) - return(sp); - if (MachIsMIPS(arch)) -- return((ptrbits == 64) ? 
"-mabi=64" : "-mabi=n32"); -+ return(sp); - if (MachIsS390(arch)) - return(sp); - if (!CompIsGcc(comp)) ---- a/CONFIG/src/probe_comp.c -+++ b/CONFIG/src/probe_comp.c -@@ -591,7 +591,7 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu - if (MachIsARMEL(arch)) - return(sp); - if (MachIsMIPS(arch)) -- return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32"); -+ return(sp); - if (MachIsS390(arch)) - return(sp); - if (!CompIsGcc(comp)) diff -Nru atlas-3.10.2/debian/patches/22_sh.diff atlas-3.10.3/debian/patches/22_sh.diff --- atlas-3.10.2/debian/patches/22_sh.diff 2014-07-12 10:46:43.000000000 +0000 +++ atlas-3.10.3/debian/patches/22_sh.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,39 +0,0 @@ ---- a/CONFIG/include/atlconf.h -+++ b/CONFIG/include/atlconf.h -@@ -85,6 +85,14 @@ enum MACHTYPE {MACHOther, IbmPwr3, IbmPw - ( 0 ) - #endif - -+#ifdef __sh__ -+#define MachIsSH(mach_) \ -+ ( __sh__ ) -+#else -+#define MachIsSH(mach_) \ -+ ( 0 ) -+#endif -+ - - static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"}; - static char *f2c_intstr[5] = ---- a/CONFIG/src/SpewMakeInc.c -+++ b/CONFIG/src/SpewMakeInc.c -@@ -399,6 +399,8 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu - return(sp); - if (MachIsMIPS(arch)) - return(sp); -+ if (MachIsSH(arch)) -+ return(sp); - if (MachIsS390(arch)) - return(sp); - if (!CompIsGcc(comp)) ---- a/CONFIG/src/probe_comp.c -+++ b/CONFIG/src/probe_comp.c -@@ -594,6 +594,8 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu - return(sp); - if (MachIsS390(arch)) - return(sp); -+ if (MachIsSH(arch)) -+ return(sp); - if (!CompIsGcc(comp)) - { - /* diff -Nru atlas-3.10.2/debian/patches/armel-is-v4t.diff atlas-3.10.3/debian/patches/armel-is-v4t.diff --- atlas-3.10.2/debian/patches/armel-is-v4t.diff 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/patches/armel-is-v4t.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ -Description: On armel, do not use asm constructs provided by ATLAS - Since 3.10, ATLAS incorporates asm constructs for 
ARM processors. However, - these are too recent for the Debian armel port (which is ARMv4t). This patch - ensures that these asm constructs are not used on that port. -Author: Sébastien Villemot -Forwarded: no -Last-Update: 2013-06-07 ---- -This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ ---- a/CONFIG/src/backend/probe_gas_arm.S -+++ b/CONFIG/src/backend/probe_gas_arm.S -@@ -1,5 +1,8 @@ - #define ATL_GAS_ARM - #include "atlas_asm.h" -+#ifdef __ARM_ARCH_4T__ -+#error "Debian armel is only v4t, but ATLAS needs a more recent ISA" -+#endif - # - # Linux ARM assembler for: - # int asm_probe(int i) diff -Nru atlas-3.10.2/debian/patches/armhf.diff atlas-3.10.3/debian/patches/armhf.diff --- atlas-3.10.2/debian/patches/armhf.diff 2014-07-12 10:46:49.000000000 +0000 +++ atlas-3.10.3/debian/patches/armhf.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,28 +0,0 @@ -Description: Do not use soft-float ABI on armhf - See http://math-atlas.sourceforge.net/errata.html#armhardfp. - Also remove -mfpu=vfpv3 flag, because some armhf machines have only vfpv3-d16 - (i.e. 16 FPU registers instead of 32). 
-Author: Sébastien Villemot -Last-Update: 2013-06-09 ---- -This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ ---- a/CONFIG/src/atlcomp.txt -+++ b/CONFIG/src/atlcomp.txt -@@ -259,13 +259,13 @@ MACH=P4,PM OS=WinNT,Win64 LVL=0 COMPS=ic - # ARM defaults - # - MACH=ARMv7 OS=ALL LVL=1000 COMPS=xcc -- 'gcc' '-mcpu=cortex-a8 -O1 -mfpu=vfpv3 -mfloat-abi=softfp ' -+ 'gcc' '-mcpu=cortex-a8 -O1 ' - MACH=ARMv7 OS=ALL LVL=1000 COMPS=smc,skc,gcc,icc -- 'gcc' '-O1 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=vfpv3 -mfloat-abi=softfp -fno-expensive-optimizations' -+ 'gcc' '-O1 -mcpu=cortex-a8 -mtune=cortex-a8 -fno-expensive-optimizations' - MACH=ARMv7 OS=ALL LVL=1000 COMPS=dmc,dkc -- 'gcc' '-O1 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=vfpv3 -mfloat-abi=softfp -fno-schedule-insns2' -+ 'gcc' '-O1 -mcpu=cortex-a8 -mtune=cortex-a8 -fno-schedule-insns2' - MACH=ARMv7 OS=ALL LVL=1000 COMPS=f77 -- 'gfortran' '-mcpu=cortex-a8 -mfpu=vfpv3 -mfloat-abi=softfp -O' -+ 'gfortran' '-mcpu=cortex-a8 -O' - # - # Generic defaults - # diff -Nru atlas-3.10.2/debian/patches/cpu-throttling-check.diff atlas-3.10.3/debian/patches/cpu-throttling-check.diff --- atlas-3.10.2/debian/patches/cpu-throttling-check.diff 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/patches/cpu-throttling-check.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,30 +0,0 @@ -Description: Reenable the configuration flag for disable CPU throttling check - For the generic package, we need the ability to compile on machines with CPU - throttling enabled (since some buildds have that feature). 
-Author: Sébastien Villemot -Forwarded: not-needed -Last-Update: 2013-06-11 ---- -This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ ---- a/CONFIG/src/config.c -+++ b/CONFIG/src/config.c -@@ -928,10 +928,8 @@ - " -Si nocygwin <0/1> : Do/don't depend on GPL cygwin library\n"); - fprintf(stderr, - " (Windows compiler/cygwin install only)\n"); --/* Disabled due to abuse - fprintf(stderr, - " -Si cputhrchk <0/1> : Ignore/heed CPU throttle probe\n"); -- */ - fprintf(stderr, - " -tl <#> : set # of threads, use list of affinity IDs\n"); - fprintf(stderr, -@@ -1129,6 +1127,8 @@ - *NoF77 = k; - else if (!strcmp(sp0, "nocygwin")) - *NoCygwin = k; -+ else if (!strcmp(sp0, "cputhrchk")) -+ *ThrChk = k; - else if (!strcmp(sp0, "kern")) - gcc3 = sp; - else if (!strcmp(sp0, "ADdir") || !strcmp(sp0, "addir")) diff -Nru atlas-3.10.2/debian/patches/fix-typos.patch atlas-3.10.3/debian/patches/fix-typos.patch --- atlas-3.10.2/debian/patches/fix-typos.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/fix-typos.patch 2018-04-29 17:33:16.000000000 +0000 @@ -0,0 +1,161 @@ +Description: Fix typos +Author: Sébastien Villemot +Forwarded: https://sourceforge.net/p/math-atlas/patches/78/, https://github.com/math-atlas/math-atlas/pull/31 +Applied-Upstream: 3.10.4 +Last-Update: 2017-08-25 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/TexDoc/atlas_devel.tex ++++ b/TexDoc/atlas_devel.tex +@@ -226,7 +226,7 @@ but all new codes should do so. 
+ if (cond) statement; + \end{verbatim} + \item Comments are either to the left of a line if there is room, or on +- lines preceeding the commented code if not, using a style like: ++ lines preceding the commented code if not, using a style like: + \vspace*{-0.1in} + \begin{verbatim} + /* +--- a/bin/stattime.c ++++ b/bin/stattime.c +@@ -768,7 +768,7 @@ int GetFlags(int nargs, char **args, FIL + for (i=1; i < nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], "no '-' preceeding flag!", i); ++ PrintUsage(args[0], "no '-' preceding flag!", i); + switch(args[i][1]) + { + case 's': +--- a/results/atl2tvec.c ++++ b/results/atl2tvec.c +@@ -787,7 +787,7 @@ int GetFlags /* returns: DOWHAT (AVG,MI + for (i=1; i < nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], "no '-' preceeding flag!", i); ++ PrintUsage(args[0], "no '-' preceding flag!", i); + switch(args[i][1]) + { + case 'c': +--- a/results/cattvecs.c ++++ b/results/cattvecs.c +@@ -39,7 +39,7 @@ char **GetFlags /* RETURNS: arra + for (i=1; i < nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], "no '-' preceeding flag!", i); ++ PrintUsage(args[0], "no '-' preceding flag!", i); + switch(args[i][1]) + { + case '#': /* -# <# files> */ +--- a/results/mergetvecs.c ++++ b/results/mergetvecs.c +@@ -41,7 +41,7 @@ char **GetFlags /* RETURNS: arra + for (i=1; i < nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], "no '-' preceeding flag!", i); ++ PrintUsage(args[0], "no '-' preceding flag!", i); + switch(args[i][1]) + { + case 'i': /* -i[1,2] */ +--- a/results/perctvecs.c ++++ b/results/perctvecs.c +@@ -40,7 +40,7 @@ char **GetFlags /* RETURNS: arra + for (i=1; i < nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], "no '-' preceeding flag!", i); ++ PrintUsage(args[0], "no '-' preceding flag!", i); + switch(args[i][1]) + { + case 'i': /* -i */ +--- a/results/reducetvec.c ++++ b/results/reducetvec.c +@@ -43,7 +43,7 @@ char **GetFlags /* RETURNS: arra + for (i=1; i < 
nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], "no '-' preceeding flag!", i); ++ PrintUsage(args[0], "no '-' preceding flag!", i); + switch(args[i][1]) + { + case 'i': /* -i */ +--- a/results/stattime.c ++++ b/results/stattime.c +@@ -768,7 +768,7 @@ int GetFlags(int nargs, char **args, FIL + for (i=1; i < nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], "no '-' preceeding flag!", i); ++ PrintUsage(args[0], "no '-' preceding flag!", i); + switch(args[i][1]) + { + case 's': +--- a/results/tvec2plp.c ++++ b/results/tvec2plp.c +@@ -31,7 +31,7 @@ char **GetFlags /* RETURNS: arra + for (i=1; i < nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], "no '-' preceeding flag!", i); ++ PrintUsage(args[0], "no '-' preceding flag!", i); + switch(args[i][1]) + { + case 'i': /* -i */ +--- a/tune/blas/gemv/mvktime.c ++++ b/tune/blas/gemv/mvktime.c +@@ -584,7 +584,7 @@ void GetFlags(int nargs, char **args, in + for (i=1; i < nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], "No '-' preceeding flag!", i); ++ PrintUsage(args[0], "No '-' preceding flag!", i); + switch(args[i][1]) + { + case 'f' : /* set resfile output */ +--- a/tune/blas/ger/r1ktime.c ++++ b/tune/blas/ger/r1ktime.c +@@ -553,7 +553,7 @@ void GetFlags(int nargs, char **args, in + for (i=1; i < nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], "No '-' preceeding flag!", i); ++ PrintUsage(args[0], "No '-' preceding flag!", i); + switch(args[i][1]) + { + case 'f' : /* set resfile output */ +--- a/tune/blas/ger/r2ktime.c ++++ b/tune/blas/ger/r2ktime.c +@@ -577,7 +577,7 @@ void GetFlags(int nargs, char **args, in + for (i=1; i < nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], "No '-' preceeding flag!", i); ++ PrintUsage(args[0], "No '-' preceding flag!", i); + switch(args[i][1]) + { + case 'f' : /* set resfile output */ +--- a/tune/blas/ger/s1nxtune.c ++++ b/tune/blas/ger/s1nxtune.c +@@ -299,7 +299,7 @@ int GetFlags(int nargs, char **args, 
enu + for (i=1; i < nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], i, "No '-' preceeding flag!"); ++ PrintUsage(args[0], i, "No '-' preceding flag!"); + switch(args[i][1]) + { + case 's' : +--- a/tune/blas/ger/s2nxtune.c ++++ b/tune/blas/ger/s2nxtune.c +@@ -300,7 +300,7 @@ int GetFlags(int nargs, char **args, enu + for (i=1; i < nargs; i++) + { + if (args[i][0] != '-') +- PrintUsage(args[0], i, "No '-' preceeding flag!"); ++ PrintUsage(args[0], i, "No '-' preceding flag!"); + switch(args[i][1]) + { + case 's' : diff -Nru atlas-3.10.2/debian/patches/generic.diff atlas-3.10.3/debian/patches/generic.diff --- atlas-3.10.2/debian/patches/generic.diff 2014-07-12 10:49:41.000000000 +0000 +++ atlas-3.10.3/debian/patches/generic.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,41 +0,0 @@ -Description: Add a GENERIC machine type - In practice this type will have the same effect than the UNKNOWN machine type, - except that it does not try to autodetect the machine CPU. - This is useful when trying to build a generic package on an arch for which - ATLAS does not define a generic machine type. For example, on a armel buildd - with ARMv7 CPU, ATLAS would detect ARMv7 if provided the UNKNOWN type. 
-Author: Sébastien Villemot -Bug-Debian: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=719355 -Last-Update: 2014-07-12 ---- -This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ ---- a/CONFIG/include/atlconf.h -+++ b/CONFIG/include/atlconf.h -@@ -18,7 +18,7 @@ enum OSTYPE {OSOther=0, OSLinux, OSSunOS - enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS, - AFARM, AFS390}; - --#define NMACH 52 -+#define NMACH 53 - static char *machnam[NMACH] = - {"UNKNOWN", "POWER3", "POWER4", "POWER5", "PPCG4", "PPCG5", - "POWER6", "POWER7", "POWERe6500", "IBMz9", "IBMz10", "IBMz196", -@@ -29,7 +29,7 @@ static char *machnam[NMACH] = - "Efficeon", "K7", "HAMMER", "AMD64K10h", "AMDLLANO", "AMDDOZER","AMDDRIVER", - "UNKNOWNx86", "IA64Itan", "IA64Itan2", - "USI", "USII", "USIII", "USIV", "UST1", "UST2", "UnknownUS", -- "MIPSR1xK", "MIPSICE9", "ARMv7"}; -+ "MIPSR1xK", "MIPSICE9", "ARMv7", "GENERIC"}; - enum MACHTYPE {MACHOther, IbmPwr3, IbmPwr4, IbmPwr5, PPCG4, PPCG5, - IbmPwr6, IbmPwr7, Pwre6500, - IbmZ9, IbmZ10, IbmZ196, /* s390(x) in Linux */ -@@ -42,7 +42,8 @@ enum MACHTYPE {MACHOther, IbmPwr3, IbmPw - SunUSI, SunUSII, SunUSIII, SunUSIV, SunUST1, SunUST2, SunUSX, - MIPSR1xK, /* includes R10K, R12K, R14K, R16K */ - MIPSICE9, /* SiCortex ICE9 -- like MIPS5K */ -- ARMv7 /* includes Cortex A8, A9 */ -+ ARMv7, /* includes Cortex A8, A9 */ -+ GENERIC - }; - #define MachIsX86(mach_) \ - ( (mach_) >= x86x87 && (mach_) <= x86X ) diff -Nru atlas-3.10.2/debian/patches/generic.patch atlas-3.10.3/debian/patches/generic.patch --- atlas-3.10.2/debian/patches/generic.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/generic.patch 2017-09-09 11:19:09.000000000 +0000 @@ -0,0 +1,42 @@ +Description: Add a GENERIC machine type + In practice this type will have the same effect than the UNKNOWN machine type, + except that it does not try to autodetect the machine CPU. 
+ This is useful when trying to build a generic package on an arch for which + ATLAS does not define a generic machine type. For example, on a armel buildd + with ARMv7 CPU, ATLAS would detect ARMv7 if provided the UNKNOWN type. +Bug-Debian: https://bugs.debian.org/719355 +Author: Sébastien Villemot +Forwarded: not-needed +Last-Update: 2017-08-16 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/CONFIG/include/atlconf.h ++++ b/CONFIG/include/atlconf.h +@@ -25,7 +25,7 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, + * Corei3EP: v3 Haswell, E5-26XX + * Corei4: skylake + */ +-#define NMACH 62 ++#define NMACH 63 + static char *machnam[NMACH] = + {"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5", + "POWER6", "POWER7", "POWER8", "POWERe6500", +@@ -39,7 +39,7 @@ static char *machnam[NMACH] = + "USI", "USII", "USIII", "USIV", "UST1", "UST2", "UnknownUS", + "MIPSR1xK", "MIPSICE9", + "ARMa7", "ARMa9", "ARMa15", "ARMa17", +- "ARM64xgene1", "ARM64a53", "ARM64a57"}; ++ "ARM64xgene1", "ARM64a53", "ARM64a57", "GENERIC"}; + enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5, + IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500, + IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, /* s390(x) in Linux */ +@@ -58,7 +58,8 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, + ARM17, /* lots of tablets */ + ARM64xg, /* includes ARMv8 */ + ARM64a53, +- ARM64a57 ++ ARM64a57, ++ GENERIC + }; + #define MachIsX86(mach_) \ + ( (mach_) >= x86x87 && (mach_) <= x86X ) diff -Nru atlas-3.10.2/debian/patches/kfreebsd.diff atlas-3.10.3/debian/patches/kfreebsd.diff --- atlas-3.10.2/debian/patches/kfreebsd.diff 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/patches/kfreebsd.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ ---- a/CONFIG/src/SpewMakeInc.c -+++ b/CONFIG/src/SpewMakeInc.c -@@ -778,8 +778,10 @@ - fprintf(fpout, " -melf_i386"); - else if (ptrbits == 64) - fprintf(fpout, " -melf_x86_64"); -- if (OS == OSFreeBSD) -- fprintf(fpout, "_fbsd"); -+ #ifdef 
__FreeBSD_kernel__ -+ fprintf(fpout, "_fbsd"); -+#endif -+ - } - } - fprintf(fpout, "\n F77SYSLIB = %s\n", f77lib ? f77lib : ""); diff -Nru atlas-3.10.2/debian/patches/kfreebsd.patch atlas-3.10.3/debian/patches/kfreebsd.patch --- atlas-3.10.2/debian/patches/kfreebsd.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/kfreebsd.patch 2017-09-09 11:19:09.000000000 +0000 @@ -0,0 +1,35 @@ +Description: Ensure that kfreebsd-* is detected as FreeBSD + The "GNU" test is moved at the end, otherwise kfreebsd is recognized as Linux. + This is necessary in order to have correct linker flags. +Forwarded: https://sourceforge.net/p/math-atlas/patches/19/ +Applied-Upstream: 3.10.4 +Last-Update: 2017-08-28 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/CONFIG/src/probe_OS.c ++++ b/CONFIG/src/probe_OS.c +@@ -18,11 +18,7 @@ enum OSTYPE ProbeOS(int verb, char *targ + res = atlsys_1L(targ, cmnd, verb, 0); + if (res) + { +-/* +- * Accept GNU (HURD) as Linux, since they seem to use same stuff; +- * This is patch from Sylvestre Ledru; I have no direct experience wt HURD +- */ +- if(strstr(res, "Linux") || strstr(res, "GNU")) OS = OSLinux; ++ if(strstr(res, "Linux")) OS = OSLinux; + else if(strstr(res, "FreeBSD")) OS = OSFreeBSD; + else if (strstr(res, "Darwin")) OS = OSOSX; + else if(strstr(res, "SunOS")) +@@ -59,6 +55,11 @@ enum OSTYPE ProbeOS(int verb, char *targ + else ierr = 1; + } + else if (strstr(res, "HP-UX")) OS = OSHPUX; ++/* ++ * Accept GNU (HURD) as Linux, since they seem to use same stuff; ++ * This is patch from Sylvestre Ledru; I have no direct experience wt HURD ++ */ ++ else if(strstr(res, "GNU")) OS = OSLinux; + else ierr = 1; + free(res); + } diff -Nru atlas-3.10.2/debian/patches/mips.patch atlas-3.10.3/debian/patches/mips.patch --- atlas-3.10.2/debian/patches/mips.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/mips.patch 2017-09-09 11:19:09.000000000 +0000 @@ -0,0 +1,30 @@ +Description: On 
mips{,el}, do not pass -mabi=n32 flag. + The mips{,el} ports use the O32 ABI. + See https://www.linux-mips.org/wiki/MIPS_ABI_History for more details on ABIs. +Bug: https://sourceforge.net/p/math-atlas/support-requests/1053/ +Applied-Upstream: 3.10.4 +Last-Update: 2017-08-25 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/CONFIG/src/SpewMakeInc.c ++++ b/CONFIG/src/SpewMakeInc.c +@@ -402,7 +402,7 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu + if (MachIsIA64(arch)) + return(sp); + if (MachIsMIPS(arch)) +- return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32"); ++ return(sp); + if (MachIsS390(arch)) + return((ptrbits == 64) ? "-m64" : "-m31"); + if (OS == OSAIX) +--- a/CONFIG/src/probe_comp.c ++++ b/CONFIG/src/probe_comp.c +@@ -596,7 +596,7 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu + if (MachIsIA64(arch)) + return(sp); + if (MachIsMIPS(arch)) +- return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32"); ++ return(sp); + if (MachIsS390(arch)) + return((ptrbits == 64) ? "-m64" : "-m31"); + if (OS == OSAIX) diff -Nru atlas-3.10.2/debian/patches/missing-cflags.patch atlas-3.10.3/debian/patches/missing-cflags.patch --- atlas-3.10.2/debian/patches/missing-cflags.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/missing-cflags.patch 2017-09-09 11:19:09.000000000 +0000 @@ -0,0 +1,17 @@ +Description: Add missing CFLAGS when building xmmgen_sse +Author: Sébastien Villemot +Forwarded: no +Last-Update: 2017-08-08 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/makes/Make.mmtune ++++ b/makes/Make.mmtune +@@ -155,7 +155,7 @@ mmsearch.o : $(mySRCdir)/mmsearch.c $(pa + SSEGENOUTDIR : + mkdir SSEGENOUTDIR + xmmgen_sse : $(mySRCdir)/mmgen_sse.c +- $(XCC) -o $@ $(mySRCdir)/mmgen_sse.c -lm ++ $(XCC) $(XCCFLAGS) -o $@ $(mySRCdir)/mmgen_sse.c -lm + xmmksearch_sse : xmmgen_sse mmksearch_sse.o SSEGENOUTDIR + $(XCC) $(XCCFLAGS) -o $@ mmksearch_sse.o + mmksearch_sse.o : $(mySRCdir)/mmksearch_sse.c $(parsedeps) diff 
-Nru atlas-3.10.2/debian/patches/powerpc-dcbt.patch atlas-3.10.3/debian/patches/powerpc-dcbt.patch --- atlas-3.10.2/debian/patches/powerpc-dcbt.patch 2015-11-23 18:12:58.000000000 +0000 +++ atlas-3.10.3/debian/patches/powerpc-dcbt.patch 2017-09-09 11:19:09.000000000 +0000 @@ -174,7 +174,7 @@ fmadd rC01, rA0, rB1, rC01 --- a/tune/blas/gemm/CASES/ATL_dmm8x4x2_vsx.c +++ b/tune/blas/gemm/CASES/ATL_dmm8x4x2_vsx.c -@@ -52,7 +52,7 @@ static inline vector TYPE vec_mergel(vec +@@ -60,7 +60,7 @@ static inline vector TYPE vec_mergel(vec #ifndef ATL_GOT_L1PREFETCH #ifdef _ARCH_PPC #undef ATL_pfl1R diff -Nru atlas-3.10.2/debian/patches/ppc64el-abiv2.patch atlas-3.10.3/debian/patches/ppc64el-abiv2.patch --- atlas-3.10.2/debian/patches/ppc64el-abiv2.patch 2014-11-15 12:01:09.000000000 +0000 +++ atlas-3.10.3/debian/patches/ppc64el-abiv2.patch 2017-09-09 11:19:09.000000000 +0000 @@ -1,11 +1,11 @@ -Origin: http://sourceforge.net/p/math-atlas/patches/65/#3cb1 -Forwarded: http://sourceforge.net/p/math-atlas/patches/65/ Description: ELFv2 ABI changes for ppc64el For more details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1080073#c40 +Bug-Debian: https://bugs.debian.org/766695 +Forwarded: http://sourceforge.net/p/math-atlas/patches/65/ +Origin: http://sourceforge.net/p/math-atlas/patches/65/#3cb1 +Reviewed-by: Sébastien Villemot Last-Update: 2014-10-24 -Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=766695 -Reviewed-By: Sébastien Villemot --- a/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c +++ b/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c @@ -268,7 +268,7 @@ Mjoin(.,ATL_USERMM): diff -Nru atlas-3.10.2/debian/patches/ppc64el-ifdef-files-with-lvx.patch atlas-3.10.3/debian/patches/ppc64el-ifdef-files-with-lvx.patch --- atlas-3.10.2/debian/patches/ppc64el-ifdef-files-with-lvx.patch 2014-11-15 12:01:43.000000000 +0000 +++ atlas-3.10.3/debian/patches/ppc64el-ifdef-files-with-lvx.patch 2017-09-09 11:19:09.000000000 +0000 @@ -12,11 +12,11 @@ . 
For more details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1080073#c40 -Author: Mauricio Faria de Oliveira +Bug-Debian: https://bugs.debian.org/766695 Forwarded: http://sourceforge.net/p/math-atlas/patches/65/ +Author: Mauricio Faria de Oliveira +Reviewed-by: Sébastien Villemot Last-Update: 2014-10-28 -Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=766695 -Reviewed-By: Sébastien Villemot --- a/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c +++ b/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c @@ -7,6 +7,11 @@ @@ -69,7 +69,7 @@ #include #include "atlas_misc.h" #include "atlas_prefetch.h" /* ATL_pfl1R, ATL_pfl1W */ -@@ -651,3 +655,5 @@ void ATL_USERMM +@@ -659,3 +663,5 @@ void ATL_USERMM } } } diff -Nru atlas-3.10.2/debian/patches/ppc64el-new-archdef-name.patch atlas-3.10.3/debian/patches/ppc64el-new-archdef-name.patch --- atlas-3.10.2/debian/patches/ppc64el-new-archdef-name.patch 2014-11-15 12:01:52.000000000 +0000 +++ atlas-3.10.3/debian/patches/ppc64el-new-archdef-name.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -Origin: https://bugzilla.redhat.com/show_bug.cgi?id=1080073#c43 -Forwarded: http://sourceforge.net/p/math-atlas/patches/66/ -Description: Append 'LE' to archdef on little-endian PowerPC64 - For more details, see: - https://bugzilla.redhat.com/show_bug.cgi?id=1080073#c40 -Last-Update: 2014-10-24 -Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=766695 -Reviewed-By: Sébastien Villemot ---- a/CONFIG/src/SpewMakeInc.c -+++ b/CONFIG/src/SpewMakeInc.c -@@ -550,6 +550,10 @@ int main(int nargs, char **args) - fprintf(fpout, "# -------------------------------------------------\n"); - fprintf(fpout, " ARCH = %s", machnam[mach]); - fprintf(fpout, "%d", ptrbits); -+ /* for ppc64le archi add 'LE' characters */ -+ #if defined(__powerpc64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -+ fprintf(fpout, "%s", "LE"); -+ #endif - if (ISAX) - fprintf(fpout, "%s", ISAXNAM[ISAX]); - if (!USEIEEE) diff -Nru 
atlas-3.10.2/debian/patches/ppc64-endianness.patch atlas-3.10.3/debian/patches/ppc64-endianness.patch --- atlas-3.10.2/debian/patches/ppc64-endianness.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/ppc64-endianness.patch 2017-09-09 11:19:09.000000000 +0000 @@ -0,0 +1,19 @@ +Description: Fix endianness detection on ppc64* + The __ORDER_LITTLE_ENDIAN__ macro is defined even on big endian systems. +Author: Sébastien Villemot +Forwarded: https://sourceforge.net/p/math-atlas/patches/79/ +Applied-Upstream: 3.10.4 +Last-Update: 2017-08-25 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/CONFIG/src/SpewMakeInc.c ++++ b/CONFIG/src/SpewMakeInc.c +@@ -557,7 +557,7 @@ int main(int nargs, char **args) + fprintf(fpout, "# -------------------------------------------------\n"); + fprintf(fpout, " ARCH = %s", machnam[mach]); + fprintf(fpout, "%d", ptrbits); +- #if defined(__powerpc64__) && defined(__ORDER_LITTLE_ENDIAN__) ++ #if defined(__powerpc64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + fprintf(fpout, "LE"); + #endif + if (ISAX) diff -Nru atlas-3.10.2/debian/patches/rename-lapack-atlas.patch atlas-3.10.3/debian/patches/rename-lapack-atlas.patch --- atlas-3.10.2/debian/patches/rename-lapack-atlas.patch 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/debian/patches/rename-lapack-atlas.patch 2017-09-09 11:19:09.000000000 +0000 @@ -0,0 +1,114 @@ +Description: Rename ATLAS incomplete liblapack.a to liblapack_atlas.a + The liblapack.a created by ATLAS build system is incomplete: it only contains + a small subset of the full LAPACK API (only those optimized by ATLAS). + Hence rename this library to liblapack_atlas.a to avoid a name conflict with + the full liblapack.a. + In particular, note that the "-Ss flapack" option of the configure script does + not work as expected as of ATLAS 3.10.3. 
+Author: +Forwarded: not-needed +Last-Update: 2017-08-08 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/CONFIG/src/SpewMakeInc.c ++++ b/CONFIG/src/SpewMakeInc.c +@@ -615,7 +615,7 @@ int main(int nargs, char **args) + fprintf(fpout, " ATLASlib = $(LIBdir)/libatlas.a\n"); + fprintf(fpout, " CBLASlib = $(LIBdir)/libcblas.a\n"); + fprintf(fpout, " F77BLASlib = $(LIBdir)/libf77blas.a\n"); +- fprintf(fpout, " LAPACKlib = $(LIBdir)/liblapack.a\n"); ++ fprintf(fpout, " LAPACKlib = $(LIBdir)/liblapack_atlas.a\n"); + if (THREADS) + { + fprintf(fpout, " PTCBLASlib = $(LIBdir)/libptcblas.a\n"); +--- a/Make.top ++++ b/Make.top +@@ -657,8 +657,8 @@ install_inc: + install_lib: + cp $(LIBdir)/libatlas.a $(INSTdir)/. + cp $(LIBdir)/libcblas.a $(INSTdir)/. +- cp $(LIBdir)/liblapack.a $(INSTdir)/. +- chmod 0644 $(INSTdir)/libatlas.a $(INSTdir)/liblapack.a \ ++ cp $(LIBdir)/liblapack_atlas.a $(INSTdir)/. ++ chmod 0644 $(INSTdir)/libatlas.a $(INSTdir)/liblapack_atlas.a \ + $(INSTdir)/libcblas.a + - cp $(LIBdir)/libf77blas.a $(INSTdir)/. + - chmod 0644 $(INSTdir)/libf77blas.a +--- a/makes/Make.lib ++++ b/makes/Make.lib +@@ -4,10 +4,10 @@ mySRCdir = $(SRCdir)/lib + # + # override with libatlas.so only when atlas is built to one lib + # +-DYNlibs = liblapack.so libf77blas.so libcblas.so libatlas.so +-PTDYNlibs = liblapack.so libptf77blas.so libptcblas.so libatlas.so +-CDYNlibs = liblapack.so libcblas.so libatlas.so +-CPTDYNlibs = liblapack.so libptcblas.so libatlas.so ++DYNlibs = liblapack_atlas.so libf77blas.so libcblas.so libatlas.so ++PTDYNlibs = liblapack_atlas.so libptf77blas.so libptcblas.so libatlas.so ++CDYNlibs = liblapack_atlas.so libcblas.so libatlas.so ++CPTDYNlibs = liblapack_atlas.so libptcblas.so libatlas.so + + VER=3.10.3 + tmpd = RCW_tMp +@@ -26,7 +26,7 @@ $(tarnam).tar.bz2 : + cp $(LIBdir)/libatlas.a $(ARCH)/lib/. + cp $(LIBdir)/libf77blas.a $(ARCH)/lib/. + cp $(LIBdir)/libcblas.a $(ARCH)/lib/. +- cp $(LIBdir)/liblapack.a $(ARCH)/lib/. 
++ cp $(LIBdir)/liblapack_atlas.a $(ARCH)/lib/. + - cp $(LIBdir)/libptcblas.a $(ARCH)/lib/. + - cp $(LIBdir)/libptf77blas.a $(ARCH)/lib/. + $(TAR) cf $(tarnam).tar $(ARCH) +@@ -184,7 +184,7 @@ fat_ptshared : + LIBINSTdir="$(LIBINSTdir)" + fat_shared : # serial target + $(MAKE) TRYALL outso=libsatlas.so \ +- libas="liblapack.a libf77blas.a libcblas.a libatlas.a" \ ++ libas="liblapack_atlas.a libf77blas.a libcblas.a libatlas.a" \ + LIBINSTdir="$(LIBINSTdir)" + # + # Builds shared lib, not include fortran codes from LAPACK +@@ -198,10 +198,10 @@ fat_cshared : libclapack.a + libas="libclapack.a libcblas.a libatlas.a" \ + LIBINSTdir="$(LIBINSTdir)" + +-libclapack.a : liblapack.a ++libclapack.a : liblapack_atlas.a + rm -rf clapack libclapack.a + mkdir clapack +- cd clapack ; ar x ../liblapack.a ++ cd clapack ; ar x ../liblapack_atlas.a + rm -f clapack/*f77wrap* clapack/*C2F* + ar r libclapack.a clapack/ATL_* clapack/clapack_* + rm -rf clapack +@@ -229,7 +229,7 @@ tdlls: # thread + LIBINSTdir="$(LIBINSTdir)" + sdlls: # serial target + $(MAKE) TRYALL_WIN outso=libsatlas.dll outdef=libsatlas.def \ +- libas="liblapack.a libf77blas.a libcblas.a libatlas.a" \ ++ libas="liblapack_atlas.a libf77blas.a libcblas.a libatlas.a" \ + LIBINSTdir="$(LIBINSTdir)" + cdlls: ctdlls csdlls + ctdlls: libptclapack.a # threaded target +@@ -246,7 +246,7 @@ csdlls: libclapack.a # serial + # ======================================================================= + dylib : + rm -rf $(tmpd) ; mkdir $(tmpd) +- cd $(tmpd) ; ar x ../liblapack.a ++ cd $(tmpd) ; ar x ../liblapack_atlas.a + cd $(tmpd) ; ar x ../libf77blas.a + cd $(tmpd) ; ar x ../libcblas.a + cd $(tmpd) ; ar x ../libatlas.a +@@ -283,9 +283,9 @@ ptcdylib : libptclapack.a + -compatibility_version $(VER) *.o $(LIBS) + rm -rf $(tmpd) + +-libclapack.dylib : libcblas.dylib libatlas.dylib liblapack.a ++libclapack.dylib : libcblas.dylib libatlas.dylib liblapack_atlas.a + rm -rf $(tmpd) ; mkdir $(tmpd) +- cd $(tmpd) ; ar x ../liblapack.a ++ cd 
$(tmpd) ; ar x ../liblapack_atlas.a + rm -f $(tmpd)/*C2F $(tmpd)/*f77wrap* + cd $(tmpd) ; libtool -dynamic -o ../libclapack.dylib \ + -install_name $(LIBINSTdir)/libclapack.dylib \ diff -Nru atlas-3.10.2/debian/patches/series atlas-3.10.3/debian/patches/series --- atlas-3.10.2/debian/patches/series 2015-11-23 14:02:59.000000000 +0000 +++ atlas-3.10.3/debian/patches/series 2019-08-12 12:11:18.000000000 +0000 @@ -1,19 +1,17 @@ -02_rename_lapack_atlas.diff -static_full_blas_lapack.diff -shared_libraries.diff -10_s390.diff -16_warning-removed.diff -17_hppa.diff -18_alpha.diff -20_armel.diff -21_mips2.diff -22_sh.diff -kfreebsd.diff -armel-is-v4t.diff -armhf.diff -cpu-throttling-check.diff -generic.diff -ppc64el-new-archdef-name.patch +rename-lapack-atlas.patch +mips.patch +kfreebsd.patch +generic.patch ppc64el-abiv2.patch ppc64el-ifdef-files-with-lvx.patch powerpc-dcbt.patch +fix-typos.patch +missing-cflags.patch +ppc64-endianness.patch +0001-Avoid-c99-standard-compiler.patch +0002-Fix-rpath-link-command-line-options.patch +0003-Fix-SIMD-support-on-IBM-z13.patch +0004-Read-L1-data-cache-size-from-sysconf-if-possible.patch +0005-Optimizations-for-IBM-z13.patch +0006-Add-IBM-z14-support.patch +0001-Enable-cross-compile.patch diff -Nru atlas-3.10.2/debian/patches/shared_libraries.diff atlas-3.10.3/debian/patches/shared_libraries.diff --- atlas-3.10.2/debian/patches/shared_libraries.diff 2014-07-16 18:29:59.000000000 +0000 +++ atlas-3.10.3/debian/patches/shared_libraries.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,130 +0,0 @@ -Description: Create shared versions of the libraries -Author: David Evans - Sébastien Villemot -Last-Update: 2014-07-16 ---- -This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ ---- a/makes/Make.lib -+++ b/makes/Make.lib -@@ -33,6 +33,79 @@ $(tarnam).tar.bz2 : - rm -rf $(ARCH) - $(BZIP) --best $(tarnam).tar - -+fullshared: libatlas.so libcblas.so libf77blas.so liblapack_atlas.so atlas/libblas.so atlas/liblapack.so -+ -+libatlas.so: 
libatlas.so.3 -+ ln -sf $< $@ -+ -+libatlas.so.3: libatlas.so.3.0 -+ ln -sf $< $@ -+ -+libatlas.so.3.0 : libatlas.a -+ ld $(LDFLAGS) -shared -soname libatlas.so.3 -o $@ \ -+ --whole-archive libatlas.a --no-whole-archive -lc $(LIBS) $(F77SYSLIB) -+ -+libcblas.so: libcblas.so.3 -+ ln -sf $< $@ -+ -+libcblas.so.3: libcblas.so.3.0 -+ ln -sf $< $@ -+ -+libcblas.so.3.0 : libcblas.a libatlas.so -+ ld $(LDFLAGS) -shared -soname libcblas.so.3 -o $@ \ -+ --whole-archive libcblas.a \ -+ --no-whole-archive -L. -latlas $(F77SYSLIB) -lm -lc -+ -+libf77blas.so: libf77blas.so.3 -+ ln -sf $< $@ -+ -+libf77blas.so.3: libf77blas.so.3.0 -+ ln -sf $< $@ -+ -+libf77blas.so.3.0 : libf77blas.a libcblas.so libatlas.so -+ ld $(LDFLAGS) -shared -soname libf77blas.so.3 -o $@ \ -+ --whole-archive libf77blas.a \ -+ --no-whole-archive -L. -lcblas -latlas $(F77SYSLIB) -lm -lc -+ -+liblapack_atlas.so: liblapack_atlas.so.3 -+ ln -sf $< $@ -+ -+liblapack_atlas.so.3: liblapack_atlas.so.3.0 -+ ln -sf $< $@ -+ -+liblapack_atlas.so.3.0 : liblapack_atlas.a libatlas.so libcblas.so libf77blas.so -+ ld $(LDFLAGS) -shared -soname liblapack_atlas.so.3 -o $@ \ -+ --whole-archive liblapack_atlas.a \ -+ --no-whole-archive -L. -lf77blas -lcblas -latlas -lm -lc $(F77SYSLIB) -+ -+atlas/libblas.so: atlas/libblas.so.3 -+ (cd atlas && ln -sf libblas.so.3 libblas.so) -+ -+atlas/libblas.so.3: atlas/libblas.so.3.0 -+ (cd atlas && ln -sf libblas.so.3.0 libblas.so.3) -+ -+atlas/libblas.so.3.0: libf77blas.a libcblas.a libatlas.so.3.0 -+ if test -f libptf77blas.a -a -f libptcblas.a; then \ -+ ld $(LDFLAGS) -shared -soname libblas.so.3 -o $@ \ -+ --whole-archive libptf77blas.a libptcblas.a \ -+ --no-whole-archive -L. -latlas $(F77SYSLIB) -lm -lc; \ -+ else \ -+ ld $(LDFLAGS) -shared -soname libblas.so.3 -o $@ \ -+ --whole-archive libf77blas.a libcblas.a \ -+ --no-whole-archive -L. 
-latlas $(F77SYSLIB) -lm -lc; \ -+ fi -+ -+atlas/liblapack.so: atlas/liblapack.so.3 -+ (cd atlas && ln -sf liblapack.so.3 liblapack.so) -+ -+atlas/liblapack.so.3: atlas/liblapack.so.3.0 -+ (cd atlas && ln -sf liblapack.so.3.0 liblapack.so.3) -+ -+atlas/liblapack.so.3.0: atlas/liblapack.a atlas/libblas.so libatlas.so.3.0 -+ ld $(LDFLAGS) -shared -soname liblapack.so.3 -o $@ \ -+ --whole-archive atlas/liblapack.a \ -+ --no-whole-archive -L . -lblas -latlas $(F77SYSLIB) -lm -lc -+ - # =================================================================== - # The following commands are to build dynamic/shared objects on Linux - # using the gnu gcc or ld ---- a/Make.top -+++ b/Make.top -@@ -16,6 +16,7 @@ build: - cd bin/ ; ./xatlas_build $(INSTFLAGS) - cd lib/ ; $(MAKE) atlas/libblas.a - cd lib/ ; $(MAKE) atlas/liblapack.a -+ cd lib/ ; $(MAKE) fullshared - - time: - ./xatlbench -dc $(BLDdir)/bin/INSTALL_LOG -dp $(BLDdir)/ARCHS/$(ARCH) -@@ -683,3 +684,21 @@ install_static_lib: - - cp $(LIBdir)/libsatlas.so $(INSTdir)/. - - cp $(LIBdir)/libtatlas.so $(INSTdir)/. - -+install_shared_lib: -+ for lib in atlas cblas f77blas lapack_atlas; do \ -+ cp $(LIBdir)/lib$$lib.so.3.0 $(INSTdir)/. ; \ -+ ( cd $(INSTdir) && ln -s lib$$lib.so.3.0 lib$$lib.so.3); \ -+ ( cd $(INSTdir) && ln -s lib$$lib.so.3 lib$$lib.so); \ -+ chmod 0644 $(INSTdir)/lib$$lib.so ;\ -+ chmod 0644 $(INSTdir)/lib$$lib.so.3 ;\ -+ chmod 0644 $(INSTdir)/lib$$lib.so.3.0 ;\ -+ done; -+ mkdir -p $(INSTdir)/atlas -+ for lib in blas lapack; do \ -+ cp $(LIBdir)/atlas/lib$$lib.so.3.0 $(INSTdir)/atlas/. 
; \ -+ ( cd $(INSTdir)/atlas/ && ln -s lib$$lib.so.3.0 lib$$lib.so.3); \ -+ ( cd $(INSTdir)/atlas/ && ln -s lib$$lib.so.3 lib$$lib.so); \ -+ chmod 0644 $(INSTdir)/atlas/lib$$lib.so ;\ -+ chmod 0644 $(INSTdir)/atlas/lib$$lib.so.3 ;\ -+ chmod 0644 $(INSTdir)/atlas/lib$$lib.so.3.0 ;\ -+ done; ---- a/CONFIG/src/Makefile -+++ b/CONFIG/src/Makefile -@@ -576,6 +576,7 @@ $(LIBINSTdir) : - install : $(INCINSTdir) $(LIBINSTdir) $(INCINSTdir)/atlas - $(MAKE) -f Make.top install_inc INSTdir=$(INCINSTdir) - $(MAKE) -f Make.top install_static_lib INSTdir=$(LIBINSTdir) -+ $(MAKE) -f Make.top install_shared_lib INSTdir=$(LIBINSTdir) - - confclean: $(CLEANdep) - rm -f *core* *.o config?.out diff -Nru atlas-3.10.2/debian/patches/static_full_blas_lapack.diff atlas-3.10.3/debian/patches/static_full_blas_lapack.diff --- atlas-3.10.2/debian/patches/static_full_blas_lapack.diff 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/patches/static_full_blas_lapack.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,117 +0,0 @@ -Description: Create full featured static libblas.a and liblapack.a - The resulting BLAS library will use the threaded flavour if the build has been - configured to create them. Otherwise, the serial library is used. 
-Author: David Evans -Last-Update: 2013-06-05 ---- -This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ ---- a/Make.top -+++ b/Make.top -@@ -14,6 +14,9 @@ - build: - cd bin/ ; $(MAKE) xatlas_build - cd bin/ ; ./xatlas_build $(INSTFLAGS) -+ cd lib/ ; $(MAKE) atlas/libblas.a -+ cd lib/ ; $(MAKE) atlas/liblapack.a -+ - time: - ./xatlbench -dc $(BLDdir)/bin/INSTALL_LOG -dp $(BLDdir)/ARCHS/$(ARCH) - C_sanity_test: -@@ -401,6 +404,7 @@ - $(MAKE) -f Make.top leafstart leaf=src/blas/reference/level1 - $(MAKE) -f Make.top leafstart leaf=src/auxil - $(MAKE) -f Make.top leafstart leaf=src/testing -+ $(MAKE) -f Make.top leafstart leaf=lib/atlas - $(MAKE) -f Make.top leafstart leaf=lib - $(MAKE) -f Make.top leafstart leaf=bin - cd src/threads ; touch atomic.inc -@@ -591,6 +595,7 @@ - - lib : - mkdir lib -+ mkdir lib/atlas - - include: - mkdir include -@@ -654,12 +659,17 @@ - chmod 0644 $(INSTdir)/cblas.h $(INSTdir)/clapack.h - cp $(INCAdir)/* $(INSTdir)/atlas/. - - chmod 0644 $(INSTdir)/atlas/* --install_lib: -+ -+install_static_lib: - cp $(LIBdir)/libatlas.a $(INSTdir)/. - cp $(LIBdir)/libcblas.a $(INSTdir)/. - cp $(LIBdir)/liblapack_atlas.a $(INSTdir)/. -+ mkdir -p $(INSTdir)/atlas -+ cp $(LIBdir)/atlas/libblas.a $(INSTdir)/atlas/. -+ cp $(LIBdir)/atlas/liblapack.a $(INSTdir)/atlas/. - chmod 0644 $(INSTdir)/libatlas.a $(INSTdir)/liblapack_atlas.a \ -- $(INSTdir)/libcblas.a -+ $(INSTdir)/libcblas.a $(INSTdir)/atlas/libblas.a \ -+ $(INSTdir)/atlas/liblapack.a - - cp $(LIBdir)/libf77blas.a $(INSTdir)/. - - chmod 0644 $(INSTdir)/libf77blas.a - - cp $(LIBdir)/libptcblas.a $(INSTdir)/. 
---- a/makes/Make.lib -+++ b/makes/Make.lib -@@ -175,6 +175,36 @@ - LIBS="$(LIBS)" LIBINSTdir="$(LIBINSTdir)" ; \ - fi - -+ -+# Build full netlib blas/lapack libraries: -+atlas/libblas.a: libatlas.a -+ mkdir tmp -+ cd tmp && \ -+ ar x ../libatlas.a && \ -+ if test -f ../libptf77blas.a -a -f ../libptcblas.a; then \ -+ ar x ../libptf77blas.a && \ -+ ar x ../libptcblas.a; \ -+ else \ -+ ar x ../libf77blas.a && \ -+ ar x ../libcblas.a; \ -+ fi -+ ar r $@ tmp/*.o -+ rm -rf tmp -+ -+atlas/liblapack.a: liblapack_atlas.a libatlas.a -+ mkdir tmp -+ cd tmp && \ -+ ar x /usr/lib/liblapack_pic.a && \ -+ ar x ../liblapack_atlas.a && \ -+ if test -f ../libptcblas.a; then \ -+ ar x ../libptcblas.a; \ -+ else \ -+ ar x ../libcblas.a; \ -+ fi -+ ar r $@ tmp/*.o -+ rm -rf tmp -+ -+ - # - # Builds one shared lib from all ATLAS files - # ---- a/CONFIG/src/Makefile -+++ b/CONFIG/src/Makefile -@@ -575,7 +575,7 @@ - chmod 0755 $(LIBINSTdir) - install : $(INCINSTdir) $(LIBINSTdir) $(INCINSTdir)/atlas - $(MAKE) -f Make.top install_inc INSTdir=$(INCINSTdir) -- $(MAKE) -f Make.top install_lib INSTdir=$(LIBINSTdir) -+ $(MAKE) -f Make.top install_static_lib INSTdir=$(LIBINSTdir) - - confclean: $(CLEANdep) - rm -f *core* *.o config?.out ---- a/CONFIG/src/SpewMakeInc.c -+++ b/CONFIG/src/SpewMakeInc.c -@@ -604,6 +604,9 @@ - } - fprintf(fpout, " TESTlib = $(LIBdir)/libtstatlas.a\n\n"); - -+ fprintf(fpout, " FULLBLASlib = $(LIBdir)/atlas/libblas.a\n"); -+ fprintf(fpout, " FULLLAPACKlib = $(LIBdir)/atlas/liblapack.a\n"); -+ - fprintf(fpout, "# -------------------------------------------\n"); - fprintf(fpout, "# Upper bound on largest cache size, in bytes\n"); - fprintf(fpout, "# -------------------------------------------\n"); diff -Nru atlas-3.10.2/debian/README.Debian atlas-3.10.3/debian/README.Debian --- atlas-3.10.2/debian/README.Debian 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/README.Debian 2017-09-09 11:19:09.000000000 +0000 @@ -1,12 +1,7 @@ ATLAS for Debian ================ 
-Starting from Atlas 3.8.3-1, the whole build process has been rewritten. -It is now using most of the debhelper tools and simplify the management of -other archs. - - -Why optimized packages are no longer available? +Why are optimized packages no longer available? ----------------------------------------------- Before version 3.8.3-25, optimized binary packages were provided. However, due @@ -24,18 +19,18 @@ How to switch between BLAS/LAPACK implementations ------------------------------------------------- -Since Atlas 3.8.3-10, it is trivial to switch between the various BLAS/LAPACK -implementations. +It is trivial to switch between the various BLAS/LAPACK implementations. + +- BLAS: -BLAS: -update-alternatives --config libblas.so.3 + $ sudo update-alternatives --config libblas.so.3- -LAPACK: -update-alternatives --config liblapack.so.3 +- LAPACK: -Note that since Atlas 3.8.4-8, the ATLAS version of LAPACK is not selected by -default (in order to minimize the incidence of bug #576972); you have to -manually change the LAPACK alternative to use the optimized version. + $ sudo update-alternatives --config liblapack.so.3- + +where is the multiarch path for you architecture (e.g. +x86_64-linux-gnu for amd64). More information is available here: @@ -46,22 +41,65 @@ ---------------------------------------------- Building your own optimized packages of Atlas is straightforward. -Just get the sources of the package and its build-dependencies: -# apt-get source atlas -# apt-get build-dep atlas -# apt-get install devscripts +1) Get the sources of the package and its build-dependencies: + + $ apt-get source atlas + $ sudo apt-get build-dep atlas + +2) Disable CPU throttling if it is enabled. Otherwise Atlas cannot get reliable +timings and therefore the optimization will be suboptimal. This can easily be +done using the tools from the cpufrequtils package. 
+ +The state of your CPUs can be checked with: + + $ cpufreq-info + +If for any CPU, the governor is not set to "performance", then you can do so +by issuing the following command for each of your CPU cores: + + $ sudo cpufreq-set -g performance -c <N> + +where <N> is the CPU core number (the first has number zero). + +See INSTALL.txt from the libatlas-doc package for more details on this issue. + +3) Build the package by running the following: + + $ cd atlas-<version> + $ DEB_BUILD_OPTIONS=custom dpkg-buildpackage -uc -b + +where <version> is replaced with the appropriate value. + +If the Atlas build system still complains about CPU throttling issues, despite +having set the governors to performance, then you can force Atlas by +uncommenting the following line in debian/rules: + + CONFIGURE_FLAGS += --cripple-atlas-performance + +4) When the build finishes (it will take a long time), it should produce a .deb +file, whose precise name you can figure out using: + + $ ls ../libatlas3-base_*.deb + +Install it using: + + $ sudo dpkg -i ../libatlas3-base_<version>_<arch>.deb -and type the following from the atlas source subdir: +replacing <version> and <arch> with the appropriate values. -# fakeroot debian/rules custom +5) If you don't want the Debian package manager to overwrite your optimized +package with a generic one on system upgrades, you can mark the Atlas package +to be on hold, with the following command: -it should produce a package called: + $ sudo aptitude hold libatlas3-base - ../libatlas3-base_*.deb +Don't forget however to periodically check whether a more recent version of the +package is available for the Debian suite that you are tracking, in which case +you should go again through this procedure. The version check can be performed +using: -which is optimized for the architecture Atlas has been built on. Then install -the package using "dpkg -i". + $ aptitude versions libatlas3-base Misc @@ -71,5 +109,5 @@ libraries managements. 
- -- Sébastien Villemot , Fri, 7 Jun 2013 12:07:33 +0200 + -- Sébastien Villemot , Thu, 7 Sep 2017 21:50:00 +0200 -- Sylvestre Ledru , Tue, 15 May 2012 18:05:34 +0200 diff -Nru atlas-3.10.2/debian/README.source atlas-3.10.3/debian/README.source --- atlas-3.10.2/debian/README.source 2014-10-25 17:12:20.000000000 +0000 +++ atlas-3.10.3/debian/README.source 2017-09-09 11:19:09.000000000 +0000 @@ -27,13 +27,13 @@ timings (called architectural defaults in the ATLAS jargon). These are located under debian/archdefs/$(DEB_HOST_ARCH_CPU). The name of the tarballs corresponds to the architecture detected by ATLAS (variable ARCH in -build/atlas-base/Make.inc). +build/Make.inc). The procedure for creating these architectural defaults on a given platform is the following: debuild -us -uc -B (will be very long, because timings will be computed) - cd build/atlas-base/ARCHS + cd build/ARCHS make ArchNew (will create an $(ATLAS_ARCH) subdirectory) tar caf $(ATLAS_ARCH).tar.bz2 $(ATLAS_ARCH) mkdir -p ../../../debian/archdefs/$(DEB_HOST_ARCH_CPU)/ @@ -42,5 +42,5 @@ [1] http://math-atlas.sourceforge.net/errata.html#tol - -- Sébastien Villemot , Sat, 25 Oct 2014 19:12:20 +0200 + -- Sébastien Villemot , Tue, 1 Aug 2017 18:03:42 +0200 diff -Nru atlas-3.10.2/debian/rules atlas-3.10.3/debian/rules --- atlas-3.10.2/debian/rules 2015-07-29 06:38:40.000000000 +0000 +++ atlas-3.10.3/debian/rules 2019-08-15 14:33:44.000000000 +0000 @@ -1,214 +1,274 @@ #!/usr/bin/make -f -# Copyright 2008-2010 Sylvestre Ledru -include /usr/share/cdbs/1/rules/debhelper.mk +# Some ATLAS code uses improper format strings, so disable this +export DEB_BUILD_MAINT_OPTIONS=hardening=-format -# NB: the following numbers come from xprint_enums (compilable from the build -# directory with "make xprint_enums); they can change across releases -# -# First number in ARCHS: -# - 0 means Unknown CPU: leave ATLAS find out; used for archs where a more specific choice is not available -# - 9 means IBMz9: for s390x -# - 12 means 
artificial architecture called x86x87, providing you with portable (but slow!) architectural defaults: for *i386 -# See http://math-atlas.sourceforge.net/atlas_install/node28.html -# - 14 means generic libraries for P4 and later, with SSE2: for *amd64 -# See: http://math-atlas.sourceforge.net/atlas_install/node30.html -# - 51 means ARMv7: for armhf (but not for armel, which is ARM >= v4) -# - 52 means GENERIC: the same than 0 (UNKNOWN), except that it does not try autodetection -# See debian/patches/generic.diff -# Second number in ARCHS: -# - 1 means no instruction set extension -# - 384 means SSE1+SSE2 (always available on amd64) +include /usr/share/dpkg/default.mk + +# Parallel make variable, used by ATLAS build system +PMAKE := make +ifneq (,$(filter parallel=%,$(DEB_BUILD_OPTIONS))) +PMAKE += -j $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS))) +endif + +GFORTRAN_LIB_PATH := $(shell $(CC) -print-search-dirs | grep ^install: | awk '{print $$2}') + +# Flags common to both the generic and custom packages +CONFIGURE_FLAGS := --prefix="$(CURDIR)/debian/tmp" \ + --incdir="$(CURDIR)/debian/tmp/usr/include/$(DEB_HOST_MULTIARCH)/" \ + --libdir="$(CURDIR)/debian/tmp/usr/lib/$(DEB_HOST_MULTIARCH)/" \ + --shared \ + -D c -DWALL \ + -Ss f77lib "-L$(GFORTRAN_LIB_PATH) -lgfortran -lgcc_s -lpthread" \ + -Ss pmake '$(PMAKE)' \ + -v 2 + +ifeq (,$(filter custom,$(DEB_BUILD_OPTIONS))) +# Building the generic package, so enforce more configuration settings to make +# the package independent of the build host hardware. + +# In particular, force the MACHTYPE and ISA. +# The following strings and numbers come from xprint_enums (compilable +# with "make -C build xprint_enums"). +# N.B.: They can change across releases! 
+ +ifeq (yes,$(shell dpkg-vendor --derives-from Ubuntu && echo yes)) +UBUNTU := yes +endif + +# Select the machine type ifeq ($(DEB_HOST_ARCH),s390x) -ARCHS=base_9_1 +MACHTYPE := $(if $(UBUNTU),IBMz12,IBMz9) +Z13 := $(shell grep -q 'features.: .* vx .*' /proc/cpuinfo && echo yes) +Z14 := $(shell grep -q 'features.: .* vxd .*' /proc/cpuinfo && echo yes) +else ifeq ($(DEB_HOST_ARCH_CPU),ppc64el) +MACHTYPE := POWER8 else ifeq ($(DEB_HOST_ARCH_CPU),i386) -ARCHS=base_12_1 +# See http://math-atlas.sourceforge.net/atlas_install/node32.html +MACHTYPE := x86x87 else ifeq ($(DEB_HOST_ARCH_CPU),amd64) -ARCHS=base_14_384 +MACHTYPE := x86SSE2 +else ifeq ($(DEB_HOST_ARCH),ia64) +MACHTYPE := IA64Itan +else +# The same as UNKNOWN, except that it does not try autodetection +# See debian/patches/generic.diff +MACHTYPE := GENERIC +endif + +# ISA corresponds to the instruction set architecture: +# - 1 means no instruction set extension +# - 4 means VXZ2 (available on z14) +# - 8 means VXZ (available on z13) +# - 1536 means SSE1+SSE2 (always available on amd64) +# - 32768 means FPV3D16MAC (always available on armhf) +# VSX should also be enabled on ppc64el, but as of 3.10.3-2 it FTBFS. +ifeq ($(DEB_HOST_ARCH_CPU),amd64) +ISA := 1536 else ifeq ($(DEB_HOST_ARCH),armhf) -ARCHS=base_51_1 +ISA := 32768 else -ARCHS=base_52_1 +ISA := 1 endif -# Pointer bitwidth -MODE_BITWIDTH = $(shell dpkg-architecture -qDEB_HOST_ARCH_BITS) +# - the cripple-atlas-performance flag is needed for disabling the CPU +# throttling check, because on some buildds CPU throttling is enabled and we +# have no way of disabling it; +# - disable multi-threading (-t 0), because otherwise the package FTBFS on some +# single-processor machines; +# - enforce gcc as the compiler for all uses (except for compiling probes, +# in --cc), because otherwise architectural defaults do not work. 
+CONFIGURE_FLAGS += \ + -b $(DEB_HOST_ARCH_BITS) \ + -A $(MACHTYPE) \ + -V $(ISA) \ + -t 0 \ + --cc="$(CC)" \ + --cflags="$(CPPFLAGS) $(CFLAGS)" \ + -C acg gcc \ + -F acg "$(CPPFLAGS) $(CFLAGS)" \ + -C if gfortran \ + -F if "$(FFLAGS)" \ + -Ss ADdir ../../debian/archdefs/$(DEB_HOST_ARCH_CPU) \ + --cripple-atlas-performance + +else +# Flags specific to the custom package -DEB_SHLIBDEPS_INCLUDE_libatlas3-base := $(CURDIR)/debian/libatlas3-base/usr/lib/atlas-base/:$(CURDIR)/debian/libatlas3-base/usr/lib/atlas-base/atlas/ +# Uncomment the following if the ATLAS build system still complains after you +# have set all the CPU governors to "performance" +#CONFIGURE_FLAGS += --cripple-atlas-performance +endif -DEB_DH_GENCONTROL_ARGS_ALL := -- -VBuilt-Using="`dpkg-query -W -f='$${source:Package} (= $${source:Version})' liblapack-pic`" -GFORTRAN_LIB_PATH=`$(CC) -print-search-dirs|grep ^install:|awk '{print $$2}'` +GENERATED_DEBIAN_FILES := $(patsubst %.in,%,$(wildcard debian/*.pc.in debian/*.postinst.in debian/*.prerm.in)) -# Flags common to both the generic and custom packages -COMMON_CONFIG_PARAMS := --prefix="$(DEB_DESTDIR)" \ - --incdir="$(DEB_DESTDIR)/usr/include/" \ - --libdir="$(DEB_DESTDIR)/usr/lib/$$targetName" \ - --cc="${CC}" \ - --cflags="${CFLAGS}" \ - -D c -DWALL \ - -b $(MODE_BITWIDTH) \ - -Fa alg '-fPIC' \ - -Ss f77lib "-L$(GFORTRAN_LIB_PATH) -lgfortran -lgcc_s -lpthread" \ - -Ss pmake '$(MAKE)' \ - -Ss flapack /usr/lib/liblapack_pic.a \ - -A $$atlasArch \ - -V $$atlasISA \ - -v 2 +$(GENERATED_DEBIAN_FILES): %: %.in + sed -e "s/@DEB_VERSION_UPSTREAM@/$(DEB_VERSION_UPSTREAM)/g" \ + -e "s/@DEB_HOST_MULTIARCH@/$(DEB_HOST_MULTIARCH)/g" < $< > $@ + + +%: + dh $@ + +override_dh_auto_configure: $(GENERATED_DEBIAN_FILES) + mkdir -p build/ + cd build && ../configure $(CONFIGURE_FLAGS) +ifeq ($(Z13),yes) + mkdir -p build-z13 + cd build-z13 && ../configure $(CONFIGURE_FLAGS) -Si archdef 2 -A IBMz13 -V 8 --cflags="$(CPPFLAGS) $(CFLAGS) -march=z13 -mzvector" -C acg gcc -F 
acg "$(CPPFLAGS) $(CFLAGS) -march=z13 -mzvector" -C if gfortran -F if "$(FFLAGS) -march=z13 -mzvector" + mkdir -p build-z14 + cd build-z14 && ../configure $(CONFIGURE_FLAGS) -Si archdef 2 -A IBMz14 -V 4 --cflags="$(CPPFLAGS) $(CFLAGS) -march=z14 -mzvector" -C acg gcc -F acg "$(CPPFLAGS) $(CFLAGS) -march=z14 -mzvector" -C if gfortran -F if "$(FFLAGS) -march=z14 -mzvector" -Fa xc '-march=z13' +endif + @echo "==============================================" + @echo -n "Configured arch: " + @grep ARCH < build/Make.inc | head -1 | awk '{print $$3}' +ifeq ($(Z13),yes) + @grep ARCH < build-z13/Make.inc | head -1 | awk '{print $$3}' + @grep ARCH < build-z14/Make.inc | head -1 | awk '{print $$3}' +endif + @echo "==============================================" -# See http://math-atlas.sourceforge.net/errata.html#armhardfp -ifeq ($(DEB_HOST_ARCH),armhf) -COMMON_CONFIG_PARAMS += -D c -DATL_ARM_HARDFP=1 +override_dh_auto_build: + make -C build + make -f debian/rules generate_libs BUILD=build +ifeq ($(Z13),yes) + make -C build-z13 + make -f debian/rules generate_libs BUILD=build-z13 + make -C build-z14 + make -f debian/rules generate_libs BUILD=build-z14 endif -# Flags used only for the generic package -GENERIC_CONFIG_PARAMS := \ - -t 0 \ - -Ss ADdir ../../../debian/archdefs/$(DEB_HOST_ARCH_CPU) \ - -Si cputhrchk 0 + # Kludge to fix blas-atlas.pc by adding -lpthread when needed + if test -f build/lib/libptcblas.a; then sed -i 's/^Libs.private: \(.*\)/Libs.private: \1 -lpthread/' debian/blas-atlas.pc; fi -# The archdefs provided by ATLAS are unsuitable for armhf (see details in -# armhf.diff patch) -# -# Hence we replace them by new ones without the problematic flags. They are -# located under debian/archdefs/arm, and used for the generic package. -# -# The following rules ensure that these new archdefs are also used for the -# custom package. 
-use-my-armhf-archdef: - test -f CONFIG/ARCHS/ARMv732.tar.bz2.old || (mv CONFIG/ARCHS/ARMv732.tar.bz2 CONFIG/ARCHS/ARMv732.tar.bz2.old && cp debian/archdefs/arm/ARMv732.tar.bz2 CONFIG/ARCHS/ARMv732.tar.bz2) - test -f CONFIG/ARCHS/ARMv732NEON.tar.bz2.old || (mv CONFIG/ARCHS/ARMv732NEON.tar.bz2 CONFIG/ARCHS/ARMv732NEON.tar.bz2.old && cp debian/archdefs/arm/ARMv732NEON.tar.bz2 CONFIG/ARCHS/ARMv732NEON.tar.bz2) - -restore-armhf-archdef: - (test -f CONFIG/ARCHS/ARMv732.tar.bz2.old && mv CONFIG/ARCHS/ARMv732.tar.bz2.old CONFIG/ARCHS/ARMv732.tar.bz2) || true - (test -f CONFIG/ARCHS/ARMv732NEON.tar.bz2.old && mv CONFIG/ARCHS/ARMv732NEON.tar.bz2.old CONFIG/ARCHS/ARMv732NEON.tar.bz2) || true - -# Build a custom package optimized for the current arch -custom: custom-stamp -.PHONY: custom -custom-stamp: use-my-armhf-archdef - rm -f configure-stamp - - ncpu=$$(LANG=C cpufreq-info | grep "analyzing CPU" -c);\ - cpu=0;\ - while test $$cpu -lt $$ncpu ; do\ - if test $$(cpufreq-info -p 2>&1 > /dev/null; echo $$?) \ - -eq 0 -a -z "$$(LANG=C cpufreq-info -p -c $$cpu | grep performance)" ; then \ - echo "frequency governor on cpu=$$cpu is not set to 'performance'"; \ - echo "run: 'sudo cpufreq-set -g performance -c cpu#' for each cpu"; \ - echo "aborting atlas build"; \ - exit 1; \ - fi; \ - cpu=$$((cpu+1)) ;\ - done - - # Here, the trick is pretty simple: - # * We don't want to build all the optimized version of Atlas. 
Just the - # one for the current CPU - # * We override ARCHS by base__ - # * Base is the name of the package (libatlas3-base) - # * __ is provided to have the right expression - # * the regexp will consider that atlasArch and atlasISA are empty - # therefor, leaving the choice of the optimization to Atlas - # * After that, we fall back to the normal build procedure by calling - # debian/rules itself (yep, recursively) - - # Remove all the other archs in this case - dch --local='+custom' "custom build on: `uname -a`" - LANG=C debian/rules ARCHS=base__ binary - touch $@ - -common-configure-arch common-configure-indep:: configure-stamp -configure-stamp: - dh_testdir - - set -e; \ - targetName=`echo $(ARCHS)|sed -e "s|\(.*\)_.*_.*|atlas-\1|g"`; \ - atlasArch=`echo $(ARCHS)|sed -e "s|.*_\(.*\)_.*|\1|g"`; \ - atlasISA=`echo $(ARCHS)|sed -e "s|.*_.*_\(.*\)|\1|g"`; \ - mkdir -p build/$$targetName; \ - cd build/$$targetName; \ - if test "$(ARCHS)" != "base__"; then \ - ../../configure $(COMMON_CONFIG_PARAMS) $(GENERIC_CONFIG_PARAMS); \ - else \ - ../../configure $(COMMON_CONFIG_PARAMS); \ - fi; \ - echo "Configure done. targetName = $$targetName / atlasArch = $$atlasArch / atlasISA = $$atlasISA"; \ - if test ! 
-s Make.inc; then echo "Configure failed: Make.inc not found"; exit 1; fi; \ - cat Make.inc |grep ARCH|head -1|awk '{print $$3}'; \ - echo "=============================================="; \ - cd - - touch $@ - -debian/blas-atlas.pc: debian/blas-atlas.pc.in - sed -e "s%@DEB_UPSTREAM_VERSION@%$(DEB_UPSTREAM_VERSION)%" < $< > $@ - -debian/lapack-atlas.pc: debian/lapack-atlas.pc.in - sed -e "s%@DEB_UPSTREAM_VERSION@%$(DEB_UPSTREAM_VERSION)%" < $< > $@ - -common-build-arch common-build-indep:: build-stamp -build-stamp: debian/blas-atlas.pc debian/lapack-atlas.pc - dh_testdir - set -e; \ - targetName=`echo $(ARCHS)|sed -e "s|\(.*\)_.*_.*|atlas-\1|g"`; \ - cd build/$$targetName; \ - $(MAKE) build; \ - $(MAKE) check; \ - test -f build/$$targetName/lib/libptcblas.a && $(MAKE) ptcheck; \ - echo "=============================================="; \ - cd ../../../ - # Do not build the help when building the custom package - if test "$(ARCHS)" != "base__"; then \ - make -C TexDoc atlas_contrib.pdf atlas_devel.pdf atlas_install.pdf cblasqref.pdf f77blasqref.pdf lapackqref.pdf; \ - fi - touch $@ + # Build PDF documentation +ifeq (,$(filter nodoc,$(DEB_BUILD_OPTIONS))) + make -C TexDoc atlas_contrib.pdf atlas_devel.pdf atlas_install.pdf cblasqref.pdf f77blasqref.pdf lapackqref.pdf +endif -clean:: clean-work -clean-work: restore-armhf-archdef - dh_testdir - dh_testroot - rm -rf build check - rm -f debian/blas-atlas.pc debian/lapack-atlas.pc - rm -f configure-stamp build-stamp - rm -f TexDoc/*.pdf - [ ! 
-f Makefile ] || $(MAKE) distclean -.PHONY: clean-work - -common-install-arch common-install-indep:: - dh_testdir - dh_testroot - - set -e; \ - targetName=`echo $(ARCHS)|sed -e "s|\(.*\)_.*_.*|atlas-\1|g"`; \ - cd build/$$targetName; \ - echo "make install of $$targetName to $(DEB_DESTDIR)"; \ - $(MAKE) DESTDIR=$(DEB_DESTDIR) install; \ - echo "============================================="; \ - cd - - -install/libatlas-dev:: - if test -d $(CURDIR)/debian/libatlas-dev/usr/include/; then \ - rm -rf $(CURDIR)/debian/libatlas-dev/usr/include/; \ +generate_libs: + # We do not ship the lib{s,t}atlas.so created by ATLAS build system, they have an awkward name and an incorrect SONAME + # Rather create our shared versions of individual ATLAS libraries + gfortran $(LDFLAGS) -shared -Wl,-soname=libatlas.so.3 -o $(BUILD)/libatlas.so.$(DEB_VERSION_UPSTREAM) \ + -Wl,--whole-archive $(BUILD)/lib/libatlas.a -Wl,--no-whole-archive \ + $$(test -f $(BUILD)/lib/libptcblas.a && echo -lpthread) -lc -lm + ln -sf libatlas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/libatlas.so.3 + ln -sf libatlas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/libatlas.so + + gfortran $(LDFLAGS) -shared -Wl,-soname=libcblas.so.3 -o $(BUILD)/libcblas.so.$(DEB_VERSION_UPSTREAM) \ + -Wl,--whole-archive $(BUILD)/lib/libcblas.a -Wl,--no-whole-archive \ + -L$(BUILD) -latlas -lm -lc + ln -sf libcblas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/libcblas.so.3 + ln -sf libcblas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/libcblas.so + + gfortran $(LDFLAGS) -shared -Wl,-soname=libf77blas.so.3 -o $(BUILD)/libf77blas.so.$(DEB_VERSION_UPSTREAM) \ + -Wl,--whole-archive $(BUILD)/lib/libf77blas.a -Wl,--no-whole-archive \ + -L$(BUILD) -lcblas -latlas -lm -lc + ln -sf libf77blas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/libf77blas.so.3 + ln -sf libf77blas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/libf77blas.so + + gfortran $(LDFLAGS) -shared -Wl,-soname=liblapack_atlas.so.3 -o $(BUILD)/liblapack_atlas.so.$(DEB_VERSION_UPSTREAM) \ + -Wl,--whole-archive 
$(BUILD)/lib/liblapack_atlas.a -Wl,--no-whole-archive \ + -L$(BUILD) -lf77blas -lcblas -latlas -lm -lc + ln -sf liblapack_atlas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/liblapack_atlas.so.3 + ln -sf liblapack_atlas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/liblapack_atlas.so + + if test -f $(BUILD)/lib/libptcblas.a; then \ + gfortran $(LDFLAGS) -shared -Wl,-soname=libptcblas.so.3 \ + -o $(BUILD)/libptcblas.so.$(DEB_VERSION_UPSTREAM) \ + -Wl,--whole-archive $(BUILD)/lib/libptcblas.a -Wl,--no-whole-archive \ + -L$(BUILD) -latlas -lm -lc; \ + ln -sf libptcblas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/libptcblas.so.3; \ + ln -sf libptcblas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/libptcblas.so; \ fi - mkdir -p $(CURDIR)/debian/libatlas-dev/usr/include/ - mv $(CURDIR)/debian/tmp/usr/include/*.h $(CURDIR)/debian/tmp/usr/include/atlas/ - mv $(CURDIR)/debian/tmp/usr/include/atlas $(CURDIR)/debian/libatlas-dev/usr/include/ - -install/libatlas-doc:: - if test "$(ARCHS)" != "base__"; then \ - mkdir -p $(CURDIR)/debian/libatlas-doc/usr/share/doc/libatlas-doc/; \ - cp -R $(CURDIR)/doc/* $(CURDIR)/TexDoc/*.pdf $(CURDIR)/debian/libatlas-doc/usr/share/doc/libatlas-doc/; \ - rm $(CURDIR)/debian/libatlas-doc/usr/share/doc/libatlas-doc/ChangeLog; \ + + if test -f $(BUILD)/lib/libptf77blas.a; then \ + gfortran $(LDFLAGS) -shared -Wl,-soname=libptf77blas.so.3 \ + -o $(BUILD)/libptf77blas.so.$(DEB_VERSION_UPSTREAM) \ + -Wl,--whole-archive $(BUILD)/lib/libptf77blas.a -Wl,--no-whole-archive \ + -L$(BUILD) -lptcblas -latlas -lm -lc; \ + ln -sf libptf77blas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/libptf77blas.so.3; \ + ln -sf libptf77blas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/libptf77blas.so; \ fi + # Create static and shared full BLAS + mkdir tmp + cd tmp && \ + ar x ../$(BUILD)/lib/libatlas.a && \ + if test -f ../$(BUILD)/lib/libptf77blas.a -a -f ../$(BUILD)/lib/libptcblas.a; then \ + ar x ../$(BUILD)/lib/libptf77blas.a && \ + ar x ../$(BUILD)/lib/libptcblas.a; \ + else \ + ar x ../$(BUILD)/lib/libf77blas.a && \ + 
ar x ../$(BUILD)/lib/libcblas.a; \ + fi + ar r $(BUILD)/libblas.a tmp/*.o + rm -rf tmp + gfortran $(LDFLAGS) -shared -Wl,-soname=libblas.so.3 -o $(BUILD)/libblas.so.$(DEB_VERSION_UPSTREAM) \ + -Wl,--whole-archive $(BUILD)/libblas.a -Wl,--no-whole-archive \ + -L$(BUILD) -latlas $$(test -f $(BUILD)/lib/libptcblas.a && echo -lpthread) -lm -lc + ln -sf libblas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/libblas.so.3 + ln -sf libblas.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/libblas.so + + # Create static and shared full LAPACK + # Note that we unpack liblapack_atlas.a after liblapack_pic.a, in order to overwrite routines in the latter + mkdir tmp + cd tmp && \ + ar x /usr/lib/$(DEB_HOST_MULTIARCH)/liblapack_pic.a && \ + ar x ../$(BUILD)/lib/liblapack_atlas.a + ar r $(BUILD)/liblapack.a tmp/*.o + rm -rf tmp + gfortran $(LDFLAGS) -shared -Wl,-soname=liblapack.so.3 -o $(BUILD)/liblapack.so.$(DEB_VERSION_UPSTREAM) \ + -Wl,--whole-archive $(BUILD)/liblapack.a -Wl,--no-whole-archive \ + -L$(BUILD) -lblas -latlas -lm -lc + ln -sf liblapack.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/liblapack.so.3 + ln -sf liblapack.so.$(DEB_VERSION_UPSTREAM) $(BUILD)/liblapack.so + +override_dh_auto_test: + make -C build check + if test -f build/lib/libptcblas.a; then make -C build ptcheck; fi +ifeq ($(Z13),yes) + [ -d build-z13 ] && make -C build-z13 check + if test -f build-z13/lib/libptcblas.a; then make -C build-z13 ptcheck; fi +endif +ifeq ($(Z14),yes) + [ -d build-z14 ] && make -C build-z14 check + if test -f build-z14/lib/libptcblas.a; then make -C build-z14 ptcheck; fi +endif + +override_dh_auto_install: + make -C build install # The build system does not support setting DESTDIR at install time + mv debian/tmp/usr/include/$(DEB_HOST_MULTIARCH)/cblas.h debian/tmp/usr/include/$(DEB_HOST_MULTIARCH)/cblas-atlas.h + +override_dh_installdocs: + dh_installdocs -plibatlas-doc --doc-main-package=libatlas-base-dev + dh_installdocs -Nlibatlas-doc + # We want packages using libblas/liblapack to depend on any 
BLAS/LAPACK # alternative, and we want packages depending on ATLAS-specific libraries # (e.g. libatlas.so) to depend specifically on libatlas3-base. # # Such a setting is not supported by dh_makeshlibs, so we ship a hand-crafted # shlibs file. -binary-predeb/libatlas3-base:: +override_dh_makeshlibs: + dh_makeshlibs cp debian/libatlas3-base.shlibs debian/libatlas3-base/DEBIAN/shlibs + if test -f build/libptcblas.so; then echo "libptcblas 3 libatlas3-base" >> debian/libatlas3-base/DEBIAN/shlibs; fi + if test -f build/libptf77blas.so; then echo "libptf77blas 3 libatlas3-base" >> debian/libatlas3-base/DEBIAN/shlibs; fi + +override_dh_shlibdeps: + dh_shlibdeps -a -l $$(pwd)/build -get-orig-source: - -uscan --upstream-version 0 +override_dh_auto_clean: + rm -rf build build-* + rm -f TexDoc/*.pdf + rm -f debian/blas-atlas.pc debian/lapack-atlas.pc + rm -f *.a *.so *.so.* + +override_dh_install: + Z13=$(if $(Z13), ,'# ') Z14=$(if $(Z13), ,'# ') dh_install + +override_dh_clean: + dh_clean + rm -f $(GENERATED_DEBIAN_FILES) diff -Nru atlas-3.10.2/debian/source/include-binaries atlas-3.10.3/debian/source/include-binaries --- atlas-3.10.2/debian/source/include-binaries 2015-11-23 18:19:16.000000000 +0000 +++ atlas-3.10.3/debian/source/include-binaries 2019-03-12 17:48:48.000000000 +0000 @@ -1,12 +1,18 @@ debian/archdefs/amd64/x86SSE264SSE2.tar.bz2 debian/archdefs/arm/GENERIC32.tar.bz2 -debian/archdefs/arm/ARMv732.tar.bz2 -debian/archdefs/arm/ARMv732NEON.tar.bz2 +debian/archdefs/arm/GENERIC32FPV3D16MAC.tar.bz2 debian/archdefs/arm64/GENERIC64.tar.bz2 debian/archdefs/i386/x86x8732.tar.bz2 +debian/archdefs/ia64/IA64Itan64.tar.bz2 debian/archdefs/mips/GENERIC32.tar.bz2 debian/archdefs/mipsel/GENERIC32.tar.bz2 debian/archdefs/mips64el/GENERIC64.tar.bz2 debian/archdefs/powerpc/GENERIC32.tar.bz2 -debian/archdefs/ppc64el/GENERIC64LE.tar.bz2 +debian/archdefs/ppc64/GENERIC64.tar.bz2 +debian/archdefs/ppc64el/POWER864LE.tar.bz2 +debian/archdefs/riscv64/GENERIC64.tar.bz2 
debian/archdefs/s390x/IBMz964.tar.bz2 +debian/archdefs/s390x/IBMz1264.tar.bz2 +debian/archdefs/sparc64/GENERIC64.tar.bz2 +debian/archdefs/s390x/IBMz1464VXZ2.tar.bz2 +debian/archdefs/s390x/IBMz1364VXZ.tar.bz2 diff -Nru atlas-3.10.2/debian/TODO atlas-3.10.3/debian/TODO --- atlas-3.10.2/debian/TODO 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/TODO 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -* see it is worst keeping liblapack_atlas.a or rename it to liblapack.a -* Provide -dbg packages diff -Nru atlas-3.10.2/debian/watch atlas-3.10.3/debian/watch --- atlas-3.10.2/debian/watch 2014-07-12 10:23:26.000000000 +0000 +++ atlas-3.10.3/debian/watch 2018-04-29 17:33:26.000000000 +0000 @@ -1,3 +1,4 @@ -version=3 +version=4 +opts=dversionmangle=s/\+ds$//,oversionmangle=s/(.*)/$1+ds/ \ http://sf.net/math-atlas/atlas(\d.*)\.tar\.bz2 debian debian/orig-tar.sh diff -Nru atlas-3.10.2/doc/ChangeLog atlas-3.10.3/doc/ChangeLog --- atlas-3.10.2/doc/ChangeLog 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/doc/ChangeLog 2016-07-28 19:42:59.000000000 +0000 @@ -1,3 +1,32 @@ +ATLAS 3.10.3 released 07/28/16, highlights of changes from 3.10.2 + * Updated F77 L1BLAS testers to those used LAPACK3.6.1 + * Fixed bug in rotmg revealed by LAPACK3.6.1 testers + * Fixed bug in hprk/sprk that could cause NaN propogation in HERK/SYRK due + to reading uninitialized memory in BETA=0 case + * Fixed bug in threaded SYR2K/HER2K that could cause NaN propogation due + to reading uninitialized memory + * Extended matrix/vector norm functions to detect NaNs + * Extended configure: + + --force-clang=/path/to/clang : will use clang for all C compilers, + even goodgcc (assumes gcc flag & inline-assembly compatibility) + + --cripple-atlas-performance: install despite failing throttle check + + Can now use arch string rather than enum # for -A arg + + --force-tids now affects ATLrun.sh as well as threaded build + + ARM32 autodetects SOFTFP/HARDFP ABI + * backport of config & archdefs for: 
+ + POWER[7,8]le, IBMz[10,13,19], Corei[3,4], ARM[7,9,15,17], + ARM64[xgene,a53,a57] + + archdefs for NEON ARMa[7,15] + + config support for IBM Z[9,196,12] + * backport & extension of atlas_simd.h & atlas_cplxsimd.h + + New SIMD kernels for: VSX, VXZ, AVX2, AdvancedSIMD, NEON + * Fixed mflop test of PrintMMLine, that sometimes failed to print + valid mflop due to negative values from prior runs + * Removed ATL_dmm6x1x60_sse2_32.c from z index files (not valid cplx kern) + * Forced MinGW comps to be ignored unless -Si nocygwin 1 is set + * Added support for WOW64 detection & basic use, numerous changes to make + work on cygwin64 + * Fixed uninit nM in s[1,2]nxtune.c's RecDoubleNX ATLAS 3.10.2 released 07/10/14, highlights of changes from 3.10.1 * Fixed all errataed bugs: + Failure to init workspace can cause NaNs in SYRK diff -Nru atlas-3.10.2/EXtest/mvntest.c atlas-3.10.3/EXtest/mvntest.c --- atlas-3.10.2/EXtest/mvntest.c 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/EXtest/mvntest.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/EXtest/mvttest.c atlas-3.10.3/EXtest/mvttest.c --- atlas-3.10.2/EXtest/mvttest.c 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/EXtest/mvttest.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/EXtest/r1test.c atlas-3.10.3/EXtest/r1test.c --- atlas-3.10.2/EXtest/r1test.c 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/EXtest/r1test.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/EXtest/r2test.c atlas-3.10.3/EXtest/r2test.c --- atlas-3.10.2/EXtest/r2test.c 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/EXtest/r2test.c 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_altivec.h atlas-3.10.3/include/atlas_altivec.h --- atlas-3.10.2/include/atlas_altivec.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_altivec.h 2016-07-28 19:42:59.000000000 +0000 @@ -10,7 +10,15 @@ #define ATL_AVgcc #endif #ifdef ATL_AVgcc - #include + #ifdef ATL_VSX390 /* part of broken IBM Z patch, presently never deffed */ + #if defined (__IBMC__) || defined (__IBMCPP__) + #include + #else + #include + #endif + #else + #include + #endif #define VECTOR_INIT(v0_,v1_,v2_,v3_) (vector float) {v0_,v1_,v2_,v3_} #define VECTOR_INITI(v0_,v1_,v2_,v3_) (vector int) {v0_,v1_,v2_,v3_} diff -Nru atlas-3.10.2/include/atlas_asm.h atlas-3.10.3/include/atlas_asm.h --- atlas-3.10.2/include/atlas_asm.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_asm.h 2016-07-28 19:42:59.000000000 +0000 @@ -6,9 +6,14 @@ #define my_join(pre, nam) pre ## nam #endif -#if (defined(ATL_OS_Win64) && !defined(ATL_USE64BITS)) || \ - defined(ATL_OS_Win9x) || 
defined(ATL_OS_OSX) || defined(ATL_OS_WinNT) +#if defined(ATL_OS_Win9x) || defined(ATL_OS_OSX) #define ATL_asmdecor(nam) Mjoin(_,nam) +#elif defined(ATL_OS_Win64) || defined(ATL_OS_WinNT) + #if defined(ATL_GAS_WOW64) || defined (ATL_USE64BITS) + #define ATL_asmdecor(nam) nam + #else + #define ATL_asmdecor(nam) Mjoin(_,nam) + #endif #elif defined(ATL_OS_AIX) && defined(ATL_GAS_PPC) #define ATL_asmdecor(nam) Mjoin(.,nam) #elif !defined(ATL_OS_OSX) && defined(ATL_GAS_PPC) && defined(ATL_USE64BITS) diff -Nru atlas-3.10.2/include/atlas_aux.h atlas-3.10.3/include/atlas_aux.h --- atlas-3.10.2/include/atlas_aux.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_aux.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_cplxsimd.h atlas-3.10.3/include/atlas_cplxsimd.h --- atlas-3.10.2/include/atlas_cplxsimd.h 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_cplxsimd.h 2016-07-28 19:42:59.000000000 +0000 @@ -0,0 +1,2047 @@ +#ifndef ATLAS_CPLXSIMD_H + #define ATLAS_CPLXSIMD_H 1 +#include "atlas_simd.h" +/* + * FUNCTIONALITY SUMMARY + * ============================================================================ + * Constant integers: ATL_CXVLEN, ATL_CXSPLDb + * Load/store for partial vecs (replace I with 0 < I < VLEN/2, 1 always there): + * ATL_vcxldI, ATL_vcxuldI, ATL_vcxstI, ATL_vcxustI + * Macros for axpy-based computation: + * ATL_vcxsplitRIld, ATL_vcxsplitRI, ATL_vcxPrepAlpha + * Macros for dot-based computation: + * ATL_vcxswapRI, ATL_vcxdotcomb + * Macros for updating complex C in GEMM + * * ATL_vunpckLO ATL_vunpckHI + * ============================================================================ + */ +/* + * 
============================================================================ + * This file provides macros for doing the types of complex computations + * needed by ATLAS in a machine, precision and VLEN-independent manner + * (i.e., this file changes based on VLEN/SIMD ISA, float/double, VLEN, + * but kernels implemented using it work unchanged regardless of these variables). + * ATLAS essentially does dot-product based computations (dot,GEMVT,GEMM) + * and AXPY-based (AXPY,GER,GER2). Both types of computation need both + * loads and stores. We just use the real load/store for full VLEN ops. + * However, we also need the ability to load/store a single complex number, + * which means the ability to load/store pairs of real numbers. In addition, + * if we want to be able to perform vector cleanup, we need the ability to + * load/store I complex numbers (0 < I < ATL_VLEN/2), with loads zeroing any + * elements above the loaded values. Therefore, this file provides + * (ATL_VLEN/2 - 1)*4 routines for loading/storing complex numbers: + * ATL_vcxldI(r_, p_) : load lower 2*I elts of aligned p_ to r_, zero rest + * ATL_vcxuldI(r_, p_): load lower 2*I elts of unaligned p_ to r_, zero rest + * ATL_vcxstI(p_, r_) : store lower 2*I real elts of r_ to aligned p_ + * ATL_vcxustI(p_, r_): store lower 2*I real elts of r_ to unaligned p_ + * + * There are numerous ways to do complex computations, but this file provides + * a particular approach for both dot- and axpy-based computations. + * + * For AXPY-based computations, we are performance limited by load/store of Y, + * so we permute all other ops to allow us to keep Y in natural order. + * Not all SIMD ISAs allow one to do different operations to different + * vector elements (eg., ADDSUB), so instead we manipulate alpha outside + * the main loop so that it is permuted and scaled appropriately to allow + * us to do MAC-based AXPY calculations. 
We will split X into two vectors + * with duplicated entries: + * {Xr, Xr}*(VLEN/2) {Xi, Xi}*(VLEN/2) + * This permutation must be done inside the loop, and is thus expensive. + * We provide the following functions to accomplish this: + * ATL_vcxsplitRIld(rR_, rI, p_): split&dup cplx #s from aligned p_ + ATL_vcxsplitRI(rXr_, rXi_) : split&dup from natural-order reg rXi_ + * ATL_vcxsplitRIld can be built out of ATL_vld & ATL_vcxsplitRI, which is how + * it is done for cleanup, but on some systems it is more efficient to + * do it directly from memory, so we provide a specialized high-performance + * version. On some systems, the alignment restrictions for this operation + * are lower than full VLEN, so we also provide the const macro: + * ATL_CXSPLDb : required byte alignment for ATL_vcxsplitRIld. + * We use ATL_cxsplit for vector cleanup & when X is not aligned to ATL_CXSPLDb. + * axpy-based calcs are doing Y += alpha * X. Alpha is loop-invariant, so we + * can manipulate it outside the loop, even if that manipulation is relatively + * inefficient. In order to perform the two real MACs required for cplx MAC, + * alpha is split into two vectors that match up with our X vecs as in: + * {Xr, Xr}*(VLEN/2) {Xi, Xi}*(VLEN/2) + * {ALi, Alr}*(VLEN/2) {ALr, -ALi}*(VLEN/2 + * The first of these is the natural order alpha (alN), and the second scaled + * and permuted (alS). First the scalar complex number is loaded to the + * register using ATL_cx[u]ld1, then it is transformed with: + * ATL_vcxPrepAlpha(alN, alS): alN is input & output, alS output only + * + * The naive approach to performing complex MAC (multiply and accumulate) + * requires permuting both X and Y inside the loop, which is very expensive. + * However, we notice that DOT (the accumulator) is loop invariant, so we + * can instead keep it in permuted & scaled form throughout the loop. This + * allows us to avoid either the permute of X or Y (but not both). 
+ * For DOT-based there is no performance difference between X and Y, so we + * can choose to permute either one (one must be computed to build the complex + * multiply and accumulate (MAC) out of real MACs). In general you can only + * force one vector to be aligned (vecs may be mutually misaligned), and that + * load will be cheaper than the unaligned load. We therefore perform loop + * peeling to force X to be aligned whenever that is possible, and then + * permute X rather than Y. The permute adds to the dependence chain in the + * loop, so you want dependent it on the fastest load. + * + * The technique for DOT-based calculations is that the two half of the MACs + * are stored in two different dot variables throughout the loop, one storing + * partial results for the real result, and one for the imaginary result. + * The real/imag dot vectors must be internally summed up to produce the + * final answer (this operation performed outside the loop). The imaginary + * dot looks like: {Xr*Yi, Xi*Yr}*(VLEN/2), so we add all elts to get the ans. + * Real looks: {Xi*Yi, Xr*Yr}*(VLEN/2), so we must subtract the odd elts + * from the even. We provide this macro to accomplish this: + * ATL_cxdotcomb(rR, rI) : put final ans in low 2 elts of rR + * + * Inside the loop, we keep Y in natural order, and have X in both natural + * order (rN) and with imaginary and complex swapped (rS). We provide: + * ATL_cxriswap(rS, rN): swap imag & real components of rN, store in rS + * + * ============================================================================ + */ +/* + * Define some length-specific constants. + * ATL_VONEPN is used to scale so even words are negated (imag*imag). 
+ */
+/*
+ * For each supported ATL_VLEN (number of real elements per vector):
+ *    ATL_CXVLEN : ATL_VLEN/2
+ *    ATL_CXVLSH : log2(ATL_CXVLEN)
+ *    ATL_VONEPN : ATL_VLEN-long vector of repeating {ATL_rone, ATL_rnone}
+ * Every ATL_VONEPN is a fully parenthesized compound literal so that it can
+ * be passed safely as a macro argument.
+ */
+#if ATL_VLEN == 2
+   #define ATL_CXVLEN 1
+   #define ATL_CXVLSH 0
+   #define ATL_VONEPN ((ATL_VTYPE){ATL_rone, ATL_rnone})
+#elif ATL_VLEN == 4
+   #define ATL_CXVLEN 2
+   #define ATL_CXVLSH 1
+   #define ATL_VONEPN ((ATL_VTYPE){ATL_rone, ATL_rnone,ATL_rone, ATL_rnone})
+#elif ATL_VLEN == 8
+   #define ATL_CXVLEN 4
+   #define ATL_CXVLSH 2
+   #define ATL_VONEPN ((ATL_VTYPE){ATL_rone, ATL_rnone,ATL_rone, ATL_rnone,\
+                                   ATL_rone, ATL_rnone,ATL_rone, ATL_rnone})
+#elif ATL_VLEN == 16
+   #define ATL_CXVLEN 8
+   #define ATL_CXVLSH 3
+   #define ATL_VONEPN ((ATL_VTYPE){ATL_rone, ATL_rnone,ATL_rone, ATL_rnone,\
+                                   ATL_rone, ATL_rnone,ATL_rone, ATL_rnone, \
+                                   ATL_rone, ATL_rnone,ATL_rone, ATL_rnone, \
+                                   ATL_rone, ATL_rnone,ATL_rone, ATL_rnone})
+#elif ATL_VLEN == 32
+   #define ATL_CXVLEN 16
+   #define ATL_CXVLSH 4
+   /* 8 rows of 4 = 32 elements inside ONE initializer list */
+   #define ATL_VONEPN ((ATL_VTYPE){ATL_rone, ATL_rnone,ATL_rone, ATL_rnone,\
+                                   ATL_rone, ATL_rnone,ATL_rone, ATL_rnone, \
+                                   ATL_rone, ATL_rnone,ATL_rone, ATL_rnone, \
+                                   ATL_rone, ATL_rnone,ATL_rone, ATL_rnone, \
+                                   ATL_rone, ATL_rnone,ATL_rone, ATL_rnone, \
+                                   ATL_rone, ATL_rnone,ATL_rone, ATL_rnone, \
+                                   ATL_rone, ATL_rnone,ATL_rone, ATL_rnone, \
+                                   ATL_rone, ATL_rnone,ATL_rone, ATL_rnone})
+#else
+   #error "unsupported VLEN!"
+#endif
+
+/*
+ * Define ld/st I, 0 < I < VLEN/2, I=1 always present (scalar complex ld/st)
+ * Argument order: loads are (r_, p_), stores are (p_, r_), where r_ is the
+ * vector register and p_ points at real elements.  Loads zero every element
+ * of r_ above the 2*I that are read.
+ */
+#if ATL_VLEN == 2 /* 1 vec == 1 complex: DCPLX&(SSE2||VSX||ARM) or gnuvec */
+   #define ATL_vcxld1(r_, p_) ATL_vld(r_, p_)
+   #define ATL_vcxuld1(r_, p_) ATL_vuld(r_, p_)
+   #define ATL_vcxst1(p_, r_) ATL_vst(p_, r_)
+   #define ATL_vcxust1(p_, r_) ATL_vust(p_, r_)
+#elif ATL_VLEN >= 4 /* gnuvec or DCPLX & AVX or SCPLX & (VSX || SSE) */
+   #if (ATL_VSX) && defined(SCPLX)
+      #if 0 /* gnuvec works better for unaligned load */
+      #define ATL_vcxuld1(r_, p_) \
+      { ATL_VTYPE t0_, t1_;\
+         t0_ = vec_splats(*(p_)); \
+         t1_ = vec_splats((p_)[1]); \
+         t0_ = vec_vmrglw(t0_, t1_); \
+         ATL_vzero(r_); \
+         r_ = vec_xxpermdi(t0_, r_, 0); \
+      }
+      #endif
+      /* the #if 0 above otherwise leaves the unaligned load undefined */
+      #ifndef ATL_vcxuld1
+         #define ATL_vcxuld1(r_, p_) r_ = (ATL_VTYPE){*(p_), (p_)[1]}
+      #endif
+      #define ATL_vcxld1(r_, p_)  r_ = (ATL_VTYPE) \
+         ((vector double){*((double*)(p_))})
+      #define ATL_vcxust1(p_, r_) { *(p_) = (r_)[0]; (p_)[1] = (r_)[1]; }
+      #define ATL_vcxst1(p_, r_) ATL_vcxust1(p_, r_)
+   #elif (defined(ATL_VECARM1) || defined(ATL_NEON)) && defined(SCPLX)
+      #define ATL_vcxuld1(r_, p_) \
+         r_ = vcombine_f32(vld1_f32(p_), vdup_n_f32(0.0f))
+      #define ATL_vcxust1(p_, r_) vst1_f32(p_, vget_low_f32(r_))
+      #define ATL_vcxld1(r_, p_) ATL_vcxuld1(r_, p_)
+      #define ATL_vcxst1(p_, r_) ATL_vcxust1(p_, r_)
+   #elif defined(ATL_AVX) && defined(DCPLX)
+      #define ATL_vcxld1(r_, p_) \
+      { \
+         ATL_vzero(r_); \
+         r_ = _mm256_insertf128_pd(r_, _mm_load_pd(p_), 0); \
+      }
+      #define ATL_vcxuld1(r_, p_) \
+      { \
+         ATL_vzero(r_); \
+         r_ = _mm256_insertf128_pd(r_, _mm_loadu_pd(p_), 0); \
+      }
+      #define ATL_vcxst1(p_, r_) _mm_store_pd(p_, _mm256_extractf128_pd(r_, 0))
+      #define ATL_vcxust1(p_, r_) _mm_storeu_pd(p_, _mm256_extractf128_pd(r_,0))
+   #elif defined(ATL_AVX) && defined(SCPLX)
+      #define ATL_vcxld1(r_, p_) \
+      { __m128 t0_;\
+         ATL_vzero(r_); \
+         t0_ = _mm_setzero_ps(); \
+         r_ = _mm256_insertf128_ps(r_, _mm_loadl_pi(t0_,(void*)(p_)), 0); \
+      }
+      #define ATL_vcxuld1(r_, p_) \
+      { __m128 t0_, t1_;\
+         ATL_vzero(r_); \
+         t0_ = _mm_load_ss(p_); \
+         t1_ = _mm_load_ss((p_)+1); \
+         t0_ = _mm_unpacklo_ps(t0_, t1_); \
+         r_ = _mm256_insertf128_ps(r_, t0_, 0); \
+      }
+      #define ATL_vcxst1(p_, r_) \
+         _mm_storel_pi((void*)(p_), _mm256_extractf128_ps(r_, 0))
+      #define ATL_vcxust1(p_, r_) \
+      { __m128 t_;\
+         t_ = _mm256_extractf128_ps(r_,0); \
+         _mm_store_ss(p_, t_); \
+         _mm_store_ss((p_)+1, _mm_shuffle_ps(t_, t_, 1)); \
+      }
+   #elif defined(ATL_SSE1) && defined(SCPLX)
+      #define ATL_vcxld1(r_, p_) \
+      { \
+         ATL_vzero(r_); \
+         r_ = _mm_loadl_pi(r_, ((void*)(p_))); \
+      }
+      #define ATL_vcxuld1(r_, p_) \
+      { ATL_VTYPE t_;\
+         r_ = _mm_load_ss(p_); \
+         t_ = _mm_load_ss((p_)+1); \
+         r_ = _mm_unpacklo_ps(r_, t_); \
+      }
+      #define ATL_vcxst1(p_, r_) _mm_storel_pi((void*)(p_), r_)
+      #define ATL_vcxust1(p_, r_) \
+      { \
+         _mm_store_ss(p_, r_); \
+         _mm_store_ss((p_)+1, _mm_shuffle_ps(r_, r_, 1)); \
+      }
+   #else /* gnuvec */
+      #define ATL_vcxuld1(r_, p_) r_ = (ATL_VTYPE){*(p_), (p_)[1]}
+      #define ATL_vcxust1(p_, r_) \
+      { \
+         *(p_) = (r_)[0]; \
+         (p_)[1] = (r_)[1]; \
+      }
+      #define ATL_vcxld1 ATL_vcxuld1
+      #define ATL_vcxst1 ATL_vcxust1
+   #endif
+/*
+ * For VL == 8, is gnuvec or SCPLX&AVX or DCPLX&AVX512
+ * For VL > 8, can only be SCPLX&AVX512 or gnuvec
+ */
+   #if ATL_VLEN >= 8  /* VL>=8, must define add op[2,3] */
+      #if defined(SCPLX) && defined(ATL_AVX)
+         #define ATL_vcxld2(r_, p_) \
+         { \
+            ATL_vzero(r_); \
+            r_ = _mm256_insertf128_ps(r_, _mm_load_ps(p_), 0); \
+         }
+         #define ATL_vcxuld2(r_, p_) \
+         { \
+            ATL_vzero(r_); \
+            r_ = _mm256_insertf128_ps(r_, _mm_loadu_ps(p_), 0); \
+         }
+         #define ATL_vcxld3(r_, p_) \
+         { __m128 t_; \
+            r_ = _mm256_insertf128_ps(r_, _mm_load_ps(p_), 0); \
+            t_ = _mm_setzero_ps(); \
+            t_ = _mm_loadl_pi(t_, ((void*)((p_)+4))); \
+            r_ = _mm256_insertf128_ps(r_, t_, 1); \
+         }
+         #define ATL_vcxuld3(r_, p_) \
+         { __m128 t0_, t1_; \
+            r_ = _mm256_insertf128_ps(r_, _mm_loadu_ps(p_), 0); \
+            t0_ = _mm_load_ss((p_)+4); \
+            t1_ = _mm_load_ss((p_)+5); \
+            t0_ = _mm_unpacklo_ps(t0_, t1_); \
+            r_ = _mm256_insertf128_ps(r_, t0_, 1); \
+         }
+         #define ATL_vcxst2(p_, r_) \
+            _mm_store_ps(p_, _mm256_extractf128_ps(r_, 0))
+         #define ATL_vcxust2(p_, r_) \
+            _mm_storeu_ps(p_, _mm256_extractf128_ps(r_,0))
+         /*
+          * Elements 4 & 5 (third complex number) live in the UPPER 128-bit
+          * lane, so lane 1 must be extracted, exactly as ATL_vcxust3 does.
+          */
+         #define ATL_vcxst3(p_, r_) \
+         { __m128 t_; \
+            ATL_vcxst2(p_, r_); \
+            t_ = _mm256_extractf128_ps(r_, 1); \
+            _mm_storel_pi((void*)((p_)+4), t_) ; \
+         }
+         #define ATL_vcxust3(p_, r_) \
+         { __m128 t_; \
+            ATL_vcxust2(p_, r_); \
+            t_ = _mm256_extractf128_ps(r_, 1); \
+            _mm_store_ss((p_)+4, t_); \
+            _mm_store_ss((p_)+5, _mm_shuffle_ps(t_, t_, 1)); \
+         }
+      #elif defined(DCPLX) && defined(ATL_AVX512__00)
+         #error "AVX512 not presently supported"
+      #elif defined(SCPLX) && defined(ATL_AVX512__00)
+         #error "AVX512 not presently supported"
+      #else /* gnuvec */
+         #define ATL_vcxuld2(r_, p_) \
+            r_ = (ATL_VTYPE){*(p_), (p_)[1], (p_)[2], (p_)[3]}
+         #define ATL_vcxust2(p_, r_) \
+         { \
+            *(p_) = (r_)[0]; \
+            (p_)[1] = (r_)[1]; \
+            (p_)[2] = (r_)[2]; \
+            (p_)[3] = (r_)[3]; \
+         }
+         #define ATL_vcxuld3(r_, p_) \
+            r_ = (ATL_VTYPE){*(p_), (p_)[1], (p_)[2], (p_)[3], (p_)[4], (p_)[5]}
+         #define ATL_vcxust3(p_, r_) \
+         { \
+            *(p_) = (r_)[0]; \
+            (p_)[1] = (r_)[1]; \
+            (p_)[2] = (r_)[2]; \
+            (p_)[3] = (r_)[3]; \
+            (p_)[4] = (r_)[4]; \
+            (p_)[5] = (r_)[5]; \
+         }
+
+         #define ATL_vcxld2 ATL_vcxuld2
+         #define ATL_vcxst2 ATL_vcxust2
+         #define ATL_vcxld3 ATL_vcxuld3
+         #define ATL_vcxst3 ATL_vcxust3
+      #endif
+      #if ATL_VLEN >= 16 /* need [4-7]: gnuvec or SREAL&AVX512 */
+         #if defined(SCPLX__0) && defined(ATL_AVX512__0)
+         #else /* gnuvec */
+            #define ATL_vcxuld4(r_, p_) r_ = (ATL_VTYPE)\
+               {*(p_), (p_)[1], (p_)[2], (p_)[3], \
+                (p_)[4],(p_)[5],(p_)[6],(p_)[7]}
+            #define ATL_vcxuld5(r_, p_) r_ = (ATL_VTYPE)\
+               {*(p_), (p_)[1], (p_)[2], (p_)[3], \
+                (p_)[4],(p_)[5],(p_)[6],(p_)[7], \
+                (p_)[8],(p_)[9]}
+            #define ATL_vcxuld6(r_, p_) r_ = (ATL_VTYPE)\
+               {*(p_), (p_)[1], (p_)[2], (p_)[3], \
+                (p_)[4],(p_)[5],(p_)[6],(p_)[7], \
+                (p_)[8],(p_)[9],(p_)[10],(p_)[11]}
+            #define ATL_vcxuld7(r_, p_) r_ = (ATL_VTYPE)\
+               {*(p_), (p_)[1], (p_)[2], (p_)[3], \
+                (p_)[4],(p_)[5],(p_)[6],(p_)[7], \
+                (p_)[8],(p_)[9],(p_)[10],(p_)[11], \
+                (p_)[12],(p_)[13]}
+            /* stores below: one row per complex (real,imag) pair */
+            #define ATL_vcxust4(p_, r_) \
+            { \
+               *(p_) = (r_)[0];  (p_)[1] = (r_)[1]; \
+               (p_)[2] = (r_)[2];  (p_)[3] = (r_)[3]; \
+               (p_)[4] = (r_)[4];  (p_)[5] = (r_)[5]; \
+               (p_)[6] = (r_)[6];  (p_)[7] = (r_)[7]; \
+            }
+            #define ATL_vcxust5(p_, r_) \
+            { \
+               *(p_) = (r_)[0];  (p_)[1] = (r_)[1]; \
+               (p_)[2] = (r_)[2];  (p_)[3] = (r_)[3]; \
+               (p_)[4] = (r_)[4];  (p_)[5] = (r_)[5]; \
+               (p_)[6] = (r_)[6];  (p_)[7] = (r_)[7]; \
+               (p_)[8] = (r_)[8];  (p_)[9] = (r_)[9]; \
+            }
+            #define ATL_vcxust6(p_, r_) \
+            { \
+               *(p_) = (r_)[0];  (p_)[1] = (r_)[1]; \
+               (p_)[2] = (r_)[2];  (p_)[3] = (r_)[3]; \
+               (p_)[4] = (r_)[4];  (p_)[5] = (r_)[5]; \
+               (p_)[6] = (r_)[6];  (p_)[7] = (r_)[7]; \
+               (p_)[8] = (r_)[8];  (p_)[9] = (r_)[9]; \
+               (p_)[10] = (r_)[10];  (p_)[11] = (r_)[11]; \
+            }
+            #define ATL_vcxust7(p_, r_) \
+            { \
+               *(p_) = (r_)[0];  (p_)[1] = (r_)[1]; \
+               (p_)[2] = (r_)[2];  (p_)[3] = (r_)[3]; \
+               (p_)[4] = (r_)[4];  (p_)[5] = (r_)[5]; \
+               (p_)[6] = (r_)[6];  (p_)[7] = (r_)[7]; \
+               (p_)[8] = (r_)[8];  (p_)[9] = (r_)[9]; \
+               (p_)[10] = (r_)[10];  (p_)[11] = (r_)[11]; \
+               (p_)[12] = (r_)[12];  (p_)[13] = (r_)[13]; \
+            }
+
+            #define ATL_vcxld4 ATL_vcxuld4
+            #define ATL_vcxst4 ATL_vcxust4
+            #define ATL_vcxld5 ATL_vcxuld5
+            #define ATL_vcxst5 ATL_vcxust5
+            #define ATL_vcxld6 ATL_vcxuld6
+            #define ATL_vcxst6 ATL_vcxust6
+            #define ATL_vcxld7 ATL_vcxuld7
+            #define ATL_vcxst7 ATL_vcxust7
+         #endif
+         #if ATL_VLEN >= 32
+
+            /* I = 8..15: loads list 2*I elements, stores one pair per row */
+            #define ATL_vcxuld8(r_, p_) r_ = (ATL_VTYPE){ \
+               *(p_), (p_)[1], (p_)[2], (p_)[3], \
+               (p_)[4], (p_)[5], (p_)[6], (p_)[7], \
+               (p_)[8], (p_)[9], (p_)[10], (p_)[11], \
+               (p_)[12], (p_)[13], (p_)[14], (p_)[15]}
+            #define ATL_vcxust8(p_, r_) \
+            { \
+               *(p_) = (r_)[0];  (p_)[1] = (r_)[1]; \
+               (p_)[2] = (r_)[2];  (p_)[3] = (r_)[3]; \
+               (p_)[4] = (r_)[4];  (p_)[5] = (r_)[5]; \
+               (p_)[6] = (r_)[6];  (p_)[7] = (r_)[7]; \
+               (p_)[8] = (r_)[8];  (p_)[9] = (r_)[9]; \
+               (p_)[10] = (r_)[10];  (p_)[11] = (r_)[11]; \
+               (p_)[12] = (r_)[12];  (p_)[13] = (r_)[13]; \
+               (p_)[14] = (r_)[14];  (p_)[15] = (r_)[15]; \
+            }
+            #define ATL_vcxuld9(r_, p_) r_ = (ATL_VTYPE){ \
+               *(p_), (p_)[1], (p_)[2], (p_)[3], \
+               (p_)[4], (p_)[5], (p_)[6], (p_)[7], \
+               (p_)[8], (p_)[9], (p_)[10], (p_)[11], \
+               (p_)[12], (p_)[13], (p_)[14], (p_)[15], \
+               (p_)[16], (p_)[17]}
+            #define ATL_vcxust9(p_, r_) \
+            { \
+               *(p_) = (r_)[0];  (p_)[1] = (r_)[1]; \
+               (p_)[2] = (r_)[2];  (p_)[3] = (r_)[3]; \
+               (p_)[4] = (r_)[4];  (p_)[5] = (r_)[5]; \
+               (p_)[6] = (r_)[6];  (p_)[7] = (r_)[7]; \
+               (p_)[8] = (r_)[8];  (p_)[9] = (r_)[9]; \
+               (p_)[10] = (r_)[10];  (p_)[11] = (r_)[11]; \
+               (p_)[12] = (r_)[12];  (p_)[13] = (r_)[13]; \
+               (p_)[14] = (r_)[14];  (p_)[15] = (r_)[15]; \
+               (p_)[16] = (r_)[16];  (p_)[17] = (r_)[17]; \
+            }
+            #define ATL_vcxuld10(r_, p_) r_ = (ATL_VTYPE){ \
+               *(p_), (p_)[1], (p_)[2], (p_)[3], \
+               (p_)[4], (p_)[5], (p_)[6], (p_)[7], \
+               (p_)[8], (p_)[9], (p_)[10], (p_)[11], \
+               (p_)[12], (p_)[13], (p_)[14], (p_)[15], \
+               (p_)[16], (p_)[17], (p_)[18], (p_)[19]}
+            #define ATL_vcxust10(p_, r_) \
+            { \
+               *(p_) = (r_)[0];  (p_)[1] = (r_)[1]; \
+               (p_)[2] = (r_)[2];  (p_)[3] = (r_)[3]; \
+               (p_)[4] = (r_)[4];  (p_)[5] = (r_)[5]; \
+               (p_)[6] = (r_)[6];  (p_)[7] = (r_)[7]; \
+               (p_)[8] = (r_)[8];  (p_)[9] = (r_)[9]; \
+               (p_)[10] = (r_)[10];  (p_)[11] = (r_)[11]; \
+               (p_)[12] = (r_)[12];  (p_)[13] = (r_)[13]; \
+               (p_)[14] = (r_)[14];  (p_)[15] = (r_)[15]; \
+               (p_)[16] = (r_)[16];  (p_)[17] = (r_)[17]; \
+               (p_)[18] = (r_)[18];  (p_)[19] = (r_)[19]; \
+            }
+            #define ATL_vcxuld11(r_, p_) r_ = (ATL_VTYPE){ \
+               *(p_), (p_)[1], (p_)[2], (p_)[3], \
+               (p_)[4], (p_)[5], (p_)[6], (p_)[7], \
+               (p_)[8], (p_)[9], (p_)[10], (p_)[11], \
+               (p_)[12], (p_)[13], (p_)[14], (p_)[15], \
+               (p_)[16], (p_)[17], (p_)[18], (p_)[19], \
+               (p_)[20], (p_)[21]}
+            #define ATL_vcxust11(p_, r_) \
+            { \
+               *(p_) = (r_)[0];  (p_)[1] = (r_)[1]; \
+               (p_)[2] = (r_)[2];  (p_)[3] = (r_)[3]; \
+               (p_)[4] = (r_)[4];  (p_)[5] = (r_)[5]; \
+               (p_)[6] = (r_)[6];  (p_)[7] = (r_)[7]; \
+               (p_)[8] = (r_)[8];  (p_)[9] = (r_)[9]; \
+               (p_)[10] = (r_)[10];  (p_)[11] = (r_)[11]; \
+               (p_)[12] = (r_)[12];  (p_)[13] = (r_)[13]; \
+               (p_)[14] = (r_)[14];  (p_)[15] = (r_)[15]; \
+               (p_)[16] = (r_)[16];  (p_)[17] = (r_)[17]; \
+               (p_)[18] = (r_)[18];  (p_)[19] = (r_)[19]; \
+               (p_)[20] = (r_)[20];  (p_)[21] = (r_)[21]; \
+            }
+            #define ATL_vcxuld12(r_, p_) r_ = (ATL_VTYPE){ \
+               *(p_), (p_)[1], (p_)[2], (p_)[3], \
+               (p_)[4], (p_)[5], (p_)[6], (p_)[7], \
+               (p_)[8], (p_)[9], (p_)[10], (p_)[11], \
+               (p_)[12], (p_)[13], (p_)[14], (p_)[15], \
+               (p_)[16], (p_)[17], (p_)[18], (p_)[19], \
+               (p_)[20], (p_)[21], (p_)[22], (p_)[23]}
+            #define ATL_vcxust12(p_, r_) \
+            { \
+               *(p_) = (r_)[0];  (p_)[1] = (r_)[1]; \
+               (p_)[2] = (r_)[2];  (p_)[3] = (r_)[3]; \
+               (p_)[4] = (r_)[4];  (p_)[5] = (r_)[5]; \
+               (p_)[6] = (r_)[6];  (p_)[7] = (r_)[7]; \
+               (p_)[8] = (r_)[8];  (p_)[9] = (r_)[9]; \
+               (p_)[10] = (r_)[10];  (p_)[11] = (r_)[11]; \
+               (p_)[12] = (r_)[12];  (p_)[13] = (r_)[13]; \
+               (p_)[14] = (r_)[14];  (p_)[15] = (r_)[15]; \
+               (p_)[16] = (r_)[16];  (p_)[17] = (r_)[17]; \
+               (p_)[18] = (r_)[18];  (p_)[19] = (r_)[19]; \
+               (p_)[20] = (r_)[20];  (p_)[21] = (r_)[21]; \
+               (p_)[22] = (r_)[22];  (p_)[23] = (r_)[23]; \
+            }
+            #define ATL_vcxuld13(r_, p_) r_ = (ATL_VTYPE){ \
+               *(p_), (p_)[1], (p_)[2], (p_)[3], \
+               (p_)[4], (p_)[5], (p_)[6], (p_)[7], \
+               (p_)[8], (p_)[9], (p_)[10], (p_)[11], \
+               (p_)[12], (p_)[13], (p_)[14], (p_)[15], \
+               (p_)[16], (p_)[17], (p_)[18], (p_)[19], \
+               (p_)[20], (p_)[21], (p_)[22], (p_)[23], \
+               (p_)[24], (p_)[25]}
+            #define ATL_vcxust13(p_, r_) \
+            { \
+               *(p_) = (r_)[0];  (p_)[1] = (r_)[1]; \
+               (p_)[2] = (r_)[2];  (p_)[3] = (r_)[3]; \
+               (p_)[4] = (r_)[4];  (p_)[5] = (r_)[5]; \
+               (p_)[6] = (r_)[6];  (p_)[7] = (r_)[7]; \
+               (p_)[8] = (r_)[8];  (p_)[9] = (r_)[9]; \
+               (p_)[10] = (r_)[10];  (p_)[11] = (r_)[11]; \
+               (p_)[12] = (r_)[12];  (p_)[13] = (r_)[13]; \
+               (p_)[14] = (r_)[14];  (p_)[15] = (r_)[15]; \
+               (p_)[16] = (r_)[16];  (p_)[17] = (r_)[17]; \
+               (p_)[18] = (r_)[18];  (p_)[19] = (r_)[19]; \
+               (p_)[20] = (r_)[20];  (p_)[21] = (r_)[21]; \
+               (p_)[22] = (r_)[22];  (p_)[23] = (r_)[23]; \
+               (p_)[24] = (r_)[24];  (p_)[25] = (r_)[25]; \
+            }
+            #define ATL_vcxuld14(r_, p_) r_ = (ATL_VTYPE){ \
+               *(p_), (p_)[1], (p_)[2], (p_)[3], \
+               (p_)[4], (p_)[5], (p_)[6], (p_)[7], \
+               (p_)[8], (p_)[9], (p_)[10], (p_)[11], \
+               (p_)[12], (p_)[13], (p_)[14], (p_)[15], \
+               (p_)[16], (p_)[17], (p_)[18], (p_)[19], \
+               (p_)[20], (p_)[21], (p_)[22], (p_)[23], \
+               (p_)[24], (p_)[25], (p_)[26], (p_)[27]}
+            #define ATL_vcxust14(p_, r_) \
+            { \
+               *(p_) = (r_)[0];  (p_)[1] = (r_)[1]; \
+               (p_)[2] = (r_)[2];  (p_)[3] = (r_)[3]; \
+               (p_)[4] = (r_)[4];  (p_)[5] = (r_)[5]; \
+               (p_)[6] = (r_)[6];  (p_)[7] = (r_)[7]; \
+               (p_)[8] = (r_)[8];  (p_)[9] = (r_)[9]; \
+               (p_)[10] = (r_)[10];  (p_)[11] = (r_)[11]; \
+               (p_)[12] = (r_)[12];  (p_)[13] = (r_)[13]; \
+               (p_)[14] = (r_)[14];  (p_)[15] = (r_)[15]; \
+               (p_)[16] = (r_)[16];  (p_)[17] = (r_)[17]; \
+               (p_)[18] = (r_)[18];  (p_)[19] = (r_)[19]; \
+               (p_)[20] = (r_)[20];  (p_)[21] = (r_)[21]; \
+               (p_)[22] = (r_)[22];  (p_)[23] = (r_)[23]; \
+               (p_)[24] = (r_)[24];  (p_)[25] = (r_)[25]; \
+               (p_)[26] = (r_)[26];  (p_)[27] = (r_)[27]; \
+            }
+            #define ATL_vcxuld15(r_, p_) r_ = (ATL_VTYPE){ \
+               *(p_), (p_)[1], (p_)[2], (p_)[3], \
+               (p_)[4], (p_)[5], (p_)[6], (p_)[7], \
+               (p_)[8], (p_)[9], (p_)[10], (p_)[11], \
+               (p_)[12], (p_)[13], (p_)[14], (p_)[15], \
+               (p_)[16], (p_)[17], (p_)[18], (p_)[19], \
+               (p_)[20], (p_)[21], (p_)[22], (p_)[23], \
+               (p_)[24], (p_)[25], (p_)[26], (p_)[27], \
+               (p_)[28], (p_)[29]}
+            #define ATL_vcxust15(p_, r_) \
+            { \
+               *(p_) = (r_)[0];  (p_)[1] = (r_)[1]; \
+               (p_)[2] = (r_)[2];  (p_)[3] = (r_)[3]; \
+               (p_)[4] = (r_)[4];  (p_)[5] = (r_)[5]; \
+               (p_)[6] = (r_)[6];  (p_)[7] = (r_)[7]; \
+               (p_)[8] = (r_)[8];  (p_)[9] = (r_)[9]; \
+               (p_)[10] = (r_)[10];  (p_)[11] = (r_)[11]; \
+               (p_)[12] = (r_)[12];  (p_)[13] = (r_)[13]; \
+               (p_)[14] = (r_)[14];  (p_)[15] = (r_)[15]; \
+               (p_)[16] = (r_)[16];  (p_)[17] = (r_)[17]; \
+               (p_)[18] = (r_)[18];  (p_)[19] = (r_)[19]; \
+               (p_)[20] = (r_)[20];  (p_)[21] = (r_)[21]; \
+               (p_)[22] = (r_)[22];  (p_)[23] = (r_)[23]; \
+               (p_)[24] = (r_)[24];  (p_)[25] = (r_)[25]; \
+               (p_)[26] = (r_)[26];  (p_)[27] = (r_)[27]; \
+               (p_)[28] = (r_)[28];  (p_)[29] = (r_)[29]; \
+            }
+            #define ATL_vcxld8 ATL_vcxuld8
+            #define ATL_vcxst8 ATL_vcxust8
+            #define ATL_vcxld9 ATL_vcxuld9
+            #define ATL_vcxst9 ATL_vcxust9
+            #define ATL_vcxld10 ATL_vcxuld10
+            #define ATL_vcxst10 ATL_vcxust10
+            #define ATL_vcxld11 ATL_vcxuld11
+            #define ATL_vcxst11 ATL_vcxust11
+            #define ATL_vcxld12 ATL_vcxuld12
+            #define ATL_vcxst12 ATL_vcxust12
+            #define ATL_vcxld13 ATL_vcxuld13
+            #define ATL_vcxst13 ATL_vcxust13
+            #define ATL_vcxld14 ATL_vcxuld14
+            #define ATL_vcxst14 ATL_vcxust14
+            #define ATL_vcxld15 ATL_vcxuld15
+            #define ATL_vcxst15 ATL_vcxust15
+            #if ATL_VLEN > 32  /* VLEN == 32, gnuvec only */
+               #error "Unsupported VLEN > 32"
+            #endif
+         #endif
+      #endif
+   #endif
+#endif
+/*
+ * Define ATL_vcxswapRI, ATL_vcxsplitRI.
+ * Define ATL_vcxsplitRIld if you don't want vld/splitRI version.
+ * Define ATL_vcxdotcomb & ATL_vcxPrepAlpha if you don't want to use
+ * system-indep (slow) versions.  ATL_vcxdotcomb has a fast sys-indep
+ * version for VLEN==2.
+ * Define ATL_vunpckLO/HI if you don't want to use sys-ind vers.
+ *
+ * All macros below refer ONLY to their own (trailing-underscore) parameters,
+ * so they work no matter what the caller's variables are named, and none
+ * carries a trailing semicolon (the caller supplies it).
+ */
+#ifdef ATL_VSX
+   #ifdef SCPLX
+      #define ATL_vunpckLO(d_, s0_, s1_) d_ = (ATL_VTYPE) \
+         vec_mergee((vector unsigned int)(s0_), (vector unsigned int)(s1_))
+      #define ATL_vunpckHI(d_, s0_, s1_) d_ = (ATL_VTYPE) \
+         vec_mergeo((vector unsigned int)(s0_), (vector unsigned int)(s1_))
+      #define ATL_vcxswapRI(d_, s_) d_ = vec_perm(s_,s_,(vector unsigned char) \
+         {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11})
+      #if ATL_FULLGCCVSX  /* not supported by older gcc (eg. 4.8) */
+         #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \
+         { \
+            rXr_ = (ATL_VTYPE) vec_mergee((vector unsigned int)(rXi_), \
+                                          (vector unsigned int)(rXi_)); \
+            rXi_ = (ATL_VTYPE) vec_mergeo((vector unsigned int)(rXi_),\
+                                          (vector unsigned int)(rXi_)); \
+         }
+         #define ATL_vcxdotcomb(rR_, rI_) /* low 2 elts rR_ gets ans */ \
+         {  ATL_VTYPE t1_;\
+            ATL_vmul(rR_, rR_, ATL_VONEPN); \
+            t1_ = vec_vmrglw(rR_, rI_); \
+            rR_ = vec_vmrghw(rR_, rI_); \
+            ATL_vadd(rR_, rR_, t1_); \
+            t1_ = vec_xxpermdi(rR_, rR_, 2); \
+            ATL_vadd(rR_, rR_, t1_); \
+         }
+      #else
+/*
+ * Using these guys as constants isn't so great: gcc 4.8.2 pulls
+ * the formation of the first iperm vector out of a loop, but then leaves
+ * the formation of the second (in terms of the first) inside the loop.
+ * The fix is to have this file define DECL/INIT macros, where we manually
+ * declare the perm vector, create it, and hoist it ourselves.
+ * We'll keep with crap way, since later gcc supports mergee/mergeo.
+ */
+         #define ATL_MERGEE (vector unsigned char) \
+            {0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11}
+         #define ATL_MERGEO (vector unsigned char) \
+            {4, 5, 6, 7, 4, 5, 6, 7, 12, 13, 14, 15, 12, 13, 14, 15}
+         #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \
+         { \
+            rXr_ = vec_perm(rXi_, rXi_, ATL_MERGEE); \
+            rXi_ = vec_perm(rXi_, rXi_, ATL_MERGEO); \
+         }
+      #endif
+   #else   /* DCPLX */
+      #define ATL_vunpckLO(d_, s0_, s1_) d_ = vec_xxpermdi(s0_, s1_, 0)
+      #define ATL_vunpckHI(d_, s0_, s1_) d_ = vec_xxpermdi(s0_, s1_, 3)
+      #define ATL_vcxswapRI(d_, s_) d_ = vec_xxpermdi(s_, s_, 2)
+      #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \
+      { \
+         rXr_ = vec_xxpermdi(rXi_, rXi_, 0); \
+         rXi_ = vec_xxpermdi(rXi_, rXi_, 3); \
+      }
+      #define ATL_CXSPLDb ATL_VLENb
+      #define ATL_vcxsplitRIld(rXr_, rXi_, pX_) \
+      { \
+         rXr_ = vec_splats(*(pX_)); \
+         rXi_ = vec_splats(*((pX_)+1)); \
+      }
+   #endif
+#elif defined(ATL_VECARM1) || defined(ATL_NEON)
+   #ifdef SCPLX
+      #if 0
+      #define ATL_vcxsplitRIld(rXr_, rXi_, pX_) \
+      { unsigned long long *lp_=(void*)(pX_), l0_, l1_, rl0_, il0_, rl1_, il1_;\
+         l0_ = *lp_;          /* l0 = {i0, r0} :: cycle 0 */\
+         l1_ = lp_[1];        /* l1 = {i1, r1} :: cycle 0 */\
+         rl0_ = l0_ << 32;    /* rl0= {r0,  0} :: cycle 1 */\
+         il0_ = l0_ >> 32;    /* il0= { 0, i0} :: cycle 1*/\
+         rl1_ = l1_ << 32;    /* rl1= {r1,  0} :: cycle 2*/\
+         il1_ = l1_ >> 32;    /* il1= { 0, i1} :: cycle 2*/\
+         rl0_ |= (rl0_>> 32); /* rl0= {r0, r0} :: cycle 3*/\
+         il0_ |= (il0_<< 32); /* il0= {i0, i0} :: cycle 3*/\
+         rl1_ |= (rl1_>> 32); /* rl1= {r1, r1} :: cycle 4*/\
+         il1_ |= (il1_<< 32); /* il1= {i1, i1} :: cycle 4*/\
+         rXr_ = vcombine_f32(vreinterpret_f32_u64(rl0_), \
+                             vreinterpret_f32_u64(rl1_)); /* cycle 5 */\
+         rXi_ = vcombine_f32(vreinterpret_f32_u64(il0_), \
+                             vreinterpret_f32_u64(il1_)); /* cycle 6 */\
+      }
+      #endif
+
+      #if defined(ATL_VECARM1)
+         #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \
+         { \
+            rXr_ = vtrn1q_f32(rXi_, rXi_); \
+            rXi_ = vtrn2q_f32(rXi_, rXi_); \
+         }
+      #else
+         /* rXr_ is filled first: rXi_ is overwritten in place afterwards */
+         #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \
+         { \
+            (rXr_)[0] = (rXr_)[1] = (rXi_)[0]; \
+            (rXr_)[2] = (rXr_)[3] = (rXi_)[2]; \
+            (rXi_)[0] = (rXi_)[1] = (rXi_)[1]; \
+            (rXi_)[2] = (rXi_)[3] = (rXi_)[3]; \
+         }
+      #endif
+      #define ATL_vcxswapRI(d_, s_) \
+         d_ = vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(s_)))
+      #define ATL_vcxdotcomb(rR_, rI_) /* low 2 elts rR_ gets ans */ \
+      { \
+         (rR_)[0] += (rR_)[2] - (rR_)[1] - (rR_)[3]; \
+         (rR_)[1] = (rI_)[0] + (rI_)[1] + (rI_)[2] + (rI_)[3]; \
+      }
+      #define ATL_vcxPrepAlpha(rALn_, rALs_) /* rALs={ral,-ial}*(VLEN/2) */ \
+      { \
+         rALn_ = vreinterpretq_f32_u64(vdupq_lane_u64(\
+                    vreinterpret_u64_f32(vget_low_f32(rALn_)),0)); \
+         ATL_vmul(rALs_, rALn_, ATL_VONEPN); \
+         ATL_vcxswapRI(rALs_, rALs_); \
+      }
+   #else
+      #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \
+      { \
+         rXr_ = vdupq_laneq_f64(rXi_, 0); \
+         rXi_ = vdupq_laneq_f64(rXi_, 1); \
+      }
+      #define ATL_vcxswapRI(d_, s_) \
+         d_ = vcombine_f64(vget_high_f64(s_),vget_low_f64(s_))
+      #define ATL_vcxdotcomb(rR_, rI_) /* low 2 elts rR_ gets ans */ \
+      { \
+         ATL_vmul(rR_, rR_, ATL_VONEPN); \
+         rR_ = vpaddq_f64(rR_, rI_); \
+      }
+      #define ATL_vcxPrepAlpha(rALn_, rALs_) /* rALs={ral,-ial}*(VLEN/2) */ \
+      { \
+         ATL_vmul(rALs_, rALn_, ATL_VONEPN); \
+         ATL_vcxswapRI(rALs_, rALs_); \
+      }
+   #endif
+#elif defined(ATL_AVXMAC) || defined(ATL_AVX)
+   #ifdef SCPLX
+      #define ATL_vunpckLO(d_, s0_, s1_) d_ = _mm256_unpacklo_ps(s0_, s1_)
+      #define ATL_vunpckHI(d_, s0_, s1_) d_ = _mm256_unpackhi_ps(s0_, s1_)
+      #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \
+      { \
+         rXr_ = _mm256_moveldup_ps(rXi_); \
+         rXi_ = _mm256_movehdup_ps(rXi_); \
+      }
+      #define ATL_vcxswapRI(d_, s_) d_ = _mm256_shuffle_ps(s_, s_, 0xB1)
+      #define ATL_vcxdotcomb(rR_, rI_) /* low 2 elts rR_ gets ans */ \
+      {  __m128 t0_;\
+         ATL_vmul(rR_, rR_, ATL_VONEPN); \
+         rR_ = _mm256_hadd_ps(rR_, rI_); \
+         rR_ = _mm256_hadd_ps(rR_, rR_); \
+         t0_ = _mm256_extractf128_ps(rR_, 1); \
+         t0_ = _mm_add_ps(t0_, _mm256_extractf128_ps(rR_, 0)); \
+         rR_ = _mm256_insertf128_ps(rR_, t0_, 0); \
+      }
+      #define ATL_vcxPrepAlpha(rALn_, rALs_) /* rALs={ral,-ial}*(VLEN/2) */ \
+      {  __m128 t0_; \
+         t0_ = _mm256_extractf128_ps(rALn_,0); \
+         t0_ = _mm_movelh_ps(t0_, t0_); \
+         rALn_ = _mm256_insertf128_ps(rALn_,t0_, 0); \
+         rALn_ = _mm256_insertf128_ps(rALn_,t0_, 1); \
+         ATL_vmul(rALs_, rALn_, ATL_VONEPN); \
+         ATL_vcxswapRI(rALs_, rALs_); \
+      }
+   #else /* DCPLX */
+      #define ATL_vunpckLO(d_, s0_, s1_) d_ = _mm256_unpacklo_pd(s0_, s1_)
+      #define ATL_vunpckHI(d_, s0_, s1_) d_ = _mm256_unpackhi_pd(s0_, s1_)
+      #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \
+      { \
+         rXr_ = _mm256_movedup_pd(rXi_); \
+         rXi_ = _mm256_shuffle_pd(rXi_, rXi_, 0xF); \
+      }
+      #define ATL_vcxswapRI(d_, s_) d_ = _mm256_shuffle_pd(s_, s_, 5)
+      #define ATL_vcxdotcomb(rR_, rI_) /* low 2 elts rR_ gets ans */ \
+      {  __m128d t0_;\
+         ATL_vmul(rR_, rR_, ATL_VONEPN); \
+         rR_ = _mm256_hadd_pd(rR_, rI_); \
+         t0_ = _mm256_extractf128_pd(rR_, 1); \
+         t0_ = _mm_add_pd(t0_, _mm256_extractf128_pd(rR_, 0)); \
+         rR_ = _mm256_insertf128_pd(rR_, t0_, 0); \
+      }
+      #define ATL_vcxPrepAlpha(rALn_, rALs_) /* rALs={ral,-ial}*(VLEN/2) */ \
+      { \
+         rALn_ = _mm256_insertf128_pd(rALn_,_mm256_extractf128_pd(rALn_,0),1); \
+         ATL_vmul(rALs_, rALn_, ATL_VONEPN); \
+         ATL_vcxswapRI(rALs_, rALs_); \
+      }
+   #endif
+#elif defined(ATL_SSE2) && defined(DCPLX)
+   #define ATL_vunpckLO(d_, s0_, s1_) d_ = _mm_unpacklo_pd(s0_, s1_)
+   #define ATL_vunpckHI(d_, s0_, s1_) d_ = _mm_unpackhi_pd(s0_, s1_)
+   #ifdef ATL_SSE3
+      #define ATL_CXSPLDb 8
+      #define ATL_vcxsplitRIld(rXr_, rXi_, pX_) \
+      { \
+         rXr_ = _mm_loaddup_pd(pX_); \
+         rXi_ = _mm_loaddup_pd((pX_)+1); \
+      }
+      #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \
+      { \
+         rXr_ = _mm_movedup_pd(rXi_); \
+         rXi_ = _mm_shuffle_pd(rXi_, rXi_, 0xF); \
+      }
+      #define ATL_vcxdotcomb(rR_, rI_) /* low 2 elts rR_ gets ans */ \
+      { \
+         ATL_vmul(rR_, rR_, ATL_VONEPN); \
+         rR_ = _mm_hadd_pd(rR_, rI_); \
+      }
+   #else
+      #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \
+      { \
+         rXr_ = _mm_unpacklo_pd(rXi_, rXi_); \
+         rXi_ = _mm_unpackhi_pd(rXi_, rXi_); \
+      }
+      #define ATL_vcxdotcomb(rR_, rI_) /* low 2 elts rR_ gets ans */ \
+      {  __m128d t1_;\
+         ATL_vmul(rR_, rR_, ATL_VONEPN); \
+         t1_ = _mm_unpacklo_pd(rR_, rI_); \
+         rR_ = _mm_unpackhi_pd(rR_, rI_); \
+         ATL_vadd(rR_, rR_, t1_); \
+      }
+   #endif
+   #define ATL_vcxswapRI(d_, s_) d_ = _mm_shuffle_pd(s_, s_, 5)
+#elif defined(ATL_SSE1) && defined(SCPLX)
+   #define ATL_vunpckLO(d_, s0_, s1_) d_ = _mm_unpacklo_ps(s0_, s1_)
+   #define ATL_vunpckHI(d_, s0_, s1_) d_ = _mm_unpackhi_ps(s0_, s1_)
+   #ifdef ATL_SSE3
+      #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \
+      { \
+         rXr_ = _mm_moveldup_ps(rXi_); \
+         rXi_ = _mm_movehdup_ps(rXi_); \
+      }
+      #define ATL_vcxdotcomb(rR_, rI_) /* low 2 elts rR_ gets ans */ \
+      { \
+         ATL_vmul(rR_, rR_, ATL_VONEPN); \
+         rR_ = _mm_hadd_ps(rR_, rI_); \
+         rR_ = _mm_hadd_ps(rR_, rR_); \
+      }
+   #else
+      #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \
+      { \
+         rXr_ = _mm_shuffle_ps(rXi_, rXi_, 0xA0); \
+         rXi_ = _mm_shuffle_ps(rXi_, rXi_, 0xF5); \
+      }
+      #define ATL_vcxdotcomb(rR_, rI_) /* low 2 elts rR_ gets ans */ \
+      {  __m128 t1_;\
+         ATL_vmul(rR_, rR_, ATL_VONEPN); \
+         t1_ = _mm_unpacklo_ps(rR_, rI_); \
+         rR_ = _mm_unpackhi_ps(rR_, rI_); \
+         ATL_vadd(rR_, rR_, t1_); \
+         t1_ = _mm_movehl_ps(t1_, rR_); \
+         ATL_vadd(rR_, rR_, t1_); \
+      }
+   #endif
+   #define ATL_vcxswapRI(d_, s_) d_ = _mm_shuffle_ps(s_, s_, 0xB1)
+   #define ATL_vcxPrepAlpha(rALn_, rALs_) /* rALs={ral,-ial}*(VLEN/2) */ \
+   { \
+      rALn_ = _mm_movelh_ps(rALn_, rALn_); \
+      ATL_vmul(rALs_, rALn_, ATL_VONEPN); \
+      ATL_vcxswapRI(rALs_, rALs_); \
+   }
+#else /* gnuvec */
+   #if ATL_VLEN == 2
+      #ifndef ATL_vunpckLO
+         /*
+          * A brace list may only be assigned through a compound literal.
+          * NOTE(review): element order kept exactly as originally written;
+          * confirm it against the ISA-specific unpack definitions.
+          */
+         #define ATL_vunpckLO(d_, s0_, s1_) \
+            d_ = (ATL_VTYPE){(s1_)[0],(s0_)[0]}
+         #define ATL_vunpckHI(d_, s0_, s1_) \
+            d_ = (ATL_VTYPE){(s1_)[1],(s0_)[1]}
+      #endif
+      #define ATL_VIPERMR ((ATL_VITYPE){0, 0})
+      #define ATL_VIPERMI ((ATL_VITYPE){1, 1})
+      
#define ATL_vcxswapRI(rd_, rs_) \
+         rd_ = __builtin_shuffle(rs_, (ATL_VITYPE){1,0})
+      /* rR_[0] = even - odd elts of rR_;  rR_[1] = sum of all elts of rI_ */
+      #define ATL_vcxdotcomb(rR_, rI_) \
+      { \
+         (rR_)[0] -= (rR_)[1]; \
+         (rR_)[1] = (rI_)[0] + (rI_)[1]; \
+      }
+   #elif ATL_VLEN == 4
+      #ifndef ATL_vunpckLO
+         /*
+          * A brace list may only be assigned through a compound literal.
+          * NOTE(review): element order kept exactly as originally written
+          * (here and for VLEN 8/16); confirm against the ISA-specific
+          * unpack definitions.
+          */
+         #define ATL_vunpckLO(d_, s0_, s1_) d_ = (ATL_VTYPE) \
+            {(s1_)[2],(s0_)[2],(s1_)[0],(s0_)[0]}
+         #define ATL_vunpckHI(d_, s0_, s1_) d_ = (ATL_VTYPE) \
+            {(s1_)[3],(s0_)[3],(s1_)[1],(s0_)[1]}
+      #endif
+      #define ATL_vcxswapRI(rd_, rs_) \
+         rd_ = __builtin_shuffle(rs_, (ATL_VITYPE){1,0,3,2})
+      #define ATL_vcxdotcomb(rR_, rI_) \
+      { \
+         (rR_)[0] += (rR_)[2] - (rR_)[1] - (rR_)[3]; \
+         (rR_)[1] = (rI_)[0] + (rI_)[1] + (rI_)[2] + (rI_)[3]; \
+      }
+      #define ATL_VIPERMR ((ATL_VITYPE){0, 0, 2, 2})
+      #define ATL_VIPERMI ((ATL_VITYPE){1, 1, 3, 3})
+   #elif ATL_VLEN == 8
+      #ifndef ATL_vunpckLO
+         #define ATL_vunpckLO(d_, s0_, s1_) d_ = (ATL_VTYPE) \
+            {(s1_)[6],(s0_)[6],(s1_)[4],(s0_)[4], \
+             (s1_)[2],(s0_)[2],(s1_)[0],(s0_)[0]}
+         #define ATL_vunpckHI(d_, s0_, s1_) d_ = (ATL_VTYPE) \
+            {(s1_)[7],(s0_)[7],(s1_)[5],(s0_)[5], \
+             (s1_)[3],(s0_)[3],(s1_)[1],(s0_)[1]}
+      #endif
+      #define ATL_vcxswapRI(rd_, rs_) \
+         rd_ = __builtin_shuffle(rs_, (ATL_VITYPE){1,0,3,2,5,4,7,6})
+      #define ATL_vcxdotcomb(rR_, rI_) \
+      { \
+         (rR_)[0] += (rR_)[2]+(rR_)[4]+(rR_)[6] \
+                     - (rR_)[1]-(rR_)[3]-(rR_)[5]-(rR_)[7]; \
+         (rR_)[1] = (rI_)[0] + (rI_)[1] + (rI_)[2] + (rI_)[3] \
+                  + (rI_)[4] + (rI_)[5] + (rI_)[6] + (rI_)[7]; \
+      }
+      #define ATL_VIPERMR ((ATL_VITYPE){0, 0, 2, 2, 4, 4, 6, 6})
+      #define ATL_VIPERMI ((ATL_VITYPE){1, 1, 3, 3, 5, 5, 7, 7})
+   #elif ATL_VLEN == 16
+      #ifndef ATL_vunpckLO
+         #define ATL_vunpckLO(d_, s0_, s1_) d_ = (ATL_VTYPE) \
+            {(s1_)[14],(s0_)[14],(s1_)[12],(s0_)[12], \
+             (s1_)[10],(s0_)[10],(s1_)[8],(s0_)[8], \
+             (s1_)[6],(s0_)[6],(s1_)[4],(s0_)[4], \
+             (s1_)[2],(s0_)[2],(s1_)[0],(s0_)[0]}
+         #define ATL_vunpckHI(d_, s0_, s1_) d_ = (ATL_VTYPE) \
+            {(s1_)[15],(s0_)[15],(s1_)[13],(s0_)[13], \
+             (s1_)[11],(s0_)[11],(s1_)[9],(s0_)[9], \
+             (s1_)[7],(s0_)[7],(s1_)[5],(s0_)[5], \
+             (s1_)[3],(s0_)[3],(s1_)[1],(s0_)[1]}
+      #endif
+      
#define ATL_vcxswapRI(rd_, rs_) \ + rd_ = __builtin_shuffle(rs_, (ATL_VITYPE){1,0,3,2,5,4,7,6, \ + 9,8,11,10,13,12,15,14}) + #define ATL_vcxdotcomb(rR_, rI_) \ + { \ + (rR_)[0] += (rR_)[2]+(rR_)[4]+(rR_)[6]+(rR_)[8]+(rR_)[10]+(rR_)[12] \ + + (rR_)[14] - (rR_)[1]-(rR_)[3]-(rR_)[5]-(rR_)[7] \ + - (rR_)[9]-(rR_)[11]-(rR_)[13]-(rR_)[15]; \ + (rR_)[1] = (rI_)[0] + (rI_)[1] + (rI_)[2] + (rI_)[3] \ + + (rI_)[4] + (rI_)[5] + (rI_)[6] + (rI_)[7] \ + + (rI_)[8] + (rI_)[9] + (rI_)[10] + (rI_)[11] \ + + (rI_)[12] + (rI_)[13] + (rI_)[14] + (rI_)[15]; \ + } + #define ATL_VIPERMR ((ATL_VITYPE){0, 0, 2, 2, 4, 4, 6, 6, \ + 8, 8, 10, 10, 12, 12, 14, 14}) + #define ATL_VIPERMI ((ATL_VITYPE){1, 1, 3, 3, 5, 5, 7, 7, \ + 9, 9, 11, 11, 13, 13, 15, 15}) + #elif ATL_VLEN == 32 + #define ATL_vcxswapRI(rd_, rs_) \ + rd_ = __builtin_shuffle(rs_, (ATL_VITYPE){1,0,3,2,5,4,7,6, \ + 9,8,11,10,13,12,15,14, \ + 17,16,19,18,21,20,23,22, \ + 25,24,27,26,29,28,31,30}) + #define ATL_vcxdotcomb(rR_, rI_) \ + { \ + (rR_)[0] += (rR_)[2]+(rR_)[4]+(rR_)[6] \ + + (rR_)[8]+(rR_)[10]+(rR_)[12]+(rR_)[14] \ + + (rR_)[16]+(rR_)[18]+(rR_)[20]+(rR_)[22] \ + + (rR_)[24]+(rR_)[26]+(rR_)[28]+(rR_)[30] \ + - (rR_)[1]-(rR_)[3]-(rR_)[5]-(rR_)[7] \ + - (rR_)[9]-(rR_)[11]-(rR_)[13]-(rR_)[15] \ + - (rR_)[17]-(rR_)[19]-(rR_)[21]-(rR_)[23] \ + - (rR_)[25]-(rR_)[27]-(rR_)[29]-(rR_)[31]; \ + (rR_)[1] = (rI_)[0] + (rI_)[1] + (rI_)[2] + (rI_)[3] \ + + (rI_)[4] + (rI_)[5] + (rI_)[6] + (rI_)[7] \ + + (rI_)[8] + (rI_)[9] + (rI_)[10] + (rI_)[11] \ + + (rI_)[12] + (rI_)[13] + (rI_)[14] + (rI_)[15] \ + + (rI_)[16] + (rI_)[17] + (rI_)[18] + (rI_)[19] \ + + (rI_)[20] + (rI_)[21] + (rI_)[22] + (rI_)[23] \ + + (rI_)[24] + (rI_)[25] + (rI_)[26] + (rI_)[27] \ + + (rI_)[28] + (rI_)[29] + (rI_)[30] + (rI_)[31]; \ + } + #define ATL_VIPERMR ((ATL_VITYPE) \ + { 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, \ + 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 28, 28, 30, 30}) + #define ATL_VIPERMI ((ATL_VITYPE) \ + { 1, 1, 3, 3, 5, 5, 7, 7, 
9, 9, 11, 11, 13, 13, 15, 15, \ + 17, 17, 19, 19, 21, 21, 23, 23, 25, 25, 27, 27, 29, 29, 31, 31}) + #else + #error "unsupported ATL_VLEN!" + #endif + #define ATL_vcxsplitRI(rXr_, rXi_) /* rXi input & output */ \ + { \ + rXr_ = __builtin_shuffle(rXi_, ATL_VIPERMR); \ + rXi_ = __builtin_shuffle(rXi_, ATL_VIPERMI); \ + } +#endif +/* + * brute-force alpha prep works on any gcc-compat compiler & vec ISA + */ +#ifndef ATL_vcxPrepAlpha + #if ATL_VLEN == 2 /* case that can be vec wt predef ops */ + #define ATL_vcxPrepAlpha(rALn_, rALs_) /* rALs={ral,-ial}*(VLEN/2) */ \ + { \ + ATL_vmul(rALs_, rALn_, ATL_VONEPN); \ + ATL_vcxswapRI(rALs_, rALs_); \ + } + #else + #define ATL_vcxPrepAlpha(rALn_, rALs_) /* rALs={ral,-ial}*(VLEN/2) */ \ + { \ + TYPE mr_[ATL_VLEN] __attribute__ ((aligned (ATL_VLENb)));\ + TYPE a0_, a1_; int i_; \ + ATL_vst(mr_, rALn_); \ + a0_ = *(mr_); a1_ = mr_[1]; \ + for (i_=2; i_ < ATL_VLEN; i_ += 2) \ + { mr_[i_] = a0_; mr_[i_+1] = a1_; } \ + ATL_vld(rALn_, mr_); \ + a0_ = -a1_; a1_ = *(mr_); \ + for (i_=0; i_ < ATL_VLEN; i_ += 2) \ + { mr_[i_] = a0_; mr_[i_+1] = a1_; } \ + ATL_vld(rALs_, mr_); \ + } + #endif +#endif +/* + * brute force combine that will work on any gcc-compatible compiler/vec ISA + */ +#ifndef ATL_vcxdotcomb + #define ATL_vcxdotcomb(rR_, rI_) /* low 2 elts rR gets ans */ \ + { \ + TYPE mr_[ATL_VLEN] __attribute__ ((aligned (ATL_VLENb)));\ + TYPE mi_[ATL_VLEN] __attribute__ ((aligned (ATL_VLENb)));\ + int i_; \ + register TYPE dr_=ATL_rzero, di_=ATL_rzero; \ + ATL_vst(mr_, rR_); \ + ATL_vst(mi_, rI_); \ + for (i_=0; i_ < ATL_VLEN; i_ += 2) \ + { \ + dr_ += mr_[i_] - mr_[i_+1]; \ + di_ += mi_[i_] + mi_[i_+1]; \ + } \ + mr_[0] = dr_; \ + mr_[1] = di_; \ + ATL_vld(rR_, mr_); \ + } +#endif +/* + * Default vcxsplitRIld that uses ATL_vld & ATL_vcxsplitRI + */ +#ifndef ATL_CXSPLDb + #define ATL_CXSPLDb ATL_VLENb +#endif +#ifndef ATL_vcxsplitRIld + #define ATL_vcxsplitRIld(rXr_, rXi_, pX_) \ + { \ + ATL_vld(rXi_, pX_); \ + ATL_vcxsplitRI(rXr_, 
rXi_); \ + } +#endif +/* + * Convenience funcs for one vector iteration of DOT product, aligned & + * unaligned Y. vX_ comes in already preloaded so that it can be used across + * multiple cols, as in GEMVT. vX is natural order, vXs real/imag swapped (cxriswap). + */ +#define ATL_vcxdotA(dotR_, dotI_, vX_, vXs_, pY_) \ +{\ + register ATL_VTYPE vY_; \ + ATL_vld(vY_, pY_); /* aligned load of Y */ \ + ATL_vmac(dotR_, vX_, vY_); /* dotR += vX * vY */ \ + ATL_vmac(dotI_, vXs_, vY_); /* dotI += vXs * vY */ \ +} + +#define ATL_vcxdotU(dotR_, dotI_, vX_, vXs_, pY_) \ +{\ + register ATL_VTYPE vY_; \ + ATL_vuld(vY_, pY_); /* unaligned load of Y */ \ + ATL_vmac(dotR_, vX_, vY_); /* dotR += vX * vY */ \ + ATL_vmac(dotI_, vXs_, vY_); /* dotI += vXs * vY */ \ +} +/* + * Convenience funcs for one vector iteration of AXPY, [Un]&Aligned pY_ + * X comes in already split into vXr_ (real elts) and vXi_ (imag elts) so + * that the same X values can be used across multiple Y vecs (which are + * actually A columns for GER). + */ +#define ATL_vcxaxpyA(pY_, vXr_, vXi_, vALn_, vALs_) \ +{ /* ALs={ALr,-iAL}; */ \ + register ATL_VTYPE vY_; /* ALn={iAL,rAL} */ \ + ATL_vld(vY_, pY_); /* vY = {iY,rY, ...} */ \ + ATL_vmac(vY_, vXi_, vALs_); /* vY += {iX*rAL, -iX*iAL} */ \ + ATL_vmac(vY_, vXr_, vALn_); /* vY += {rX*iAL, rX*rAL} */ \ + ATL_vst(pY_, vY_); \ +} + +#define ATL_vcxaxpyU(pY_, vXr_, vXi_, vALn_, vALs_) \ +{ /* ALs={ALr,-iAL}; */ \ + register ATL_VTYPE vY_; /* ALn={iAL,rAL} */ \ + ATL_vuld(vY_, pY_); /* vY = {iY,rY, ...} */ \ + ATL_vmac(vY_, vXi_, vALs_); /* vY += {iX*rAL, -iX*iAL} */ \ + ATL_vmac(vY_, vXr_, vALn_); /* vY += {rX*iAL, rX*rAL} */ \ + ATL_vust(pY_, vY_); /* pY_ may be unaligned */ \ +} +/* + * Remainder load/store functions.
They take 0 < n_ < (ATL_VLEN/2), which + * is the number of complex elts to load/store from/to the ptr + */ + +#if ATL_VLEN <= 4 + #define ATL_vcxldR(r_, p_, n_) ATL_vcxld1(r_, p_) + #define ATL_vcxuldR(r_, p_, n_) ATL_vcxuld1(r_, p_) + #define ATL_vcxstR(p_, r_, n_) ATL_vcxst1(p_, r_) + #define ATL_vcxustR(p_, r_, n_) ATL_vcxust1(p_, r_) + #define ATL_vcxldXYR(rX_, pX_, rY_, pY_, n_) \ + { \ + ATL_vcxld1(rX_, pX_); \ + ATL_vcxld1(rY_, pY_); \ + } + #define ATL_vcxldXuYR(rX_, pX_, rY_, pY_, n_) \ + { \ + ATL_vcxld1(rX_, pX_); \ + ATL_vcxuld1(rY_, pY_); \ + } + #define ATL_vcxlduXuYR(rX_, pX_, rY_, pY_, n_) \ + { \ + ATL_vcxuld1(rX_, pX_); \ + ATL_vcxuld1(rY_, pY_); \ + } +#elif ATL_VLEN == 8 + #define ATL_vcxldXYR(rX_,pX_,rY_,pY_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxld1(rX_,pX_); \ + ATL_vcxld1(rY_,pY_); \ + break; \ + case 2 : \ + ATL_vcxld2(rX_,pX_); \ + ATL_vcxld2(rY_,pY_); \ + break; \ + default: \ + ATL_vcxld3(rX_,pX_); \ + ATL_vcxld3(rY_,pY_); \ + } \ + } + #define ATL_vcxldXuYR(rX_,pX_,rY_,pY_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxld1(rX_,pX_); \ + ATL_vcxuld1(rY_,pY_); \ + break; \ + case 2 : \ + ATL_vcxld2(rX_,pX_); \ + ATL_vcxuld2(rY_,pY_); \ + break; \ + default: \ + ATL_vcxld3(rX_,pX_); \ + ATL_vcxuld3(rY_,pY_); \ + } \ + } + #define ATL_vcxlduXuYR(rX_,pX_,rY_,pY_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxuld1(rX_,pX_); \ + ATL_vcxuld1(rY_,pY_); \ + break; \ + case 2 : \ + ATL_vcxuld2(rX_,pX_); \ + ATL_vcxuld2(rY_,pY_); \ + break; \ + default: \ + ATL_vcxuld3(rX_,pX_); \ + ATL_vcxuld3(rY_,pY_); \ + } \ + } + #define ATL_vcxldR(r_,p_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxld1(r_,p_); \ + break; \ + case 2 : \ + ATL_vcxld2(r_,p_); \ + break; \ + default: \ + ATL_vcxld3(r_,p_); \ + } \ + } + #define ATL_vcxstR(p_,r_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxst1(p_,r_); \ + break; \ + case 2 : \ + ATL_vcxst2(p_,r_); \ + break; \ + default: \ + ATL_vcxst3(p_,r_); \ + } \ + } + 
#define ATL_vcxuldR(r_,p_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxuld1(r_,p_); \ + break; \ + case 2 : \ + ATL_vcxuld2(r_,p_); \ + break; \ + default: \ + ATL_vcxuld3(r_,p_); \ + } \ + } + #define ATL_vcxustR(p_,r_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxust1(p_,r_); \ + break; \ + case 2 : \ + ATL_vcxust2(p_,r_); \ + break; \ + default: \ + ATL_vcxust3(p_,r_); \ + } \ + } +#elif ATL_VLEN == 16 + #define ATL_vcxldXYR(rX_,pX_,rY_,pY_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxld1(rX_,pX_); \ + ATL_vcxld1(rY_,pY_); \ + break; \ + case 2 : \ + ATL_vcxld2(rX_,pX_); \ + ATL_vcxld2(rY_,pY_); \ + break; \ + case 3 : \ + ATL_vcxld3(rX_,pX_); \ + ATL_vcxld3(rY_,pY_); \ + break; \ + case 4 : \ + ATL_vcxld4(rX_,pX_); \ + ATL_vcxld4(rY_,pY_); \ + break; \ + case 5 : \ + ATL_vcxld5(rX_,pX_); \ + ATL_vcxld5(rY_,pY_); \ + break; \ + case 6 : \ + ATL_vcxld6(rX_,pX_); \ + ATL_vcxld6(rY_,pY_); \ + break; \ + default: \ + ATL_vcxld7(rX_,pX_); \ + ATL_vcxld7(rY_,pY_); \ + } \ + } + #define ATL_vcxldXuYR(rX_,pX_,rY_,pY_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxld1(rX_,pX_); \ + ATL_vcxuld1(rY_,pY_); \ + break; \ + case 2 : \ + ATL_vcxld2(rX_,pX_); \ + ATL_vcxuld2(rY_,pY_); \ + break; \ + case 3 : \ + ATL_vcxld3(rX_,pX_); \ + ATL_vcxuld3(rY_,pY_); \ + break; \ + case 4 : \ + ATL_vcxld4(rX_,pX_); \ + ATL_vcxuld4(rY_,pY_); \ + break; \ + case 5 : \ + ATL_vcxld5(rX_,pX_); \ + ATL_vcxuld5(rY_,pY_); \ + break; \ + case 6 : \ + ATL_vcxld6(rX_,pX_); \ + ATL_vcxuld6(rY_,pY_); \ + break; \ + default: \ + ATL_vcxld7(rX_,pX_); \ + ATL_vcxuld7(rY_,pY_); \ + } \ + } + #define ATL_vcxlduXuYR(rX_,pX_,rY_,pY_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxuld1(rX_,pX_); \ + ATL_vcxuld1(rY_,pY_); \ + break; \ + case 2 : \ + ATL_vcxuld2(rX_,pX_); \ + ATL_vcxuld2(rY_,pY_); \ + break; \ + case 3 : \ + ATL_vcxuld3(rX_,pX_); \ + ATL_vcxuld3(rY_,pY_); \ + break; \ + case 4 : \ + ATL_vcxuld4(rX_,pX_); \ + ATL_vcxuld4(rY_,pY_); \ + break; \ + case 
5 : \ + ATL_vcxuld5(rX_,pX_); \ + ATL_vcxuld5(rY_,pY_); \ + break; \ + case 6 : \ + ATL_vcxuld6(rX_,pX_); \ + ATL_vcxuld6(rY_,pY_); \ + break; \ + default: \ + ATL_vcxuld7(rX_,pX_); \ + ATL_vcxuld7(rY_,pY_); \ + } \ + } + #define ATL_vcxldR(r_,p_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxld1(r_,p_); \ + break; \ + case 2 : \ + ATL_vcxld2(r_,p_); \ + break; \ + case 3 : \ + ATL_vcxld3(r_,p_); \ + break; \ + case 4 : \ + ATL_vcxld4(r_,p_); \ + break; \ + case 5 : \ + ATL_vcxld5(r_,p_); \ + break; \ + case 6 : \ + ATL_vcxld6(r_,p_); \ + break; \ + default: \ + ATL_vcxld7(r_,p_); \ + } \ + } + #define ATL_vcxstR(p_,r_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxst1(p_,r_); \ + break; \ + case 2 : \ + ATL_vcxst2(p_,r_); \ + break; \ + case 3 : \ + ATL_vcxst3(p_,r_); \ + break; \ + case 4 : \ + ATL_vcxst4(p_,r_); \ + break; \ + case 5 : \ + ATL_vcxst5(p_,r_); \ + break; \ + case 6 : \ + ATL_vcxst6(p_,r_); \ + break; \ + default: \ + ATL_vcxst7(p_,r_); \ + } \ + } + #define ATL_vcxuldR(r_,p_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxuld1(r_,p_); \ + break; \ + case 2 : \ + ATL_vcxuld2(r_,p_); \ + break; \ + case 3 : \ + ATL_vcxuld3(r_,p_); \ + break; \ + case 4 : \ + ATL_vcxuld4(r_,p_); \ + break; \ + case 5 : \ + ATL_vcxuld5(r_,p_); \ + break; \ + case 6 : \ + ATL_vcxuld6(r_,p_); \ + break; \ + default: \ + ATL_vcxuld7(r_,p_); \ + } \ + } + #define ATL_vcxustR(p_,r_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxust1(p_,r_); \ + break; \ + case 2 : \ + ATL_vcxust2(p_,r_); \ + break; \ + case 3 : \ + ATL_vcxust3(p_,r_); \ + break; \ + case 4 : \ + ATL_vcxust4(p_,r_); \ + break; \ + case 5 : \ + ATL_vcxust5(p_,r_); \ + break; \ + case 6 : \ + ATL_vcxust6(p_,r_); \ + break; \ + default: \ + ATL_vcxust7(p_,r_); \ + } \ + } +#elif ATL_VLEN == 32 + #define ATL_vcxldXYR(rX_,pX_,rY_,pY_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxld1(rX_,pX_); \ + ATL_vcxld1(rY_,pY_); \ + break; \ + case 2 : \ + ATL_vcxld2(rX_,pX_); \ + 
ATL_vcxld2(rY_,pY_); \ + break; \ + case 3 : \ + ATL_vcxld3(rX_,pX_); \ + ATL_vcxld3(rY_,pY_); \ + break; \ + case 4 : \ + ATL_vcxld4(rX_,pX_); \ + ATL_vcxld4(rY_,pY_); \ + break; \ + case 5 : \ + ATL_vcxld5(rX_,pX_); \ + ATL_vcxld5(rY_,pY_); \ + break; \ + case 6 : \ + ATL_vcxld6(rX_,pX_); \ + ATL_vcxld6(rY_,pY_); \ + break; \ + case 7 : \ + ATL_vcxld7(rX_,pX_); \ + ATL_vcxld7(rY_,pY_); \ + break; \ + case 8 : \ + ATL_vcxld8(rX_,pX_); \ + ATL_vcxld8(rY_,pY_); \ + break; \ + case 9 : \ + ATL_vcxld9(rX_,pX_); \ + ATL_vcxld9(rY_,pY_); \ + break; \ + case 10 : \ + ATL_vcxld10(rX_,pX_); \ + ATL_vcxld10(rY_,pY_); \ + break; \ + case 11 : \ + ATL_vcxld11(rX_,pX_); \ + ATL_vcxld11(rY_,pY_); \ + break; \ + case 12 : \ + ATL_vcxld12(rX_,pX_); \ + ATL_vcxld12(rY_,pY_); \ + break; \ + case 13 : \ + ATL_vcxld13(rX_,pX_); \ + ATL_vcxld13(rY_,pY_); \ + break; \ + case 14 : \ + ATL_vcxld14(rX_,pX_); \ + ATL_vcxld14(rY_,pY_); \ + break; \ + default: \ + ATL_vcxld15(rX_,pX_); \ + ATL_vcxld15(rY_,pY_); \ + } \ + } + #define ATL_vcxldXuYR(rX_,pX_,rY_,pY_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxld1(rX_,pX_); \ + ATL_vcxuld1(rY_,pY_); \ + break; \ + case 2 : \ + ATL_vcxld2(rX_,pX_); \ + ATL_vcxuld2(rY_,pY_); \ + break; \ + case 3 : \ + ATL_vcxld3(rX_,pX_); \ + ATL_vcxuld3(rY_,pY_); \ + break; \ + case 4 : \ + ATL_vcxld4(rX_,pX_); \ + ATL_vcxuld4(rY_,pY_); \ + break; \ + case 5 : \ + ATL_vcxld5(rX_,pX_); \ + ATL_vcxuld5(rY_,pY_); \ + break; \ + case 6 : \ + ATL_vcxld6(rX_,pX_); \ + ATL_vcxuld6(rY_,pY_); \ + break; \ + case 7 : \ + ATL_vcxld7(rX_,pX_); \ + ATL_vcxuld7(rY_,pY_); \ + break; \ + case 8 : \ + ATL_vcxld8(rX_,pX_); \ + ATL_vcxuld8(rY_,pY_); \ + break; \ + case 9 : \ + ATL_vcxld9(rX_,pX_); \ + ATL_vcxuld9(rY_,pY_); \ + break; \ + case 10 : \ + ATL_vcxld10(rX_,pX_); \ + ATL_vcxuld10(rY_,pY_); \ + break; \ + case 11 : \ + ATL_vcxld11(rX_,pX_); \ + ATL_vcxuld11(rY_,pY_); \ + break; \ + case 12 : \ + ATL_vcxld12(rX_,pX_); \ + ATL_vcxuld12(rY_,pY_); \ + break; \ + 
case 13 : \ + ATL_vcxld13(rX_,pX_); \ + ATL_vcxuld13(rY_,pY_); \ + break; \ + case 14 : \ + ATL_vcxld14(rX_,pX_); \ + ATL_vcxuld14(rY_,pY_); \ + break; \ + default: \ + ATL_vcxld15(rX_,pX_); \ + ATL_vcxuld15(rY_,pY_); \ + } \ + } + #define ATL_vcxlduXuYR(rX_,pX_,rY_,pY_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxuld1(rX_,pX_); \ + ATL_vcxuld1(rY_,pY_); \ + break; \ + case 2 : \ + ATL_vcxuld2(rX_,pX_); \ + ATL_vcxuld2(rY_,pY_); \ + break; \ + case 3 : \ + ATL_vcxuld3(rX_,pX_); \ + ATL_vcxuld3(rY_,pY_); \ + break; \ + case 4 : \ + ATL_vcxuld4(rX_,pX_); \ + ATL_vcxuld4(rY_,pY_); \ + break; \ + case 5 : \ + ATL_vcxuld5(rX_,pX_); \ + ATL_vcxuld5(rY_,pY_); \ + break; \ + case 6 : \ + ATL_vcxuld6(rX_,pX_); \ + ATL_vcxuld6(rY_,pY_); \ + break; \ + case 7 : \ + ATL_vcxuld7(rX_,pX_); \ + ATL_vcxuld7(rY_,pY_); \ + break; \ + case 8 : \ + ATL_vcxuld8(rX_,pX_); \ + ATL_vcxuld8(rY_,pY_); \ + break; \ + case 9 : \ + ATL_vcxuld9(rX_,pX_); \ + ATL_vcxuld9(rY_,pY_); \ + break; \ + case 10 : \ + ATL_vcxuld10(rX_,pX_); \ + ATL_vcxuld10(rY_,pY_); \ + break; \ + case 11 : \ + ATL_vcxuld11(rX_,pX_); \ + ATL_vcxuld11(rY_,pY_); \ + break; \ + case 12 : \ + ATL_vcxuld12(rX_,pX_); \ + ATL_vcxuld12(rY_,pY_); \ + break; \ + case 13 : \ + ATL_vcxuld13(rX_,pX_); \ + ATL_vcxuld13(rY_,pY_); \ + break; \ + case 14 : \ + ATL_vcxuld14(rX_,pX_); \ + ATL_vcxuld14(rY_,pY_); \ + break; \ + default: \ + ATL_vcxuld15(rX_,pX_); \ + ATL_vcxuld15(rY_,pY_); \ + } \ + } + #define ATL_vcxldR(r_,p_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxld1(r_,p_); \ + break; \ + case 2 : \ + ATL_vcxld2(r_,p_); \ + break; \ + case 3 : \ + ATL_vcxld3(r_,p_); \ + break; \ + case 4 : \ + ATL_vcxld4(r_,p_); \ + break; \ + case 5 : \ + ATL_vcxld5(r_,p_); \ + break; \ + case 6 : \ + ATL_vcxld6(r_,p_); \ + break; \ + case 7 : \ + ATL_vcxld7(r_,p_); \ + break; \ + case 8 : \ + ATL_vcxld8(r_,p_); \ + break; \ + case 9 : \ + ATL_vcxld9(r_,p_); \ + break; \ + case 10 : \ + ATL_vcxld10(r_,p_); \ + break; \ + case 
11 : \ + ATL_vcxld11(r_,p_); \ + break; \ + case 12 : \ + ATL_vcxld12(r_,p_); \ + break; \ + case 13 : \ + ATL_vcxld13(r_,p_); \ + break; \ + case 14 : \ + ATL_vcxld14(r_,p_); \ + break; \ + default: \ + ATL_vcxld15(r_,p_); \ + } \ + } + #define ATL_vcxstR(p_,r_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxst1(p_,r_); \ + break; \ + case 2 : \ + ATL_vcxst2(p_,r_); \ + break; \ + case 3 : \ + ATL_vcxst3(p_,r_); \ + break; \ + case 4 : \ + ATL_vcxst4(p_,r_); \ + break; \ + case 5 : \ + ATL_vcxst5(p_,r_); \ + break; \ + case 6 : \ + ATL_vcxst6(p_,r_); \ + break; \ + case 7 : \ + ATL_vcxst7(p_,r_); \ + break; \ + case 8 : \ + ATL_vcxst8(p_,r_); \ + break; \ + case 9 : \ + ATL_vcxst9(p_,r_); \ + break; \ + case 10 : \ + ATL_vcxst10(p_,r_); \ + break; \ + case 11 : \ + ATL_vcxst11(p_,r_); \ + break; \ + case 12 : \ + ATL_vcxst12(p_,r_); \ + break; \ + case 13 : \ + ATL_vcxst13(p_,r_); \ + break; \ + case 14 : \ + ATL_vcxst14(p_,r_); \ + break; \ + default: \ + ATL_vcxst15(p_,r_); \ + } \ + } + #define ATL_vcxuldR(r_,p_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxuld1(r_,p_); \ + break; \ + case 2 : \ + ATL_vcxuld2(r_,p_); \ + break; \ + case 3 : \ + ATL_vcxuld3(r_,p_); \ + break; \ + case 4 : \ + ATL_vcxuld4(r_,p_); \ + break; \ + case 5 : \ + ATL_vcxuld5(r_,p_); \ + break; \ + case 6 : \ + ATL_vcxuld6(r_,p_); \ + break; \ + case 7 : \ + ATL_vcxuld7(r_,p_); \ + break; \ + case 8 : \ + ATL_vcxuld8(r_,p_); \ + break; \ + case 9 : \ + ATL_vcxuld9(r_,p_); \ + break; \ + case 10 : \ + ATL_vcxuld10(r_,p_); \ + break; \ + case 11 : \ + ATL_vcxuld11(r_,p_); \ + break; \ + case 12 : \ + ATL_vcxuld12(r_,p_); \ + break; \ + case 13 : \ + ATL_vcxuld13(r_,p_); \ + break; \ + case 14 : \ + ATL_vcxuld14(r_,p_); \ + break; \ + default: \ + ATL_vcxuld15(r_,p_); \ + } \ + } + #define ATL_vcxustR(p_,r_,n_) \ + { \ + switch(n_) \ + { \ + case 1 : \ + ATL_vcxust1(p_,r_); \ + break; \ + case 2 : \ + ATL_vcxust2(p_,r_); \ + break; \ + case 3 : \ + ATL_vcxust3(p_,r_); \ + 
break; \ + case 4 : \ + ATL_vcxust4(p_,r_); \ + break; \ + case 5 : \ + ATL_vcxust5(p_,r_); \ + break; \ + case 6 : \ + ATL_vcxust6(p_,r_); \ + break; \ + case 7 : \ + ATL_vcxust7(p_,r_); \ + break; \ + case 8 : \ + ATL_vcxust8(p_,r_); \ + break; \ + case 9 : \ + ATL_vcxust9(p_,r_); \ + break; \ + case 10 : \ + ATL_vcxust10(p_,r_); \ + break; \ + case 11 : \ + ATL_vcxust11(p_,r_); \ + break; \ + case 12 : \ + ATL_vcxust12(p_,r_); \ + break; \ + case 13 : \ + ATL_vcxust13(p_,r_); \ + break; \ + case 14 : \ + ATL_vcxust14(p_,r_); \ + break; \ + default: \ + ATL_vcxust15(p_,r_); \ + } \ + } +#endif /* end VLEN test for remainder definitions */ + +#endif /* end multiple inclusion guard */ diff -Nru atlas-3.10.2/include/atlas_enum.h atlas-3.10.3/include/atlas_enum.h --- atlas-3.10.2/include/atlas_enum.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_enum.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_f77.h atlas-3.10.3/include/atlas_f77.h --- atlas-3.10.2/include/atlas_f77.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_f77.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_genparse.h atlas-3.10.3/include/atlas_genparse.h --- atlas-3.10.2/include/atlas_genparse.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_genparse.h 2016-07-28 19:42:59.000000000 +0000 @@ -6,13 +6,13 @@ #include #include #include -#define NASMD 9 +#define NASMD 11 enum ASMDIA {ASM_None=0, gas_x86_32, gas_x86_64, gas_sparc, gas_ppc, gas_parisc, - gas_mips, gas_arm, gas_s390}; + gas_mips, gas_arm, gas_arm64, gas_wow64, gas_s390}; static char *ASMNAM[NASMD] = {"", "GAS_x8632", "GAS_x8664", "GAS_SPARC", "GAS_PPC", "GAS_PARISC", - "GAS_MIPS", "GAS_ARM", "GAS_S390"}; + "GAS_MIPS", "GAS_ARM", "GAS_ARM64", "GAS_WOW64", "GAS_S390"}; /* * Basic data structure for forming queues with some minimal info */ diff -Nru atlas-3.10.2/include/atlas_kern3.h atlas-3.10.3/include/atlas_kern3.h --- atlas-3.10.2/include/atlas_kern3.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_kern3.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_kernel3.h atlas-3.10.3/include/atlas_kernel3.h --- atlas-3.10.2/include/atlas_kernel3.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_kernel3.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_lapack.h atlas-3.10.3/include/atlas_lapack.h --- atlas-3.10.2/include/atlas_lapack.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_lapack.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_level1.h atlas-3.10.3/include/atlas_level1.h --- atlas-3.10.2/include/atlas_level1.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_level1.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_level2.h atlas-3.10.3/include/atlas_level2.h --- atlas-3.10.2/include/atlas_level2.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_level2.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_level3.h atlas-3.10.3/include/atlas_level3.h --- atlas-3.10.2/include/atlas_level3.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_level3.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_lvl2.h atlas-3.10.3/include/atlas_lvl2.h --- atlas-3.10.2/include/atlas_lvl2.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_lvl2.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_lvl3.h atlas-3.10.3/include/atlas_lvl3.h --- atlas-3.10.2/include/atlas_lvl3.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_lvl3.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_misc.h atlas-3.10.3/include/atlas_misc.h --- atlas-3.10.2/include/atlas_misc.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_misc.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_mmparse.h atlas-3.10.3/include/atlas_mmparse.h --- atlas-3.10.2/include/atlas_mmparse.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_mmparse.h 2016-07-28 19:42:59.000000000 +0000 @@ -918,8 +918,9 @@ if (i > 70) { fprintf(fpout, " \\\n "); i = 3; } i += fprintf(fpout, "X87=%d ", FLAG_IS_SET(np->flag, MMF_X87)); - if (np->mflop[0]+np->mflop[1]+np->mflop[2]+np->mflop[3]+np->mflop[4]+ - np->mflop[5]+np->mflop[6] != 0.0) + if (np->mflop[0] != 0.0 || np->mflop[1] != 0.0 || np->mflop[2] != 0.0 || + np->mflop[3] != 0.0 || np->mflop[4] != 0.0 || np->mflop[5] != 0.0 || + np->mflop[6] != 0.0) { if (i > 3) { fprintf(fpout, " \\\n "); i = 3; } i += fprintf(fpout, "MFLOP=%le", np->mflop[0]); diff -Nru atlas-3.10.2/include/atlas_mvparse.h atlas-3.10.3/include/atlas_mvparse.h --- atlas-3.10.2/include/atlas_mvparse.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_mvparse.h 2016-07-28 19:42:59.000000000 +0000 @@ -821,8 +821,9 @@ if (i > 70) { fprintf(fpout, " \\\n "); i = 3; } i += fprintf(fpout, "X87=%d ", FLAG_IS_SET(np->flag, MVF_X87)); - if (np->mflop[0]+np->mflop[1]+np->mflop[2]+np->mflop[3]+np->mflop[4]+ - np->mflop[5]+np->mflop[6] != 0.0) + if (np->mflop[0] != 0.0 || np->mflop[1] != 0.0 || np->mflop[2] != 0.0 || + np->mflop[3] != 0.0 || np->mflop[4] != 0.0 || np->mflop[5] != 0.0 || + np->mflop[6] != 0.0) { if (i > 3) { fprintf(fpout, " \\\n "); i = 3; } i += fprintf(fpout, "MFLOP=%le", np->mflop[0]); diff -Nru atlas-3.10.2/include/atlas_pca.h atlas-3.10.3/include/atlas_pca.h --- atlas-3.10.2/include/atlas_pca.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_pca.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,53 +1,14 @@ #ifndef ATLAS_PCA_H #define ATLAS_PCA_H /* - * PowerPCs, POWERs and ARMs are weakly ordered, meaning that a given - * processor's writes may appear out-of-order to other processors, - 
* which breaks PCA's syncs since PCA depends on in-order writes. - * To fix, we must issue a memory barrier call before giving the go-ahead. - * PowerPC: SYNC ensures that all prior stores complete before the next one. - * POWER: DCS waits until all pending writes are written before preceeding - * ARM: DMB (data mem barrier) - all prior mem accesses (in program order) - * complete before DMB returns - * - * Older x86's have a special mode where stores can become out-of-order, but - * it was rarely enabled and does not seem to exist on modern hardware, so - * we don't have to bother there. - * - * SPARCs do not change the order of stores. - * - * PowerPC and ARM syncs do not fix problem, so don't allow PCA on machines - * with out-of-order write schemes. + * Only x86 is known to be strongly ordered (ATLAS does not use the special + * writes that make it weakly-ordered). */ -#if defined(ATL_ARCH_PPCG4) || defined(ATL_ARCH_PPCG5) - #ifdef __GNUC__ - #define ATL_membarrier __asm__ __volatile__ ("sync") -/* #define ATL_USEPCA 1 */ - #endif -#elif defined(ATL_ARCH_POWER3) || defined(ATL_ARCH_POWER4) || \ - defined(ATL_ARCH_POWER5) || defined(ATL_ARCH_POWER6) || \ - defined(ATL_ARCH_POWER7) - #ifdef __GNUC__ - #define ATL_membarrier __asm__ __volatile__ ("dcs") -/* #define ATL_USEPCA 1 */ - #endif -/* - * Unfortunately, none of the memory fence instructions seems to work - * adequately on ARM - */ -#elif defined(ATL_ARCH_ARMv7) - #ifdef __GNUC__ - #define ATL_membarrier __asm__ __volatile__ ("dmb") -/* #define ATL_USEPCA 1 */ - #endif -#elif defined(ATL_ARCH_IA64Itan) || defined(ATL_ARCH_IA64Itan2) - #ifdef __GNUC__ - #define ATL_membarrier __asm__ __volatile__ ("mf") -/* #define ATL_USEPCA 1 */ - #endif -#else #define ATL_membarrier - #define ATL_USEPCA 1 -#endif + #if defined(ATL_GAS_x8632) || defined(ATL_GAS_x8664) /* 32- or 64-bit x86 */ + #define ATL_USEPCA 1 + #elif defined(ATL_USEPCA) + #undef ATL_USEPCA + #endif #endif diff -Nru atlas-3.10.2/include/atlas_pkblas.h
atlas-3.10.3/include/atlas_pkblas.h --- atlas-3.10.2/include/atlas_pkblas.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_pkblas.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_prefetch.h atlas-3.10.3/include/atlas_prefetch.h --- atlas-3.10.2/include/atlas_prefetch.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_prefetch.h 2016-07-28 19:42:59.000000000 +0000 @@ -154,7 +154,8 @@ #define ATL_GOT_L1PREFETCH #define ATL_L1LS 32 #define ATL_L2LS 64 -#elif defined(ATL_ARCH_IbmZ196) || defined(ATL_ARCH_IbmZ10) +#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \ + defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) #define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3) #define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3) #define ATL_GOT_L1PREFETCH diff -Nru atlas-3.10.2/include/atlas_r1.h atlas-3.10.3/include/atlas_r1.h --- atlas-3.10.2/include/atlas_r1.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_r1.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/include/atlas_r1parse.h atlas-3.10.3/include/atlas_r1parse.h --- atlas-3.10.2/include/atlas_r1parse.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_r1parse.h 2016-07-28 19:42:59.000000000 +0000 @@ -786,8 +786,9 @@ if (i > 70) { fprintf(fpout, " \\\n "); i = 3; } i += fprintf(fpout, "X87=%d ", FLAG_IS_SET(np->flag, R1F_X87)); - if (np->mflop[0]+np->mflop[1]+np->mflop[2]+np->mflop[3]+np->mflop[4]+ - np->mflop[5]+np->mflop[6] != 0.0) + if (np->mflop[0] != 0.0 || np->mflop[1] != 0.0 || np->mflop[2] != 0.0 || + np->mflop[3] != 0.0 || np->mflop[4] != 0.0 || np->mflop[5] != 0.0 || + np->mflop[6] != 0.0) { if (i > 3) { fprintf(fpout, " \\\n "); i = 3; } i += fprintf(fpout, "MFLOP=%le", np->mflop[0]); diff -Nru atlas-3.10.2/include/atlas_simd.h atlas-3.10.3/include/atlas_simd.h --- atlas-3.10.2/include/atlas_simd.h 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_simd.h 2016-07-28 19:42:59.000000000 +0000 @@ -0,0 +1,1479 @@ +#ifndef ATLAS_SIMD_H + #define ATLAS_SIMD_H 1 +#ifdef ATL_GAS_ARM64 + #define ATL_VECARM1 1 +/* + * On 32-bit ARM, disable SIMD unless NONIEEE flag is thrown + */ +#elif defined(ATL_GAS_ARM) + #if !defined(ATL_NONIEEE) || !(defined(SREAL) || defined(SCPLX)) + #ifdef ATL_VLEN + #undef ATL_VLEN + #endif + #define ATL_VLEN 1 + #else + #define ATL_NEON 1 + #endif +#endif +//#undef ATL_AVX +//#undef ATL_SSE3 +//#undef ATL_SSE2 +//#undef ATL_SSE1 +//#define ATL_FRCGNUVEC 1 +//#define ATL_VLEN 32 +/* + * This header files contains wrappers to allow you to use SIMD vector + * extensions in a very simplified way in a type-independent manner. + * ATL_VLEN is treated differently, depending on whether we are using + * system-dependent vectorization (eg., AVX, VSX, etc.) or gnu vectorization: + * - For gnu vectorization, ATL_VLEN must be defined as a power of 2. 
+ * - For non-gnu vec ATL_VLEN should match the system, or be undefined. + * The first arg of every macro func is the destination. vr stands for vector register. + * We support the following miscellaneous instructions: + * ATL_vzero(vr) : zero all vr entries + * ATL_vcopy(vrd, vrs) : vrd = vrs + * + * We support 5 load/store operations (where p is a pointer): + * ATL_vbcast(vr, p) : broadcast pointed-to scalar to all vr entries + * ATL_vuld(vr, p) : unaligned load from ptr to vr + * ATL_vld(vr, p) : aligned load from ptr to vr + * ATL_vust(p, vr) : unaligned store to ptr from vr + * ATL_vst(p, vr) : aligned store to ptr from vr + * NOTE: if VLEN < native length, all usually assume unaligned data, + * and (except bcast) become a series of instructions rather than one. + * + * We support 4 computational macros: + * ATL_vadd(vrd, vrs1, vrs2) : vrd = vrs1 + vrs2 + * ATL_vsub(vrd, vrs1, vrs2) : vrd = vrs1 - vrs2 + * ATL_vmul(vrd, vrs1, vrs2) : vrd = vrs1 * vrs2 + * ATL_vmac(vrd, vrs1, vrs2) : vrd += vrs1 * vrs2 + * + * For L1BLAS, we support a vector being summed to a scalar. + * NOTE: srd must be a scalar reg + * ATL_vrsum1(srd, vrs) : srd = sum(vrs[:]) + * For k-vec amm, we need to support summing up VLEN different accumulators, + * and placing the result in one destination. This requires the calling code + * to know VLEN (perhaps with a cpp if/else chain), but allows us to get + * high performance on C stores.
We show the answer for vvrsum2 & 4, but + * remember that only vvrsumVLEN will actually exist: + * ATL_vvrsum2(s0, s1) : s0[0] = sum(s0[:]), s0[1] = sum(s1[:]) + * ATL_vvrsum4(s0, s1, s2, s3) : s0[i] = sum(si[:]), i = 0..3 + */ +/* + * If ATL_VLEN is set, force gnuvec if it isn't set to the native length + */ +#ifdef ATL_VLEN + #ifdef ATL_VSX + #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 4) || \ + ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2) + #define ATL_FRCGNUVEC + #endif + #elif defined(ATL_VXZ) + #if ATL_VLEN != 2 + #define ATL_FRCGNUVEC + #endif + #elif defined(ATL_NEON) + #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 4) || \ + ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 1) + #define ATL_FRCGNUVEC + #endif + #elif defined(ATL_VECARM1) + #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 4) || \ + ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2) + #define ATL_FRCGNUVEC + #endif + #elif defined(ATL_AVXMAC) + #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 8) || \ + ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 4) + #define ATL_FRCGNUVEC + #endif + #elif defined(ATL_AVX) + #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 8) || \ + ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 4) + #define ATL_FRCGNUVEC + #endif + #elif defined(ATL_SSE2) + #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 4) || \ + ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2) + #define ATL_FRCGNUVEC + #endif + #elif defined(ATL_SSE1) + #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 4) || \ + ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2) + #define ATL_FRCGNUVEC + #endif + #endif +/* + * Compute ATL_VLEN based on SIMD extension & TYPE, if not already set + */ +#else + #ifdef ATL_VSX + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLEN 4 + #else + #define ATL_VLEN 2 + #endif + #elif defined(ATL_VXZ) + #define ATL_VLEN 2 + #elif defined(ATL_NEON) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLEN 4 
+ #else + #define ATL_VLEN 1 + #endif + #elif defined(ATL_VECARM1) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLEN 4 + #else + #define ATL_VLEN 2 + #endif + #elif defined(ATL_AVXMAC) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLEN 8 + #else + #define ATL_VLEN 4 + #endif + #elif defined(ATL_AVX) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLEN 8 + #else + #define ATL_VLEN 4 + #endif + #elif defined(ATL_SSE2) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLEN 4 + #else + #define ATL_VLEN 2 + #endif + #elif defined(ATL_SSE1) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLEN 4 + #else + #define ATL_VLEN 2 + #endif + #endif +#endif +/* + * Derive ATL_VLENb (veclen in bytes) from ATL_VLEN + */ +#ifndef ATL_VLEN + #error "ATL_VLEN not defined!" +#else + #if ATL_VLEN == 1 + #define ATL_VLSH 0 + #define ATL_DivByVLEN(i_) (i_) + #define ATL_MulByVLEN(i_) (i_) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLENb 4 + #else + #define ATL_VLENb 8 + #endif + #elif ATL_VLEN == 2 + #define ATL_VLSH 1 + #define ATL_DivByVLEN(i_) ((i_)>>1) + #define ATL_MulByVLEN(i_) ((i_)<<1) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLENb 8 + #else + #define ATL_VLENb 16 + #endif + #elif ATL_VLEN == 4 + #define ATL_VLSH 2 + #define ATL_DivByVLEN(i_) ((i_)>>2) + #define ATL_MulByVLEN(i_) ((i_)<<2) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLENb 16 + #else + #define ATL_VLENb 32 + #endif + #elif ATL_VLEN == 8 + #define ATL_VLSH 3 + #define ATL_DivByVLEN(i_) ((i_)>>3) + #define ATL_MulByVLEN(i_) ((i_)<<3) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLENb 32 + #else + #define ATL_VLENb 64 + #endif + #elif ATL_VLEN == 16 + #define ATL_VLSH 4 + #define ATL_DivByVLEN(i_) ((i_)>>4) + #define ATL_MulByVLEN(i_) ((i_)<<4) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLENb 64 + #else + #define ATL_VLENb 128 + #endif + #elif ATL_VLEN == 32 + #define ATL_VLSH 5 + #define ATL_DivByVLEN(i_) ((i_)>>5) + #define 
ATL_MulByVLEN(i_) ((i_)<<5) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLENb 128 + #else + #define ATL_VLENb 256 + #endif + #else + #define ATL_DivByVLEN(i_) ((i_)/ATL_VLEN) + #define ATL_MulByVLEN(i_) ((i_)*ATL_VLEN) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VLENb (ATL_VLEN*4) + #else + #define ATL_VLENb (ATL_VLEN*8) + #endif + #endif +#endif +/* + * We may want to force use of GNU vectorization on any platform. If so, + * undefine any defined system-specific vectorization. + * Undefine all vectorization if VLEN=1 (scalar code)! + */ +#if defined(ATL_FRCGNUVEC) || ATL_VLEN < 2 + #ifdef ATL_NEON + #undef ATL_NEON + #endif + #ifdef ATL_VECARM1 + #undef ATL_VECARM1 + #endif + #ifdef ATL_SSE1 + #undef ATL_SSE1 + #endif + #ifdef ATL_SSE2 + #undef ATL_SSE2 + #endif + #ifdef ATL_SSE3 + #undef ATL_SSE3 + #endif + #ifdef ATL_AVX + #undef ATL_AVX + #endif + #ifdef ATL_AVXMAC + #undef ATL_AVXMAC + #endif + #ifdef ATL_VXZ + #undef ATL_VXZ + #endif + #ifdef ATL_VSX + #undef ATL_VSX + #endif + #if ATL_VLEN < 2 && defined(ATL_FRCGNUVEC) + #undef ATL_FRCGNUVEC + #endif +#endif +/* + * Now set computational macros based on ATL_VLEN & SIMD defines + */ +#if defined(ATL_VSX) + #include +/* + * Older gcc (before 4.9.2) don't support xxpermdi, merge[o,e], xxsel + */ + #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ > 9) || \ + (__GNUC__ == 4 && __GNUC_MINOR__ == 9 && __GNUC_PATCHLEVEL__ > 1) || \ + !defined(__GNUC__) + #define ATL_FULLGCCVSX 1 + #else + #define ATL_FULLGCCVSX 0 + #endif + #if defined(SREAL) || defined(SCPLX) + #define ATL_VTYPE vector float + #if ATL_VLEN != 4 + #error "VSX supports only VLEN = 4 for floats!" + #endif + #else /* double precision */ + #define ATL_VTYPE vector double + #if ATL_VLEN != 2 + #error "VSX supports only VLEN = 2 for doubles!" 
+ #endif + #endif + #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0) + #define ATL_vcopy(d_, s_) d_ = s_ + #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_))) + #define ATL_vuld(v_, p_) v_ = vec_vsx_ld(0, (ATL_VTYPE*)(p_)) + #define ATL_vld(v_, p_) v_ = vec_ld(0, (ATL_VTYPE*)(p_)) + #define ATL_vust(p_, v_) vec_vsx_st(v_, 0, (ATL_VTYPE*)(p_)) + #define ATL_vst(p_, v_) vec_st(v_, 0, (ATL_VTYPE*)(p_)) + #define ATL_vadd(d_, s1_, s2_) d_ = vec_add(s1_, s2_) + #define ATL_vsub(d_, s1_, s2_) d_ = vec_sub(s1_, s2_) + #define ATL_vmul(d_, s1_, s2_) d_ = vec_mul(s1_, s2_) + #define ATL_vmac(d_, s1_, s2_) d_ = vec_madd(s1_, s2_, d_) + #if defined(SREAL) || defined(SCPLX) + #define ATL_vrsum1(d_, s_) \ + { \ + ATL_VTYPE t_; \ + d_ = vec_splat(s_, 1); \ + d_ = vec_add(d_, s_) ; \ + t_ = vec_splat(s_, 2); \ + d_ = vec_add(d_, t_) ; \ + t_ = vec_splat(s_, 3); \ + d_ = vec_add(d_, t_) ; \ + } + #if ATL_FULLGCCVSX + #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \ + { ATL_VTYPE t_, h_; /*{s0d,s0c,s0b,s0a}*/\ + t_ = vec_vmrghw(s0_, s1_); /*{s1b,s0b,s1a,s0a}*/\ + s0_ = vec_vmrglw(s0_, s1_); /*{s1d,s0d,s1c,s0c}*/ \ + s0_ = ATL_vadd(s0_, s0_, t_); /*{s1bd,s0bd,s1ac,s0ac}*/\ + h_ = vec_vmrghw(s2_, s3_); /*{s3b,s2b,s3a,s2a}*/\ + s2_ = vec_vmrglw(s2_, s3_); /*{s3d,s2d,s3c,s2c}*/ \ + s2_ = ATL_vadd(s2_, s2_, h_); /*{s3bd,s2bd,s3ac,s2ac}*/\ + t_ = vec_xxpermdi(s0_, s2_, 0); /*{s3ac,s2ac,s1ac,s0ac}*/\ + s0_ = vec_xxpermdi(s0_, s2_, 3); /*{s3bd,s2bd,s1bd,s0bd}*/ \ + ATL_vadd(s0_, s0_, t_); \ + s0_ = vec_xxpermdi(s0_, s0_, 2); /* pwr8 endian-insanity */ \ + } + #define ATL_vvrsum2(s0_, s1_) \ + { ATL_VTYPE t_, h_; /*{s0d,s0c,s0b,s0a}*/\ + t_ = vec_vmrghw(s0_, s1_); /*{s1b,s0b,s1a,s0a}*/\ + s0_ = vec_vmrglw(s0_, s1_); /*{s1d,s0d,s1c,s0c}*/ \ + s0_ = ATL_vadd(s0_, s0_, t_); /*{s1bd,s0bd,s1ac,s0ac}*/\ + t_ = vec_xxpermdi(s0_, s0_, 0); /*{s1ac,s0ac,s1ac,s0ac}*/\ + s0_ = vec_xxpermdi(s0_, s0_, 3); /*{s1bd,s0bd,s1bd,s0bd}*/ \ + ATL_vadd(s0_, s0_, t_); \ + s0_ = vec_xxpermdi(s0_, s0_, 2); /* pwr8 
endian-insanity */ \ + } + #define ATL_vvrsum1(s0_) \ + { ATL_VTYPE t_, h_; /*{s0d,s0c,s0b,s0a}*/\ + t_ = vec_vmrghw(s0_, s0_); /*{s0b,s0b,s0a,s0a}*/\ + s0_ = vec_vmrglw(s0_, s0_); /*{s0d,s0d,s0c,s0c}*/ \ + s0_ = ATL_vadd(s0_, s0_, t_); /*{s0bd,s0bd,s0ac,s0ac}*/\ + t_ = vec_xxpermdi(s0_, s0_, 0); /*{s0ac,s0ac,s0ac,s0ac}*/\ + s0_ = vec_xxpermdi(s0_, s0_, 3); /*{s0bd,s0bd,s0bd,s0bd}*/ \ + ATL_vadd(s0_, s0_, t_); \ + s0_ = vec_xxpermdi(s0_, s0_, 2); /* pwr8 endian-insanity */ \ + } + #endif + #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0) + #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1) + #define ATL_vsplat2(d_, s_) d_ = vec_splat(s_, 2) + #define ATL_vsplat3(d_, s_) d_ = vec_splat(s_, 3) + #else + #define ATL_vrsum1(d_, s_) \ + { \ + d_ = vec_splat(s_, 1); \ + d_ = vec_add(d_, s_) ; \ + } + #if ATL_FULLGCCVSX + #define ATL_vvrsum2(s0_, s1_) \ + { ATL_VTYPE t_;\ + t_ = vec_xxpermdi(s0_, s1_, 0); \ + s0_ = vec_xxpermdi(s0_, s1_, 3); \ + ATL_vadd(s0_, s0_, t_); \ + s0_ = vec_xxpermdi(s0_, s0_, 2); /* pwr8 endian-insanity */ \ + } + #define ATL_vvrsum1(s0_) \ + { ATL_VTYPE t_;\ + t_ = vec_xxpermdi(s0_, s0_, 0); \ + ATL_vadd(s0_, s0_, t_); \ + } + #else + #define ATL_vvrsum1(s0_) \ + { ATL_VTYPE t_;\ + t_ = vec_splat(s0_, 1); \ + ATL_vadd(s0_, s0_, t_); \ + } + #endif + #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0) + #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1) + #endif +#elif defined(ATL_VXZ) + #include + + #if ATL_VLEN != 2 + #error "VSXZ supports only VLEN = 2!" 
+ #endif + #define ATL_VTYPE vector double + #if (defined(DREAL) || defined(DCPLX)) + #define ATL_vld(v_, p_) {v_[0] = *(p_); v_[1] = (p_)[1]; } + #define ATL_vst(p_, v_) {*(p_) = v_[0]; (p_)[1] = v_[1];} + #else + #define ATL_vld(v_, p_) v_ = vec_ld2f(p_); + #define ATL_vst(p_, v_) vec_st2f(v_, p_); + #endif + #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0) + #define ATL_vcopy(d_, s_) d_ = s_ + #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_))) + #define ATL_vuld(v_, p_) ATL_vld(v_, p_) + #define ATL_vust(p_, v_) ATL_vst(p_, v_) + #define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_ + #define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_ + #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_ + #define ATL_vmac(d_, s1_, s2_) d_ = vec_madd(s1_, s2_, d_) + #define ATL_vvrsum1(s0_) \ + { ATL_VTYPE t_;\ + t_ = vec_splat(s0_, 1); \ + s0_ += t_; /* s0_[0] = s0_[0]+s0_[1] */ \ + } + #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0) + #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1) +#elif defined(ATL_NEON) && (defined(SREAL) || defined(SCPLX)) + #include "arm_neon.h" + #define ATL_VTYPE float32x4_t + #define ATL_vzero(v_) v_ = vdupq_n_f32(0.0f) + #define ATL_vbcast(v_, p_) v_ = vdupq_n_f32(*(p_)); + #define ATL_vld(v_, p_) v_ = vld1q_f32(p_) + #define ATL_vst(p_, v_) vst1q_f32(p_, v_) + #define ATL_vadd(d_, s1_, s2_) d_ = vaddq_f32(s1_, s2_) + #define ATL_vsub(d_, s1_, s2_) d_ = vsubq_f32(s1_, s2_) + #define ATL_vmul(d_, s1_, s2_) d_ = vmulq_f32(s1_, s2_) + #define ATL_vmac(d_, s1_, s2_) d_ = vmlaq_f32(d_, s1_, s2_) + #define ATL_vrsum1(d_, s_) \ + { ATL_VTYPE t4_; float32x2_t t2_, t1_; \ + t1_ = vget_high_f32(s_); \ + t2_ = vget_low_f32(s_); \ + t2_ = vpadd_f32(t1_, t2_); \ + d_ = vget_lane_f32(t2_, 0); \ + d_ += vget_lane_f32(t2_, 1); \ + } + #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \ + { ATL_VTYPE t0_, t1_; \ + t0_[0] = s0_[0]; \ + t0_[1] = s1_[0]; \ + t0_[2] = s2_[0]; \ + t0_[3] = s3_[0]; \ + t1_[0] = s0_[1]; \ + t1_[1] = s1_[1]; \ + t1_[2] = s2_[1]; \ + t1_[3] = s3_[1]; \ + t0_ = vaddq_f32(t0_, t1_); \ + 
t1_[0] = s0_[2]; \ + t1_[1] = s1_[2]; \ + t1_[2] = s2_[2]; \ + t1_[3] = s3_[2]; \ + t0_ = vaddq_f32(t0_, t1_); \ + t1_[0] = s0_[3]; \ + t1_[1] = s1_[3]; \ + t1_[2] = s2_[3]; \ + t1_[3] = s3_[3]; \ + s0_ = vaddq_f32(t0_, t1_); \ + } + #define ATL_vsplat0(d_, s_) d_ = vmovq_n_f32(vgetq_lane_f32(s_, 0)) + #define ATL_vsplat1(d_, s_) d_ = vmovq_n_f32(vgetq_lane_f32(s_, 1)) + #define ATL_vsplat2(d_, s_) d_ = vmovq_n_f32(vgetq_lane_f32(s_, 2)) + #define ATL_vsplat3(d_, s_) d_ = vmovq_n_f32(vgetq_lane_f32(s_, 3)) + #define ATL_vuld(v_, p_) ATL_vld(v_, p_) + #define ATL_vust(p_, v_) ATL_vst(p_, v_) +#elif defined(ATL_VECARM1) + #include "arm_neon.h" + #if defined(SREAL) || defined(SCPLX) + #define ATL_VTYPE float32x4_t + #else + #define ATL_VTYPE float64x2_t + #endif + #if defined(SREAL) || defined(SCPLX) + #define ATL_vzero(v_) v_ = vdupq_n_f32(0.0f) + #define ATL_vbcast(v_, p_) v_ = vld1q_dup_f32(p_) + #define ATL_vld(v_, p_) v_ = vld1q_f32(p_) + #define ATL_vst(p_, v_) vst1q_f32(p_, v_) + #define ATL_vadd(d_, s1_, s2_) d_ = vaddq_f32(s1_, s2_) + #define ATL_vsub(d_, s1_, s2_) d_ = vsubq_f32(s1_, s2_) + #define ATL_vmul(d_, s1_, s2_) d_ = vmulq_f32(s1_, s2_) + #define ATL_vmac(d_, s1_, s2_) d_ = vfmaq_f32(d_, s1_, s2_) + #define ATL_vrsum1(d_, s_) \ + { ATL_VTYPE t4_; float32x2_t t2_, t1_; \ + t1_ = vget_high_f32(s_); \ + t2_ = vget_low_f32(s_); \ + t2_ = vpadd_f32(t1_, t2_); \ + d_ = vget_lane_f32(t2_, 0); \ + d_ += vget_lane_f32(t2_, 1); \ + } + #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \ + { ATL_VTYPE t0_, t1_; \ + t0_[0] = s0_[0]; \ + t0_[1] = s1_[0]; \ + t0_[2] = s2_[0]; \ + t0_[3] = s3_[0]; \ + t1_[0] = s0_[1]; \ + t1_[1] = s1_[1]; \ + t1_[2] = s2_[1]; \ + t1_[3] = s3_[1]; \ + t0_ = vaddq_f32(t0_, t1_); \ + t1_[0] = s0_[2]; \ + t1_[1] = s1_[2]; \ + t1_[2] = s2_[2]; \ + t1_[3] = s3_[2]; \ + t0_ = vaddq_f32(t0_, t1_); \ + t1_[0] = s0_[3]; \ + t1_[1] = s1_[3]; \ + t1_[2] = s2_[3]; \ + t1_[3] = s3_[3]; \ + s0_ = vaddq_f32(t0_, t1_); \ + } + #define ATL_vsplat0(d_, s_) d_ 
= vdupq_laneq_f32(s_, 0) + #define ATL_vsplat1(d_, s_) d_ = vdupq_laneq_f32(s_, 1) + #define ATL_vsplat2(d_, s_) d_ = vdupq_laneq_f32(s_, 2) + #define ATL_vsplat3(d_, s_) d_ = vdupq_laneq_f32(s_, 3) + #else /* double */ + #define ATL_vzero(v_) v_ = vdupq_n_f64(0.0) + #define ATL_vbcast(v_, p_) v_ = vld1q_dup_f64(p_) + #define ATL_vld(v_, p_) v_ = vld1q_f64(p_) + #define ATL_vst(p_, v_) vst1q_f64(p_, v_) + #define ATL_vadd(d_, s1_, s2_) d_ = vaddq_f64(s1_, s2_) + #define ATL_vsub(d_, s1_, s2_) d_ = vsubq_f64(s1_, s2_) + #define ATL_vmul(d_, s1_, s2_) d_ = vmulq_f64(s1_, s2_) + #define ATL_vmac(d_, s1_, s2_) d_ = vfmaq_f64(d_, s1_, s2_) + #define ATL_vrsum1(d_, s_) d_ = vgetq_lane_f64(vpaddq_f64(s_, s_), 0) + #define ATL_vvrsum2(s0_, s1_) s0_ = vpaddq_f64(s0_, s1_) + #define ATL_vsplat0(d_, s_) d_ = vdupq_laneq_f64(s_, 0) + #define ATL_vsplat1(d_, s_) d_ = vdupq_laneq_f64(s_, 1) + #endif + #define ATL_vuld(v_, p_) ATL_vld(v_, p_) + #define ATL_vust(p_, v_) ATL_vst(p_, v_) +#elif defined(ATL_AVXMAC) || defined(ATL_AVX) + #include + #if defined(SREAL) || defined(SCPLX) + #if ATL_VLEN != 8 + #error "VLEN != 8 not supported for AVX or AVX2!" 
+ #endif + #define ATL_VTYPE __m256 + #define ATL_vzero(v_) v_ = _mm256_setzero_ps() + #define ATL_vbcast(v_, p_) v_ = _mm256_broadcast_ss(p_) + #define ATL_vuld(v_, p_) v_ = _mm256_loadu_ps(p_) + #define ATL_vld(v_, p_) v_ = _mm256_load_ps(p_) + #define ATL_vust(p_, v_) _mm256_storeu_ps(p_, v_) + #define ATL_vst(p_, v_) _mm256_store_ps(p_, v_) + #define ATL_vadd(d_, s1_, s2_) d_ = _mm256_add_ps(s1_, s2_) + #define ATL_vsub(d_, s1_, s2_) d_ = _mm256_sub_ps(s1_, s2_) + #define ATL_vmul(d_, s1_, s2_) d_ = _mm256_mul_ps(s1_, s2_) + #ifdef ATL_AVXMAC + #define ATL_vmac(d_, s1_, s2_) \ + d_ = _mm256_fmadd_ps(s1_, s2_, d_) + #else + #define ATL_vmac(d_, s1_, s2_) \ + { ATL_VTYPE t_; \ + t_ = _mm256_mul_ps(s1_, s2_); \ + d_ = _mm256_add_ps(t_, d_); \ + } + #endif + #define ATL_vvrsum8(s0_, s1_, s2_, s3_, s4_, s5_, s6_, s7_) \ + { \ + s0_ = _mm256_hadd_ps(s0_, s1_); \ + /*{s1gh,s1ef,s0gh,s0ef,s1cd,s1ab,s0cd,s0ab}*/\ + s2_ = _mm256_hadd_ps(s2_, s3_); \ + /*{s3gh,s3ef,s2gh,s2ef,s3cd,s3ab,s2cd,s2ab}*/\ + s4_ = _mm256_hadd_ps(s4_, s5_); \ + /*{s5gh,s5ef,s4gh,s4ef,s5cd,s5ab,s4cd,s4ab}*/\ + s6_ = _mm256_hadd_ps(s6_, s7_); \ + /*{s7gh,s7ef,s6gh,s6ef,s7cd,s7ab,s6cd,s6ab}*/\ + s0_ = _mm256_hadd_ps(s0_, s2_); \ + /*{s3e-h,s2e-h,s1e-h,s0e-g,s3a-d,s2a-d,s1a-d,s0a-d}*/\ + s4_ = _mm256_hadd_ps(s4_, s6_); \ + /*{s7e-h,s6e-h,s5e-h,s4e-g,s7a-d,s6a-d,s5a-d,s4a-d}*/\ + s1_ = _mm256_permute2f128_ps(s0_, s4_, 0x31); \ + /*{s7e-h,s6e-h,s5e-h,s4e-g,s3e-h,s2e-h,s1e-h,s0e-g}*/\ + s0_ = _mm256_permute2f128_ps(s0_, s4_, 0x20); \ + /*{s7a-d,s6a-d,s5a-d,s4a-d,s3a-d,s2a-d,s1a-d,s0a-d}*/\ + ATL_vadd(s0_, s0_, s1_); \ + } + #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \ + { \ + s0_ = _mm256_hadd_ps(s0_, s1_); \ + /*{s1gh,s1ef,s0gh,s0ef,s1cd,s1ab,s0cd,s0ab}*/\ + s2_ = _mm256_hadd_ps(s2_, s3_); \ + /*{s3gh,s3ef,s2gh,s2ef,s3cd,s3ab,s2cd,s2ab}*/\ + s0_ = _mm256_hadd_ps(s0_, s2_); \ + /*{s3e-h,s2e-h,s1e-h,s0e-g,s3a-d,s2a-d,s1a-d,s0a-d}*/\ + s1_ = _mm256_permute2f128_ps(s0_, s0_, 0x31); \ + 
/*{s3e-h,s2e-h,s1e-h,s0e-g,s3e-h,s2e-h,s1e-h,s0e-g}*/\ + s0_ = _mm256_permute2f128_ps(s0_, s0_, 0x20); \ + /*{s3a-d,s2a-d,s1a-d,s0a-d,s3a-d,s2a-d,s1a-d,s0a-d}*/\ + ATL_vadd(s0_, s0_, s1_); \ + } + #define ATL_vvrsum2(s0_, s1_) \ + { \ + s0_ = _mm256_hadd_ps(s0_, s1_); \ + /*{s1gh,s1ef,s0gh,s0ef,s1cd,s1ab,s0cd,s0ab}*/\ + s0_ = _mm256_hadd_ps(s0_, s0_); \ + /*{s1e-h,s0e-h,s1e-h,s0e-g,s1a-d,s0a-d,s1a-d,s0a-d}*/\ + s1_ = _mm256_permute2f128_ps(s0_, s0_, 0x31); \ + /*{s1e-h,s0e-h,s1e-h,s0e-g,s1e-h,s0e-h,s1e-h,s0e-g}*/\ + s0_ = _mm256_permute2f128_ps(s0_, s0_, 0x20); \ + /*{s1a-d,s0a-d,s1a-d,s0a-d,s1a-d,s0a-d,s1a-d,s0a-d}*/\ + ATL_vadd(s0_, s0_, s1_); \ + } + #define ATL_vvrsum1(s0_) \ + { ATL_VTYPE t1_; \ + s0_ = _mm256_hadd_ps(s0_, s0_); \ + /*{s0gh,s0ef,s0gh,s0ef,s0cd,s0ab,s0cd,s0ab}*/\ + s0_ = _mm256_hadd_ps(s0_, s0_); \ + /*{s0e-h,s0e-h,s0e-h,s0e-g,s0a-d,s0a-d,s0a-d,s0a-d}*/\ + t1_ = _mm256_permute2f128_ps(s0_, s0_, 0x31); \ + /*{s0e-h,s0e-h,s0e-h,s0e-g,s0e-h,s0e-h,s0e-h,s0e-g}*/\ + s0_ = _mm256_permute2f128_ps(s0_, s0_, 0x20); \ + /*{s0a-d,s0a-d,s0a-d,s0a-d,s0a-d,s0a-d,s0a-d,s0a-d}*/\ + ATL_vadd(s0_, s0_, t1_); \ + } + #define ATL_vsplat0(d_, s_) \ + { \ + d_ = _mm256_shuffle_ps(s_, s_, 0); \ + d_ = _mm256_insertf128_ps(d_, _mm256_extractf128_ps(d_,0), 1); \ + } + #define ATL_vsplat1(d_, s_) \ + { \ + d_ = _mm256_shuffle_ps(s_, s_, 0x55); \ + d_ = _mm256_insertf128_ps(d_, _mm256_extractf128_ps(d_,0), 1); \ + } + #define ATL_vsplat2(d_, s_) \ + { \ + d_ = _mm256_shuffle_ps(s_, s_, 0xAA); \ + d_ = _mm256_insertf128_ps(d_, _mm256_extractf128_ps(d_,0), 1); \ + } + #define ATL_vsplat3(d_, s_) \ + { \ + d_ = _mm256_shuffle_ps(s_, s_, 0xFF); \ + d_ = _mm256_insertf128_ps(d_, _mm256_extractf128_ps(d_,0), 1); \ + } + #define ATL_vsplat4(d_, s_) \ + { \ + d_ = _mm256_shuffle_ps(s_, s_, 0); \ + d_ = _mm256_insertf128_ps(d_, _mm256_extractf128_ps(d_,1), 0); \ + } + #define ATL_vsplat5(d_, s_) \ + { \ + d_ = _mm256_shuffle_ps(s_, s_, 0x55); \ + d_ = _mm256_insertf128_ps(d_, 
_mm256_extractf128_ps(d_,1), 0); \ + } + #define ATL_vsplat6(d_, s_) \ + { \ + d_ = _mm256_shuffle_ps(s_, s_, 0xAA); \ + d_ = _mm256_insertf128_ps(d_, _mm256_extractf128_ps(d_,1), 0); \ + } + #define ATL_vsplat7(d_, s_) \ + { \ + d_ = _mm256_shuffle_ps(s_, s_, 0xFF); \ + d_ = _mm256_insertf128_ps(d_, _mm256_extractf128_ps(d_,1), 0); \ + } + #else /* double precision */ + #if ATL_VLEN != 4 + #error "AVX SUPPORTS ONLY VLEN=4 FOR DOUBLE!" + #endif + #define ATL_VTYPE __m256d + #define ATL_vzero(v_) v_ = _mm256_setzero_pd() + #define ATL_vbcast(v_, p_) v_ = _mm256_broadcast_sd(p_) + #define ATL_vuld(v_, p_) v_ = _mm256_loadu_pd(p_) + #define ATL_vld(v_, p_) v_ = _mm256_load_pd(p_) + #define ATL_vust(p_, v_) _mm256_storeu_pd(p_, v_) + #define ATL_vst(p_, v_) _mm256_store_pd(p_, v_) + #define ATL_vadd(d_, s1_, s2_) d_ = _mm256_add_pd(s1_, s2_) + #define ATL_vsub(d_, s1_, s2_) d_ = _mm256_sub_pd(s1_, s2_) + #define ATL_vmul(d_, s1_, s2_) d_ = _mm256_mul_pd(s1_, s2_) + #ifdef ATL_AVXMAC + #define ATL_vmac(d_, s1_, s2_) \ + d_ = _mm256_fmadd_pd(s1_, s2_, d_) + #else + #define ATL_vmac(d_, s1_, s2_) \ + { ATL_VTYPE t_; \ + t_ = _mm256_mul_pd(s1_, s2_); \ + d_ = _mm256_add_pd(t_, d_); \ + } + #endif + #define ATL_vrsum1(d_, s_) \ + { __m128d t_; \ + t_ = _mm_add_pd(_mm256_extractf128_pd(s_, 0), \ + _mm256_extractf128_pd(s_, 1)); \ + t_ = _mm_hadd_pd(t_, t_); \ + d_ = _mm_cvtsd_f64(t_); \ + } + #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \ + { \ + s0_ = _mm256_hadd_pd(s0_, s1_); /*{s1cd,s0cd,s1ab,s0ab}*/ \ + s2_ = _mm256_hadd_pd(s2_, s3_); /*{s3cd,s2cd,s3ab,s2ab}*/ \ + s1_ = _mm256_permute2f128_pd(s0_, s2_,0x31);/*{s3cd,s2cd,s1cd,s0cd}*/ \ + s0_ = _mm256_permute2f128_pd(s0_, s2_,0x20);/*{s3ab,s2ab,s1ab,s0ab}*/ \ + ATL_vadd(s0_, s0_, s1_); \ + } + #define ATL_vvrsum2(s0_, s1_) \ + { \ + s0_ = _mm256_hadd_pd(s0_, s1_); /*{s1cd,s0cd,s1ab,s0ab}*/ \ + s1_ = _mm256_permute2f128_pd(s0_, s1_,0x31);/*{s3cd,s2cd,s1cd,s0cd}*/ \ + ATL_vadd(s0_, s0_, s1_); \ + } + #define ATL_vvrsum1(s0_) \ + 
{ ATL_VTYPE s1_; \ + s0_ = _mm256_hadd_pd(s0_, s0_); /*{s0cd,s0cd,s0ab,s0ab}*/ \ + s1_ = _mm256_permute2f128_pd(s0_, s0_,0x31);/*{s0cd,s0cd,s0cd,s0cd}*/ \ + ATL_vadd(s0_, s0_, s1_); \ + } + #define ATL_vsplat0(d_, s_) \ + { \ + d_ = _mm256_unpacklo_pd(s_, s_); \ + d_ = _mm256_insertf128_pd(d_, _mm256_extractf128_pd(d_,0), 1); \ + } + #define ATL_vsplat2(d_, s_) \ + { \ + d_ = _mm256_unpacklo_pd(s_, s_); \ + d_ = _mm256_insertf128_pd(d_, _mm256_extractf128_pd(d_,1), 0); \ + } + #define ATL_vsplat1(d_, s_) \ + { \ + d_ = _mm256_unpackhi_pd(s_, s_); \ + d_ = _mm256_insertf128_pd(d_, _mm256_extractf128_pd(d_,0), 1); \ + } + #define ATL_vsplat3(d_, s_) \ + { \ + d_ = _mm256_unpackhi_pd(s_, s_); \ + d_ = _mm256_insertf128_pd(d_, _mm256_extractf128_pd(d_,1), 0); \ + } + #endif +#elif defined(ATL_SSE2) && (defined(DREAL) || defined(DCPLX)) + #include + #if defined(ATL_SSE3) + #include + #include + #endif + #define ATL_VTYPE __m128d + #if ATL_VLEN != 2 + #error "VLEN == 2 only supported size for double precision SSE!" 
+ #endif + #define ATL_vzero(v_) v_ = _mm_setzero_pd() + #define ATL_vbcast(v_, p_) v_ = _mm_load1_pd(p_) + #define ATL_vuld(v_, p_) v_ = _mm_loadu_pd(p_) + #define ATL_vld(v_, p_) v_ = _mm_load_pd(p_) + #define ATL_vust(p_, v_) _mm_storeu_pd(p_, v_) + #define ATL_vst(p_, v_) _mm_store_pd(p_, v_) + #define ATL_vadd(d_, s1_, s2_) d_ = _mm_add_pd(s1_, s2_) + #define ATL_vsub(d_, s1_, s2_) d_ = _mm_sub_pd(s1_, s2_) + #define ATL_vmul(d_, s1_, s2_) d_ = _mm_mul_pd(s1_, s2_) + #define ATL_vmac(d_, s1_, s2_) \ + { ATL_VTYPE t_; \ + t_ = _mm_mul_pd(s1_, s2_); \ + d_ = _mm_add_pd(t_, d_); \ + } + #ifdef ATL_SSE3 + #define ATL_vrsum1(d_, s_) d_ = _mm_cvtsd_f64(_mm_hadd_pd(s_, s_)) + #define ATL_vvrsum2(s0_, s1_) s0_ = _mm_hadd_pd(s0_, s1_) + #define ATL_vvrsum1(s0_) s0_ = _mm_hadd_pd(s0_, s0_) + #define ATL_vsplat0(d_, s_) d_ = _mm_movedup_pd(s_) + #define ATL_vsplat1(d_, s_) d_ = (ATL_VTYPE) \ + _mm_shuffle_epi32((__m128i)(s_), 0xEE) + #else + #define ATL_vrsum1(d_, s_) \ + d_ = _mm_cvtsd_f64(_mm_add_sd(_mm_unpackhi_pd(s_, s_), s_)) + #define ATL_vvrsum2(s0_, s1_) \ + { \ + __m128d t0_; \ + t0_ = _mm_unpackhi_pd(s0_, s1_); \ + s0_ = _mm_unpacklo_pd(s0_, s1_); \ + ATL_vadd(s0_, s0_, t0_); \ + } + #define ATL_vvrsum1(s0_) \ + { \ + __m128d t0_; \ + t0_ = _mm_unpackhi_pd(s0_, s0_); \ + s0_ = _mm_unpacklo_pd(s0_, s0_); \ + ATL_vadd(s0_, s0_, t0_); \ + } + #define ATL_vsplat0(d_, s_) d_ = (ATL_VTYPE) \ + _mm_shuffle_epi32((__m128i)(s_), 0x44) /* dup low double */ + #define ATL_vsplat1(d_, s_) d_ = (ATL_VTYPE) \ + _mm_shuffle_epi32((__m128i)(s_), 0xEE) /* dup high double */ + #define ATL_vsplat2(d_, s_) d_ = (ATL_VTYPE) \ + _mm_shuffle_epi32((__m128i)(s_), 0xAA) + #define ATL_vsplat3(d_, s_) d_ = (ATL_VTYPE) \ + _mm_shuffle_epi32((__m128i)(s_), 0xFF) + #endif +#elif defined(ATL_SSE1) + #include + #if defined(ATL_SSE3) + #include + #endif + #define ATL_VTYPE __m128 + #if defined(ATL_VLEN) && ATL_VLEN != 4 + #error "VLEN == 4 only supported size for single precision SSE!" 
+ #elif !defined(ATL_VLEN) + #define ATL_VLEN 4 + #endif + #define ATL_vzero(v_) v_ = _mm_setzero_ps() + #define ATL_vbcast(v_, p_) v_ = _mm_load1_ps(p_) + #define ATL_vuld(v_, p_) v_ = _mm_loadu_ps(p_) + #define ATL_vld(v_, p_) v_ = _mm_load_ps(p_) + #define ATL_vust(p_, v_) _mm_storeu_ps(p_, v_) + #define ATL_vst(p_, v_) _mm_store_ps(p_, v_) + #define ATL_vadd(d_, s1_, s2_) d_ = _mm_add_ps(s1_, s2_) + #define ATL_vsub(d_, s1_, s2_) d_ = _mm_sub_ps(s1_, s2_) + #define ATL_vmul(d_, s1_, s2_) d_ = _mm_mul_ps(s1_, s2_) + #define ATL_vmac(d_, s1_, s2_) \ + { ATL_VTYPE t_; \ + t_ = _mm_mul_ps(s1_, s2_); \ + d_ = _mm_add_ps(t_, d_); \ + } + #ifdef ATL_SSE3 + #define ATL_vrsum1(d_, s_) \ + { ATL_VTYPE t_; \ + t_ = _mm_hadd_ps(s_, s_); \ + d_ = _mm_cvtss_f32(_mm_hadd_ps(t_, t_)); \ + } + #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \ + { \ + s0_ = _mm_hadd_ps(s0_, s1_); /*{s1cd,s1ab,s0cd,s0ab}*/ \ + s2_ = _mm_hadd_ps(s2_, s3_); /*{s3cd,s3ab,s2cd,s2ab}*/ \ + s0_ = _mm_hadd_ps(s0_, s2_); /*{s3a-d,s2a-d,s1a-d,s0a-d}*/ \ + } + #define ATL_vvrsum2(s0_, s1_) \ + { \ + s0_ = _mm_hadd_ps(s0_, s1_); /*{s1cd,s1ab,s0cd,s0ab}*/ \ + s0_ = _mm_hadd_ps(s0_, s0_); /*{s1a-d,s0a-d,s1a-d,s0a-d}*/ \ + } + #define ATL_vvrsum1(s0_) \ + { \ + s0_ = _mm_hadd_ps(s0_, s0_); /*{s0cd,s0ab,s0cd,s0ab}*/ \ + s0_ = _mm_hadd_ps(s0_, s0_); /*{s0a-d,s0a-d,s0a-d,s0a-d}*/ \ + } + #else + #define ATL_vrsum1(d_, s_) \ + { \ + ATL_VTYPE t_; \ + t_ = _mm_movehl_ps(s_, s_); \ + t_ = _mm_add_ps(t_, s_); \ + t_ = _mm_add_ps(t_, _mm_shuffle_ps(t_, t_, 1)); \ + d_ = _mm_cvtss_f32(t_); \ + } + #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \ + { /*{sXd, sXc, sXb, sXa}*/ \ + ATL_VTYPE t0_; \ + t0_ = _mm_unpackhi_ps(s0_,s1_); /*{s1d, s0d, s1c, s0c}*/\ + s0_ = _mm_unpacklo_ps(s0_,s1_); /*{s1b, s0b, s1a, s0a}*/\ + s1_ = _mm_unpackhi_ps(s2_,s3_); /*{s3d, s2d, s3c, s2c}*/\ + ATL_vadd(s0_, s0_, t0_); /*{s1bd, s0bd, s1ac, s0ac}*/\ + s2_ = _mm_unpacklo_ps(s2_,s3_); /*{s3b, s2b, s3a, s2a}*/\ + ATL_vadd(s2_, s2_, s1_); /*{s3bd, s2bd, s3ac, 
s2ac}*/\ + t0_ = _mm_shuffle_ps(s0_,s2_,0xEE); /*{s3bd, s2bd, s1bd, s0bd}*/\ + s0_ = _mm_shuffle_ps(s0_,s2_,0x44); /*{s3ac, s2ac, s1ac, s0ac}*/\ + ATL_vadd(s0_,s0_,t0_); /*{s3a-d,s2a-d,s1a-d,s0a-d}*/\ + } + #define ATL_vvrsum2(s0_, s1_) \ + { /*{sXd, sXc, sXb, sXa}*/ \ + ATL_VTYPE t0_; \ + t0_ = _mm_unpackhi_ps(s0_,s1_); /*{s1d, s0d, s1c, s0c}*/\ + s0_ = _mm_unpacklo_ps(s0_,s1_); /*{s1b, s0b, s1a, s0a}*/\ + ATL_vadd(s0_, s0_, t0_); /*{s1bd, s0bd, s1ac, s0ac}*/\ + t0_ = _mm_shuffle_ps(s0_,s0_,0xEE); /*{s1bd, s0bd, s1bd, s0bd}*/\ + s0_ = _mm_shuffle_ps(s0_,s0_,0x44); /*{s1ac, s0ac, s1ac, s0ac}*/\ + ATL_vadd(s0_,s0_,t0_); /*{s1a-d,s0a-d,s1a-d,s0a-d}*/\ + } + #define ATL_vvrsum1(s0_) \ + { /*{sXd, sXc, sXb, sXa}*/ \ + ATL_VTYPE t0_; \ + t0_ = _mm_unpackhi_ps(s0_,s0_); /*{s0d, s0d, s0c, s0c}*/\ + s0_ = _mm_unpacklo_ps(s0_,s0_); /*{s0b, s0b, s0a, s0a}*/\ + ATL_vadd(s0_, s0_, t0_); /*{s0bd, s0bd, s0ac, s0ac}*/\ + t0_ = _mm_shuffle_ps(s0_,s0_,0xEE); /*{s0bd, s0bd, s0bd, s0bd}*/\ + s0_ = _mm_shuffle_ps(s0_,s0_,0x44); /*{s0ac, s0ac, s0ac, s0ac}*/\ + ATL_vadd(s0_,s0_,t0_); /*{s0a-d,s0a-d,s0a-d,s0a-d}*/\ + } + #endif +#elif ATL_VLEN > 1 /* use gnuvec when atlas knows no VEC ISA */ + #define ATL_VTYPE TYPE __attribute__ ((vector_size (ATL_VLENb))) + #if defined(SREAL) || defined(SCPLX) + #define ATL_VITYPE int __attribute__ ((vector_size (ATL_VLENb))) + #else + #define ATL_VITYPE long long __attribute__ ((vector_size (ATL_VLENb))) + #endif + #define ATL_vzero(d_) d_ = (ATL_VTYPE)(((ATL_VITYPE)(d_))^((ATL_VITYPE)(d_))) + #define ATL_vcopy(d_, s_) d_ = s_ + #ifndef ATL_vbcast + #if 0 + #define ATL_vbcast(v_, p_) v_ = *((TYPE*)(p_)); + #elif 0 + #define ATL_vbcast(v_, p_) \ + { \ + (v_)[0] = p_; \ + v_ = __builtin_shuffle(v_, (ATL_VITYPE){0}); \ + } + #endif + #endif + #define ATL_vld(v_, p_) \ + v_ = *((ATL_VTYPE*)__builtin_assume_aligned(p_,ATL_VLENb)) + #define ATL_vust(p_, v_) *((ATL_VTYPE*)(p_)) = v_ + #define ATL_vst(p_, v_) \ + 
*((ATL_VTYPE*)__builtin_assume_aligned(p_,ATL_VLENb)) = v_ + #define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_ + #define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_ + #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_ + #define ATL_vmac(d_, s1_, s2_) d_ += s1_ * s2_ + #if ATL_VLEN == 1 + #define ATL_vbcast(v_, p_) v_ = *(p_) + #ifndef ATL_vuld + #define ATL_vuld(v_, p_) v_ = {*(p_)} + #endif + #ifndef ATL_vrsum1 + #define ATL_vrsum1(d_, s_) d_ = (s_) + #endif + #elif ATL_VLEN == 2 + #define ATL_vbcast(v_, p_) v_ = (ATL_VTYPE){*(p_), *(p_)} + #ifndef ATL_vuld + #define ATL_vuld(v_, p_) v_ = (ATL_VTYPE){*(p_), (p_)[1]} + #endif + #ifndef ATL_vrsum1 + #define ATL_vrsum1(d_, s_) d_ = ((s_)[0] + (s_)[1]) + #endif + #elif ATL_VLEN == 4 + #define ATL_vbcast(v_, p_) v_ = (ATL_VTYPE){*(p_), *(p_), *(p_), *(p_)} + #ifndef ATL_vuld + #define ATL_vuld(v_, p_) v_ = (ATL_VTYPE){*(p_),(p_)[1],(p_)[2],(p_)[3]} + #endif + #ifndef ATL_vrsum1 + #define ATL_vrsum1(d_, s_) d_ = ((s_)[0] + (s_)[1] + (s_)[2] + (s_)[3]) + #endif + #elif ATL_VLEN == 8 + #ifndef ATL_vuld + #define ATL_vuld(v_, p_) v_ = (ATL_VTYPE) \ + {*(p_), (p_)[1], (p_)[2], (p_)[3], (p_)[4], (p_)[5], (p_)[6], (p_)[7]} + #endif + #define ATL_vbcast(v_, p_) v_ = (ATL_VTYPE){*(p_), *(p_), *(p_), *(p_), \ + *(p_), *(p_), *(p_), *(p_)} + #ifndef ATL_vrsum1 + #define ATL_vrsum1(d_, s_) d_ = ((s_)[0] + (s_)[1] + (s_)[2] + (s_)[3] + \ + (s_)[4] + (s_)[5] + (s_)[6] + (s_)[7]) + #endif + #elif ATL_VLEN == 16 + #ifndef ATL_vuld + #define ATL_vuld(v_, p_) v_ = (ATL_VTYPE) \ + {*(p_),(p_)[1],(p_)[2],(p_)[3],(p_)[4],(p_)[5],(p_)[6],(p_)[7], \ + (p_)[8],(p_)[9],(p_)[10],(p_)[11],(p_)[12],(p_)[13],(p_)[14],(p_)[15]} + #endif + #ifndef ATL_vrsum1 + #define ATL_vrsum1(d_, s_) d_ = \ + ((s_)[0]+(s_)[1]+(s_)[2]+(s_)[3]+(s_)[4]+(s_)[5]+(s_)[6]+(s_)[7] +\ + (s_)[ 8]+(s_)[ 9]+(s_)[10]+(s_)[11]+(s_)[12]+(s_)[13]+(s_)[14]+(s_)[15]) + #endif + #elif ATL_VLEN == 32 + #ifndef ATL_vuld + #define ATL_vuld(v_, p_) v_ = (ATL_VTYPE) \ + 
{*(p_),(p_)[1],(p_)[2],(p_)[3],(p_)[4],(p_)[5],(p_)[6],(p_)[7], \ + (p_)[8],(p_)[9],(p_)[10],(p_)[11],(p_)[12],(p_)[13],(p_)[14],(p_)[15],\ + (p_)[16],(p_)[17],(p_)[18],(p_)[19],(p_)[20],(p_)[21],(p_)[22],(p_)[23],\ + (p_)[24],(p_)[25],(p_)[26],(p_)[27],(p_)[28],(p_)[29],(p_)[30],(p_)[31]} + #endif + #define ATL_vrsum1(d_, s_) d_ = \ + ((s_)[0]+(s_)[1]+(s_)[2]+(s_)[3]+(s_)[4]+(s_)[5]+(s_)[6]+(s_)[7] \ + +(s_)[ 8]+(s_)[ 9]+(s_)[10]+(s_)[11]+(s_)[12]+(s_)[13]+(s_)[14]+(s_)[15] \ + +(s_)[16]+(s_)[17]+(s_)[18]+(s_)[19]+(s_)[20]+(s_)[21]+(s_)[22]+(s_)[23] \ + +(s_)[24]+(s_)[25]+(s_)[26]+(s_)[27]+(s_)[28]+(s_)[29]+(s_)[30]+(s_)[31]) + #else + #error "Unsupported ATL_VLEN" + #endif +#else + #if defined(ATL_VLEN) && ATL_VLEN != 1 + #error "For systems without vector support, only ATL_VLEN=1 supported!" + #elif !defined(ATL_VLEN) + #define ATL_VLEN 1 + #endif + #define ATL_VTYPE TYPE + + #define ATL_vzero(d_) d_ = 0.0 + #define ATL_vcopy(d_, s_) d_ = s_ + #define ATL_vbcast(d_, p_) d_ = *(p_) + #define ATL_vuld(v_, p_) v_ = *(p_) + #define ATL_vld(v_, p_) v_ = *(p_) + #define ATL_vust(p_, s_) *(p_) = s_ + #define ATL_vst(p_, s_) *(p_) = s_ + #define ATL_vadd(d_, s1_, s2_) d_ = (s1_) + (s2_) + #define ATL_vsub(d_, s1_, s2_) d_ = (s1_) - (s2_) + #define ATL_vmul(d_, s1_, s2_) d_ = (s1_) * (s2_) + #define ATL_vmac(d_, s1_, s2_) d_ += (s1_) * (s2_) + #define ATL_vrsum1(d_, s_) d_ = s_ +#endif +/* + * If it isn't already defined (fast system-specific version), define vvrsumX + * This may be horribly slow or great, depending on how smart the compiler is. 
+ */ +#if ATL_VLEN == 2 + #ifndef ATL_vvrsum1 + #define ATL_vvrsum1(s0_) s0_[0] += s0_[1] + #endif + #ifndef ATL_vvrsum2 + #define ATL_vvrsum2(s0_, s1_) \ + { \ + s0_[0] += s0_[1]; \ + s0_[1] = s1_[0] + s1_[1]; \ + } + #endif +#endif +#if ATL_VLEN == 4 + #ifndef ATL_vvrsum1 + #define ATL_vvrsum1(s0_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]; \ + } + #endif + #ifndef ATL_vvrsum2 + #define ATL_vvrsum2(s0_, s1_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]; \ + } + #endif + #ifndef ATL_vvrsum4 + #define ATL_vvrsum4(s0_, s1_, s2_, s3_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]; \ + s0_[2] = s2_[0]+s2_[1]+s2_[2]+s2_[3]; \ + s0_[3] = s3_[0]+s3_[1]+s3_[2]+s3_[3]; \ + } + #endif +#endif +#if ATL_VLEN == 8 + #ifndef ATL_vvrsum1 + #define ATL_vvrsum1(s0_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]; \ + } + #endif + #ifndef ATL_vvrsum2 + #define ATL_vvrsum2(s0_, s1_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]+s1_[4]+s1_[5]+s1_[6]+s1_[7]; \ + } + #endif + #ifndef ATL_vvrsum4 + #define ATL_vvrsum4(s0_, s1_, s2_, s3_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]+s1_[4]+s1_[5]+s1_[6]+s1_[7]; \ + s0_[2] = s2_[0]+s2_[1]+s2_[2]+s2_[3]+s2_[4]+s2_[5]+s2_[6]+s2_[7]; \ + s0_[3] = s3_[0]+s3_[1]+s3_[2]+s3_[3]+s3_[4]+s3_[5]+s3_[6]+s3_[7]; \ + } + #endif + #ifndef ATL_vvrsum8 + #define ATL_vvrsum8(s0_, s1_, s2_, s3_, s4_, s5_, s6_, s7_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]+s1_[4]+s1_[5]+s1_[6]+s1_[7]; \ + s0_[2] = s2_[0]+s2_[1]+s2_[2]+s2_[3]+s2_[4]+s2_[5]+s2_[6]+s2_[7]; \ + s0_[3] = s3_[0]+s3_[1]+s3_[2]+s3_[3]+s3_[4]+s3_[5]+s3_[6]+s3_[7]; \ + s0_[4] = s4_[0]+s4_[1]+s4_[2]+s4_[3]+s4_[4]+s4_[5]+s4_[6]+s4_[7]; \ + s0_[5] = 
s5_[0]+s5_[1]+s5_[2]+s5_[3]+s5_[4]+s5_[5]+s5_[6]+s5_[7]; \ + s0_[6] = s6_[0]+s6_[1]+s6_[2]+s6_[3]+s6_[4]+s6_[5]+s6_[6]+s6_[7]; \ + s0_[7] = s7_[0]+s7_[1]+s7_[2]+s7_[3]+s7_[4]+s7_[5]+s7_[6]+s7_[7]; \ + } + #endif +#endif +#if ATL_VLEN == 16 + #ifndef ATL_vvrsum1 + #define ATL_vvrsum1(s0_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]+s0_[8]+s0_[9]+s0_[10]+s0_[11]+s0_[12]+s0_[13]+s0_[14]+s0_[15]; \ + } + #endif + #ifndef ATL_vvrsum2 + #define ATL_vvrsum2(s0_, s1_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]+s0_[8]+s0_[9]+s0_[10]+s0_[11]+s0_[12]+s0_[13]+s0_[14]+s0_[15]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]+s1_[4]+s1_[5]+s1_[6]+s1_[7]+s1_[8]+s1_[9]+s1_[10]+s1_[11]+s1_[12]+s1_[13]+s1_[14]+s1_[15]; \ + } + #endif + #ifndef ATL_vvrsum4 + #define ATL_vvrsum4(s0_, s1_, s2_, s3_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]+s0_[8]+s0_[9]+s0_[10]+s0_[11]+s0_[12]+s0_[13]+s0_[14]+s0_[15]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]+s1_[4]+s1_[5]+s1_[6]+s1_[7]+s1_[8]+s1_[9]+s1_[10]+s1_[11]+s1_[12]+s1_[13]+s1_[14]+s1_[15]; \ + s0_[2] = s2_[0]+s2_[1]+s2_[2]+s2_[3]+s2_[4]+s2_[5]+s2_[6]+s2_[7]+s2_[8]+s2_[9]+s2_[10]+s2_[11]+s2_[12]+s2_[13]+s2_[14]+s2_[15]; \ + s0_[3] = s3_[0]+s3_[1]+s3_[2]+s3_[3]+s3_[4]+s3_[5]+s3_[6]+s3_[7]+s3_[8]+s3_[9]+s3_[10]+s3_[11]+s3_[12]+s3_[13]+s3_[14]+s3_[15]; \ + } + #endif + #ifndef ATL_vvrsum8 + #define ATL_vvrsum8(s0_, s1_, s2_, s3_, s4_, s5_, s6_, s7_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]+s0_[8]+s0_[9]+s0_[10]+s0_[11]+s0_[12]+s0_[13]+s0_[14]+s0_[15]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]+s1_[4]+s1_[5]+s1_[6]+s1_[7]+s1_[8]+s1_[9]+s1_[10]+s1_[11]+s1_[12]+s1_[13]+s1_[14]+s1_[15]; \ + s0_[2] = s2_[0]+s2_[1]+s2_[2]+s2_[3]+s2_[4]+s2_[5]+s2_[6]+s2_[7]+s2_[8]+s2_[9]+s2_[10]+s2_[11]+s2_[12]+s2_[13]+s2_[14]+s2_[15]; \ + s0_[3] = 
s3_[0]+s3_[1]+s3_[2]+s3_[3]+s3_[4]+s3_[5]+s3_[6]+s3_[7]+s3_[8]+s3_[9]+s3_[10]+s3_[11]+s3_[12]+s3_[13]+s3_[14]+s3_[15]; \ + s0_[4] = s4_[0]+s4_[1]+s4_[2]+s4_[3]+s4_[4]+s4_[5]+s4_[6]+s4_[7]+s4_[8]+s4_[9]+s4_[10]+s4_[11]+s4_[12]+s4_[13]+s4_[14]+s4_[15]; \ + s0_[5] = s5_[0]+s5_[1]+s5_[2]+s5_[3]+s5_[4]+s5_[5]+s5_[6]+s5_[7]+s5_[8]+s5_[9]+s5_[10]+s5_[11]+s5_[12]+s5_[13]+s5_[14]+s5_[15]; \ + s0_[6] = s6_[0]+s6_[1]+s6_[2]+s6_[3]+s6_[4]+s6_[5]+s6_[6]+s6_[7]+s6_[8]+s6_[9]+s6_[10]+s6_[11]+s6_[12]+s6_[13]+s6_[14]+s6_[15]; \ + s0_[7] = s7_[0]+s7_[1]+s7_[2]+s7_[3]+s7_[4]+s7_[5]+s7_[6]+s7_[7]+s7_[8]+s7_[9]+s7_[10]+s7_[11]+s7_[12]+s7_[13]+s7_[14]+s7_[15]; \ + } + #endif + #ifndef ATL_vvrsum16 + #define ATL_vvrsum16(s0_, s1_, s2_, s3_, s4_, s5_, s6_, s7_, s8_, s9_, s10_, s11_, s12_, s13_, s14_, s15_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]+s0_[8]+s0_[9]+s0_[10]+s0_[11]+s0_[12]+s0_[13]+s0_[14]+s0_[15]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]+s1_[4]+s1_[5]+s1_[6]+s1_[7]+s1_[8]+s1_[9]+s1_[10]+s1_[11]+s1_[12]+s1_[13]+s1_[14]+s1_[15]; \ + s0_[2] = s2_[0]+s2_[1]+s2_[2]+s2_[3]+s2_[4]+s2_[5]+s2_[6]+s2_[7]+s2_[8]+s2_[9]+s2_[10]+s2_[11]+s2_[12]+s2_[13]+s2_[14]+s2_[15]; \ + s0_[3] = s3_[0]+s3_[1]+s3_[2]+s3_[3]+s3_[4]+s3_[5]+s3_[6]+s3_[7]+s3_[8]+s3_[9]+s3_[10]+s3_[11]+s3_[12]+s3_[13]+s3_[14]+s3_[15]; \ + s0_[4] = s4_[0]+s4_[1]+s4_[2]+s4_[3]+s4_[4]+s4_[5]+s4_[6]+s4_[7]+s4_[8]+s4_[9]+s4_[10]+s4_[11]+s4_[12]+s4_[13]+s4_[14]+s4_[15]; \ + s0_[5] = s5_[0]+s5_[1]+s5_[2]+s5_[3]+s5_[4]+s5_[5]+s5_[6]+s5_[7]+s5_[8]+s5_[9]+s5_[10]+s5_[11]+s5_[12]+s5_[13]+s5_[14]+s5_[15]; \ + s0_[6] = s6_[0]+s6_[1]+s6_[2]+s6_[3]+s6_[4]+s6_[5]+s6_[6]+s6_[7]+s6_[8]+s6_[9]+s6_[10]+s6_[11]+s6_[12]+s6_[13]+s6_[14]+s6_[15]; \ + s0_[7] = s7_[0]+s7_[1]+s7_[2]+s7_[3]+s7_[4]+s7_[5]+s7_[6]+s7_[7]+s7_[8]+s7_[9]+s7_[10]+s7_[11]+s7_[12]+s7_[13]+s7_[14]+s7_[15]; \ + s0_[8] = s8_[0]+s8_[1]+s8_[2]+s8_[3]+s8_[4]+s8_[5]+s8_[6]+s8_[7]+s8_[8]+s8_[9]+s8_[10]+s8_[11]+s8_[12]+s8_[13]+s8_[14]+s8_[15]; \ + s0_[9] = 
s9_[0]+s9_[1]+s9_[2]+s9_[3]+s9_[4]+s9_[5]+s9_[6]+s9_[7]+s9_[8]+s9_[9]+s9_[10]+s9_[11]+s9_[12]+s9_[13]+s9_[14]+s9_[15]; \ + s0_[10] = s10_[0]+s10_[1]+s10_[2]+s10_[3]+s10_[4]+s10_[5]+s10_[6]+s10_[7]+s10_[8]+s10_[9]+s10_[10]+s10_[11]+s10_[12]+s10_[13]+s10_[14]+s10_[15]; \ + s0_[11] = s11_[0]+s11_[1]+s11_[2]+s11_[3]+s11_[4]+s11_[5]+s11_[6]+s11_[7]+s11_[8]+s11_[9]+s11_[10]+s11_[11]+s11_[12]+s11_[13]+s11_[14]+s11_[15]; \ + s0_[12] = s12_[0]+s12_[1]+s12_[2]+s12_[3]+s12_[4]+s12_[5]+s12_[6]+s12_[7]+s12_[8]+s12_[9]+s12_[10]+s12_[11]+s12_[12]+s12_[13]+s12_[14]+s12_[15]; \ + s0_[13] = s13_[0]+s13_[1]+s13_[2]+s13_[3]+s13_[4]+s13_[5]+s13_[6]+s13_[7]+s13_[8]+s13_[9]+s13_[10]+s13_[11]+s13_[12]+s13_[13]+s13_[14]+s13_[15]; \ + s0_[14] = s14_[0]+s14_[1]+s14_[2]+s14_[3]+s14_[4]+s14_[5]+s14_[6]+s14_[7]+s14_[8]+s14_[9]+s14_[10]+s14_[11]+s14_[12]+s14_[13]+s14_[14]+s14_[15]; \ + s0_[15] = s15_[0]+s15_[1]+s15_[2]+s15_[3]+s15_[4]+s15_[5]+s15_[6]+s15_[7]+s15_[8]+s15_[9]+s15_[10]+s15_[11]+s15_[12]+s15_[13]+s15_[14]+s15_[15]; \ + } + #endif +#endif +#if ATL_VLEN == 32 + #ifndef ATL_vvrsum1 + #define ATL_vvrsum1(s0_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]+s0_[8]+s0_[9]+s0_[10]+s0_[11]+s0_[12]+s0_[13]+s0_[14]+s0_[15]+s0_[16]+s0_[17]+s0_[18]+s0_[19]+s0_[20]+s0_[21]+s0_[22]+s0_[23]+s0_[24]+s0_[25]+s0_[26]+s0_[27]+s0_[28]+s0_[29]+s0_[30]+s0_[31]; \ + } + #endif + #ifndef ATL_vvrsum2 + #define ATL_vvrsum2(s0_, s1_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]+s0_[8]+s0_[9]+s0_[10]+s0_[11]+s0_[12]+s0_[13]+s0_[14]+s0_[15]+s0_[16]+s0_[17]+s0_[18]+s0_[19]+s0_[20]+s0_[21]+s0_[22]+s0_[23]+s0_[24]+s0_[25]+s0_[26]+s0_[27]+s0_[28]+s0_[29]+s0_[30]+s0_[31]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]+s1_[4]+s1_[5]+s1_[6]+s1_[7]+s1_[8]+s1_[9]+s1_[10]+s1_[11]+s1_[12]+s1_[13]+s1_[14]+s1_[15]+s1_[16]+s1_[17]+s1_[18]+s1_[19]+s1_[20]+s1_[21]+s1_[22]+s1_[23]+s1_[24]+s1_[25]+s1_[26]+s1_[27]+s1_[28]+s1_[29]+s1_[30]+s1_[31]; \ + } + #endif + #ifndef 
ATL_vvrsum4 + #define ATL_vvrsum4(s0_, s1_, s2_, s3_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]+s0_[8]+s0_[9]+s0_[10]+s0_[11]+s0_[12]+s0_[13]+s0_[14]+s0_[15]+s0_[16]+s0_[17]+s0_[18]+s0_[19]+s0_[20]+s0_[21]+s0_[22]+s0_[23]+s0_[24]+s0_[25]+s0_[26]+s0_[27]+s0_[28]+s0_[29]+s0_[30]+s0_[31]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]+s1_[4]+s1_[5]+s1_[6]+s1_[7]+s1_[8]+s1_[9]+s1_[10]+s1_[11]+s1_[12]+s1_[13]+s1_[14]+s1_[15]+s1_[16]+s1_[17]+s1_[18]+s1_[19]+s1_[20]+s1_[21]+s1_[22]+s1_[23]+s1_[24]+s1_[25]+s1_[26]+s1_[27]+s1_[28]+s1_[29]+s1_[30]+s1_[31]; \ + s0_[2] = s2_[0]+s2_[1]+s2_[2]+s2_[3]+s2_[4]+s2_[5]+s2_[6]+s2_[7]+s2_[8]+s2_[9]+s2_[10]+s2_[11]+s2_[12]+s2_[13]+s2_[14]+s2_[15]+s2_[16]+s2_[17]+s2_[18]+s2_[19]+s2_[20]+s2_[21]+s2_[22]+s2_[23]+s2_[24]+s2_[25]+s2_[26]+s2_[27]+s2_[28]+s2_[29]+s2_[30]+s2_[31]; \ + s0_[3] = s3_[0]+s3_[1]+s3_[2]+s3_[3]+s3_[4]+s3_[5]+s3_[6]+s3_[7]+s3_[8]+s3_[9]+s3_[10]+s3_[11]+s3_[12]+s3_[13]+s3_[14]+s3_[15]+s3_[16]+s3_[17]+s3_[18]+s3_[19]+s3_[20]+s3_[21]+s3_[22]+s3_[23]+s3_[24]+s3_[25]+s3_[26]+s3_[27]+s3_[28]+s3_[29]+s3_[30]+s3_[31]; \ + } + #endif + #ifndef ATL_vvrsum8 + #define ATL_vvrsum8(s0_, s1_, s2_, s3_, s4_, s5_, s6_, s7_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]+s0_[8]+s0_[9]+s0_[10]+s0_[11]+s0_[12]+s0_[13]+s0_[14]+s0_[15]+s0_[16]+s0_[17]+s0_[18]+s0_[19]+s0_[20]+s0_[21]+s0_[22]+s0_[23]+s0_[24]+s0_[25]+s0_[26]+s0_[27]+s0_[28]+s0_[29]+s0_[30]+s0_[31]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]+s1_[4]+s1_[5]+s1_[6]+s1_[7]+s1_[8]+s1_[9]+s1_[10]+s1_[11]+s1_[12]+s1_[13]+s1_[14]+s1_[15]+s1_[16]+s1_[17]+s1_[18]+s1_[19]+s1_[20]+s1_[21]+s1_[22]+s1_[23]+s1_[24]+s1_[25]+s1_[26]+s1_[27]+s1_[28]+s1_[29]+s1_[30]+s1_[31]; \ + s0_[2] = s2_[0]+s2_[1]+s2_[2]+s2_[3]+s2_[4]+s2_[5]+s2_[6]+s2_[7]+s2_[8]+s2_[9]+s2_[10]+s2_[11]+s2_[12]+s2_[13]+s2_[14]+s2_[15]+s2_[16]+s2_[17]+s2_[18]+s2_[19]+s2_[20]+s2_[21]+s2_[22]+s2_[23]+s2_[24]+s2_[25]+s2_[26]+s2_[27]+s2_[28]+s2_[29]+s2_[30]+s2_[31]; \ + s0_[3] = 
s3_[0]+s3_[1]+s3_[2]+s3_[3]+s3_[4]+s3_[5]+s3_[6]+s3_[7]+s3_[8]+s3_[9]+s3_[10]+s3_[11]+s3_[12]+s3_[13]+s3_[14]+s3_[15]+s3_[16]+s3_[17]+s3_[18]+s3_[19]+s3_[20]+s3_[21]+s3_[22]+s3_[23]+s3_[24]+s3_[25]+s3_[26]+s3_[27]+s3_[28]+s3_[29]+s3_[30]+s3_[31]; \ + s0_[4] = s4_[0]+s4_[1]+s4_[2]+s4_[3]+s4_[4]+s4_[5]+s4_[6]+s4_[7]+s4_[8]+s4_[9]+s4_[10]+s4_[11]+s4_[12]+s4_[13]+s4_[14]+s4_[15]+s4_[16]+s4_[17]+s4_[18]+s4_[19]+s4_[20]+s4_[21]+s4_[22]+s4_[23]+s4_[24]+s4_[25]+s4_[26]+s4_[27]+s4_[28]+s4_[29]+s4_[30]+s4_[31]; \ + s0_[5] = s5_[0]+s5_[1]+s5_[2]+s5_[3]+s5_[4]+s5_[5]+s5_[6]+s5_[7]+s5_[8]+s5_[9]+s5_[10]+s5_[11]+s5_[12]+s5_[13]+s5_[14]+s5_[15]+s5_[16]+s5_[17]+s5_[18]+s5_[19]+s5_[20]+s5_[21]+s5_[22]+s5_[23]+s5_[24]+s5_[25]+s5_[26]+s5_[27]+s5_[28]+s5_[29]+s5_[30]+s5_[31]; \ + s0_[6] = s6_[0]+s6_[1]+s6_[2]+s6_[3]+s6_[4]+s6_[5]+s6_[6]+s6_[7]+s6_[8]+s6_[9]+s6_[10]+s6_[11]+s6_[12]+s6_[13]+s6_[14]+s6_[15]+s6_[16]+s6_[17]+s6_[18]+s6_[19]+s6_[20]+s6_[21]+s6_[22]+s6_[23]+s6_[24]+s6_[25]+s6_[26]+s6_[27]+s6_[28]+s6_[29]+s6_[30]+s6_[31]; \ + s0_[7] = s7_[0]+s7_[1]+s7_[2]+s7_[3]+s7_[4]+s7_[5]+s7_[6]+s7_[7]+s7_[8]+s7_[9]+s7_[10]+s7_[11]+s7_[12]+s7_[13]+s7_[14]+s7_[15]+s7_[16]+s7_[17]+s7_[18]+s7_[19]+s7_[20]+s7_[21]+s7_[22]+s7_[23]+s7_[24]+s7_[25]+s7_[26]+s7_[27]+s7_[28]+s7_[29]+s7_[30]+s7_[31]; \ + } + #endif + #ifndef ATL_vvrsum16 + #define ATL_vvrsum16(s0_, s1_, s2_, s3_, s4_, s5_, s6_, s7_, s8_, s9_, s10_, s11_, s12_, s13_, s14_, s15_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]+s0_[8]+s0_[9]+s0_[10]+s0_[11]+s0_[12]+s0_[13]+s0_[14]+s0_[15]+s0_[16]+s0_[17]+s0_[18]+s0_[19]+s0_[20]+s0_[21]+s0_[22]+s0_[23]+s0_[24]+s0_[25]+s0_[26]+s0_[27]+s0_[28]+s0_[29]+s0_[30]+s0_[31]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]+s1_[4]+s1_[5]+s1_[6]+s1_[7]+s1_[8]+s1_[9]+s1_[10]+s1_[11]+s1_[12]+s1_[13]+s1_[14]+s1_[15]+s1_[16]+s1_[17]+s1_[18]+s1_[19]+s1_[20]+s1_[21]+s1_[22]+s1_[23]+s1_[24]+s1_[25]+s1_[26]+s1_[27]+s1_[28]+s1_[29]+s1_[30]+s1_[31]; \ + s0_[2] = 
s2_[0]+s2_[1]+s2_[2]+s2_[3]+s2_[4]+s2_[5]+s2_[6]+s2_[7]+s2_[8]+s2_[9]+s2_[10]+s2_[11]+s2_[12]+s2_[13]+s2_[14]+s2_[15]+s2_[16]+s2_[17]+s2_[18]+s2_[19]+s2_[20]+s2_[21]+s2_[22]+s2_[23]+s2_[24]+s2_[25]+s2_[26]+s2_[27]+s2_[28]+s2_[29]+s2_[30]+s2_[31]; \ + s0_[3] = s3_[0]+s3_[1]+s3_[2]+s3_[3]+s3_[4]+s3_[5]+s3_[6]+s3_[7]+s3_[8]+s3_[9]+s3_[10]+s3_[11]+s3_[12]+s3_[13]+s3_[14]+s3_[15]+s3_[16]+s3_[17]+s3_[18]+s3_[19]+s3_[20]+s3_[21]+s3_[22]+s3_[23]+s3_[24]+s3_[25]+s3_[26]+s3_[27]+s3_[28]+s3_[29]+s3_[30]+s3_[31]; \ + s0_[4] = s4_[0]+s4_[1]+s4_[2]+s4_[3]+s4_[4]+s4_[5]+s4_[6]+s4_[7]+s4_[8]+s4_[9]+s4_[10]+s4_[11]+s4_[12]+s4_[13]+s4_[14]+s4_[15]+s4_[16]+s4_[17]+s4_[18]+s4_[19]+s4_[20]+s4_[21]+s4_[22]+s4_[23]+s4_[24]+s4_[25]+s4_[26]+s4_[27]+s4_[28]+s4_[29]+s4_[30]+s4_[31]; \ + s0_[5] = s5_[0]+s5_[1]+s5_[2]+s5_[3]+s5_[4]+s5_[5]+s5_[6]+s5_[7]+s5_[8]+s5_[9]+s5_[10]+s5_[11]+s5_[12]+s5_[13]+s5_[14]+s5_[15]+s5_[16]+s5_[17]+s5_[18]+s5_[19]+s5_[20]+s5_[21]+s5_[22]+s5_[23]+s5_[24]+s5_[25]+s5_[26]+s5_[27]+s5_[28]+s5_[29]+s5_[30]+s5_[31]; \ + s0_[6] = s6_[0]+s6_[1]+s6_[2]+s6_[3]+s6_[4]+s6_[5]+s6_[6]+s6_[7]+s6_[8]+s6_[9]+s6_[10]+s6_[11]+s6_[12]+s6_[13]+s6_[14]+s6_[15]+s6_[16]+s6_[17]+s6_[18]+s6_[19]+s6_[20]+s6_[21]+s6_[22]+s6_[23]+s6_[24]+s6_[25]+s6_[26]+s6_[27]+s6_[28]+s6_[29]+s6_[30]+s6_[31]; \ + s0_[7] = s7_[0]+s7_[1]+s7_[2]+s7_[3]+s7_[4]+s7_[5]+s7_[6]+s7_[7]+s7_[8]+s7_[9]+s7_[10]+s7_[11]+s7_[12]+s7_[13]+s7_[14]+s7_[15]+s7_[16]+s7_[17]+s7_[18]+s7_[19]+s7_[20]+s7_[21]+s7_[22]+s7_[23]+s7_[24]+s7_[25]+s7_[26]+s7_[27]+s7_[28]+s7_[29]+s7_[30]+s7_[31]; \ + s0_[8] = s8_[0]+s8_[1]+s8_[2]+s8_[3]+s8_[4]+s8_[5]+s8_[6]+s8_[7]+s8_[8]+s8_[9]+s8_[10]+s8_[11]+s8_[12]+s8_[13]+s8_[14]+s8_[15]+s8_[16]+s8_[17]+s8_[18]+s8_[19]+s8_[20]+s8_[21]+s8_[22]+s8_[23]+s8_[24]+s8_[25]+s8_[26]+s8_[27]+s8_[28]+s8_[29]+s8_[30]+s8_[31]; \ + s0_[9] = 
s9_[0]+s9_[1]+s9_[2]+s9_[3]+s9_[4]+s9_[5]+s9_[6]+s9_[7]+s9_[8]+s9_[9]+s9_[10]+s9_[11]+s9_[12]+s9_[13]+s9_[14]+s9_[15]+s9_[16]+s9_[17]+s9_[18]+s9_[19]+s9_[20]+s9_[21]+s9_[22]+s9_[23]+s9_[24]+s9_[25]+s9_[26]+s9_[27]+s9_[28]+s9_[29]+s9_[30]+s9_[31]; \ + s0_[10] = s10_[0]+s10_[1]+s10_[2]+s10_[3]+s10_[4]+s10_[5]+s10_[6]+s10_[7]+s10_[8]+s10_[9]+s10_[10]+s10_[11]+s10_[12]+s10_[13]+s10_[14]+s10_[15]+s10_[16]+s10_[17]+s10_[18]+s10_[19]+s10_[20]+s10_[21]+s10_[22]+s10_[23]+s10_[24]+s10_[25]+s10_[26]+s10_[27]+s10_[28]+s10_[29]+s10_[30]+s10_[31]; \ + s0_[11] = s11_[0]+s11_[1]+s11_[2]+s11_[3]+s11_[4]+s11_[5]+s11_[6]+s11_[7]+s11_[8]+s11_[9]+s11_[10]+s11_[11]+s11_[12]+s11_[13]+s11_[14]+s11_[15]+s11_[16]+s11_[17]+s11_[18]+s11_[19]+s11_[20]+s11_[21]+s11_[22]+s11_[23]+s11_[24]+s11_[25]+s11_[26]+s11_[27]+s11_[28]+s11_[29]+s11_[30]+s11_[31]; \ + s0_[12] = s12_[0]+s12_[1]+s12_[2]+s12_[3]+s12_[4]+s12_[5]+s12_[6]+s12_[7]+s12_[8]+s12_[9]+s12_[10]+s12_[11]+s12_[12]+s12_[13]+s12_[14]+s12_[15]+s12_[16]+s12_[17]+s12_[18]+s12_[19]+s12_[20]+s12_[21]+s12_[22]+s12_[23]+s12_[24]+s12_[25]+s12_[26]+s12_[27]+s12_[28]+s12_[29]+s12_[30]+s12_[31]; \ + s0_[13] = s13_[0]+s13_[1]+s13_[2]+s13_[3]+s13_[4]+s13_[5]+s13_[6]+s13_[7]+s13_[8]+s13_[9]+s13_[10]+s13_[11]+s13_[12]+s13_[13]+s13_[14]+s13_[15]+s13_[16]+s13_[17]+s13_[18]+s13_[19]+s13_[20]+s13_[21]+s13_[22]+s13_[23]+s13_[24]+s13_[25]+s13_[26]+s13_[27]+s13_[28]+s13_[29]+s13_[30]+s13_[31]; \ + s0_[14] = s14_[0]+s14_[1]+s14_[2]+s14_[3]+s14_[4]+s14_[5]+s14_[6]+s14_[7]+s14_[8]+s14_[9]+s14_[10]+s14_[11]+s14_[12]+s14_[13]+s14_[14]+s14_[15]+s14_[16]+s14_[17]+s14_[18]+s14_[19]+s14_[20]+s14_[21]+s14_[22]+s14_[23]+s14_[24]+s14_[25]+s14_[26]+s14_[27]+s14_[28]+s14_[29]+s14_[30]+s14_[31]; \ + s0_[15] = 
s15_[0]+s15_[1]+s15_[2]+s15_[3]+s15_[4]+s15_[5]+s15_[6]+s15_[7]+s15_[8]+s15_[9]+s15_[10]+s15_[11]+s15_[12]+s15_[13]+s15_[14]+s15_[15]+s15_[16]+s15_[17]+s15_[18]+s15_[19]+s15_[20]+s15_[21]+s15_[22]+s15_[23]+s15_[24]+s15_[25]+s15_[26]+s15_[27]+s15_[28]+s15_[29]+s15_[30]+s15_[31]; \ + } + #endif + #ifndef ATL_vvrsum32 + #define ATL_vvrsum32(s0_, s1_, s2_, s3_, s4_, s5_, s6_, s7_, s8_, s9_, s10_, s11_, s12_, s13_, s14_, s15_, s16_, s17_, s18_, s19_, s20_, s21_, s22_, s23_, s24_, s25_, s26_, s27_, s28_, s29_, s30_, s31_)\ + { \ + s0_[0] = s0_[0]+s0_[1]+s0_[2]+s0_[3]+s0_[4]+s0_[5]+s0_[6]+s0_[7]+s0_[8]+s0_[9]+s0_[10]+s0_[11]+s0_[12]+s0_[13]+s0_[14]+s0_[15]+s0_[16]+s0_[17]+s0_[18]+s0_[19]+s0_[20]+s0_[21]+s0_[22]+s0_[23]+s0_[24]+s0_[25]+s0_[26]+s0_[27]+s0_[28]+s0_[29]+s0_[30]+s0_[31]; \ + s0_[1] = s1_[0]+s1_[1]+s1_[2]+s1_[3]+s1_[4]+s1_[5]+s1_[6]+s1_[7]+s1_[8]+s1_[9]+s1_[10]+s1_[11]+s1_[12]+s1_[13]+s1_[14]+s1_[15]+s1_[16]+s1_[17]+s1_[18]+s1_[19]+s1_[20]+s1_[21]+s1_[22]+s1_[23]+s1_[24]+s1_[25]+s1_[26]+s1_[27]+s1_[28]+s1_[29]+s1_[30]+s1_[31]; \ + s0_[2] = s2_[0]+s2_[1]+s2_[2]+s2_[3]+s2_[4]+s2_[5]+s2_[6]+s2_[7]+s2_[8]+s2_[9]+s2_[10]+s2_[11]+s2_[12]+s2_[13]+s2_[14]+s2_[15]+s2_[16]+s2_[17]+s2_[18]+s2_[19]+s2_[20]+s2_[21]+s2_[22]+s2_[23]+s2_[24]+s2_[25]+s2_[26]+s2_[27]+s2_[28]+s2_[29]+s2_[30]+s2_[31]; \ + s0_[3] = s3_[0]+s3_[1]+s3_[2]+s3_[3]+s3_[4]+s3_[5]+s3_[6]+s3_[7]+s3_[8]+s3_[9]+s3_[10]+s3_[11]+s3_[12]+s3_[13]+s3_[14]+s3_[15]+s3_[16]+s3_[17]+s3_[18]+s3_[19]+s3_[20]+s3_[21]+s3_[22]+s3_[23]+s3_[24]+s3_[25]+s3_[26]+s3_[27]+s3_[28]+s3_[29]+s3_[30]+s3_[31]; \ + s0_[4] = s4_[0]+s4_[1]+s4_[2]+s4_[3]+s4_[4]+s4_[5]+s4_[6]+s4_[7]+s4_[8]+s4_[9]+s4_[10]+s4_[11]+s4_[12]+s4_[13]+s4_[14]+s4_[15]+s4_[16]+s4_[17]+s4_[18]+s4_[19]+s4_[20]+s4_[21]+s4_[22]+s4_[23]+s4_[24]+s4_[25]+s4_[26]+s4_[27]+s4_[28]+s4_[29]+s4_[30]+s4_[31]; \ + s0_[5] = 
s5_[0]+s5_[1]+s5_[2]+s5_[3]+s5_[4]+s5_[5]+s5_[6]+s5_[7]+s5_[8]+s5_[9]+s5_[10]+s5_[11]+s5_[12]+s5_[13]+s5_[14]+s5_[15]+s5_[16]+s5_[17]+s5_[18]+s5_[19]+s5_[20]+s5_[21]+s5_[22]+s5_[23]+s5_[24]+s5_[25]+s5_[26]+s5_[27]+s5_[28]+s5_[29]+s5_[30]+s5_[31]; \ + s0_[6] = s6_[0]+s6_[1]+s6_[2]+s6_[3]+s6_[4]+s6_[5]+s6_[6]+s6_[7]+s6_[8]+s6_[9]+s6_[10]+s6_[11]+s6_[12]+s6_[13]+s6_[14]+s6_[15]+s6_[16]+s6_[17]+s6_[18]+s6_[19]+s6_[20]+s6_[21]+s6_[22]+s6_[23]+s6_[24]+s6_[25]+s6_[26]+s6_[27]+s6_[28]+s6_[29]+s6_[30]+s6_[31]; \ + s0_[7] = s7_[0]+s7_[1]+s7_[2]+s7_[3]+s7_[4]+s7_[5]+s7_[6]+s7_[7]+s7_[8]+s7_[9]+s7_[10]+s7_[11]+s7_[12]+s7_[13]+s7_[14]+s7_[15]+s7_[16]+s7_[17]+s7_[18]+s7_[19]+s7_[20]+s7_[21]+s7_[22]+s7_[23]+s7_[24]+s7_[25]+s7_[26]+s7_[27]+s7_[28]+s7_[29]+s7_[30]+s7_[31]; \ + s0_[8] = s8_[0]+s8_[1]+s8_[2]+s8_[3]+s8_[4]+s8_[5]+s8_[6]+s8_[7]+s8_[8]+s8_[9]+s8_[10]+s8_[11]+s8_[12]+s8_[13]+s8_[14]+s8_[15]+s8_[16]+s8_[17]+s8_[18]+s8_[19]+s8_[20]+s8_[21]+s8_[22]+s8_[23]+s8_[24]+s8_[25]+s8_[26]+s8_[27]+s8_[28]+s8_[29]+s8_[30]+s8_[31]; \ + s0_[9] = s9_[0]+s9_[1]+s9_[2]+s9_[3]+s9_[4]+s9_[5]+s9_[6]+s9_[7]+s9_[8]+s9_[9]+s9_[10]+s9_[11]+s9_[12]+s9_[13]+s9_[14]+s9_[15]+s9_[16]+s9_[17]+s9_[18]+s9_[19]+s9_[20]+s9_[21]+s9_[22]+s9_[23]+s9_[24]+s9_[25]+s9_[26]+s9_[27]+s9_[28]+s9_[29]+s9_[30]+s9_[31]; \ + s0_[10] = s10_[0]+s10_[1]+s10_[2]+s10_[3]+s10_[4]+s10_[5]+s10_[6]+s10_[7]+s10_[8]+s10_[9]+s10_[10]+s10_[11]+s10_[12]+s10_[13]+s10_[14]+s10_[15]+s10_[16]+s10_[17]+s10_[18]+s10_[19]+s10_[20]+s10_[21]+s10_[22]+s10_[23]+s10_[24]+s10_[25]+s10_[26]+s10_[27]+s10_[28]+s10_[29]+s10_[30]+s10_[31]; \ + s0_[11] = s11_[0]+s11_[1]+s11_[2]+s11_[3]+s11_[4]+s11_[5]+s11_[6]+s11_[7]+s11_[8]+s11_[9]+s11_[10]+s11_[11]+s11_[12]+s11_[13]+s11_[14]+s11_[15]+s11_[16]+s11_[17]+s11_[18]+s11_[19]+s11_[20]+s11_[21]+s11_[22]+s11_[23]+s11_[24]+s11_[25]+s11_[26]+s11_[27]+s11_[28]+s11_[29]+s11_[30]+s11_[31]; \ + s0_[12] = 
s12_[0]+s12_[1]+s12_[2]+s12_[3]+s12_[4]+s12_[5]+s12_[6]+s12_[7]+s12_[8]+s12_[9]+s12_[10]+s12_[11]+s12_[12]+s12_[13]+s12_[14]+s12_[15]+s12_[16]+s12_[17]+s12_[18]+s12_[19]+s12_[20]+s12_[21]+s12_[22]+s12_[23]+s12_[24]+s12_[25]+s12_[26]+s12_[27]+s12_[28]+s12_[29]+s12_[30]+s12_[31]; \ + s0_[13] = s13_[0]+s13_[1]+s13_[2]+s13_[3]+s13_[4]+s13_[5]+s13_[6]+s13_[7]+s13_[8]+s13_[9]+s13_[10]+s13_[11]+s13_[12]+s13_[13]+s13_[14]+s13_[15]+s13_[16]+s13_[17]+s13_[18]+s13_[19]+s13_[20]+s13_[21]+s13_[22]+s13_[23]+s13_[24]+s13_[25]+s13_[26]+s13_[27]+s13_[28]+s13_[29]+s13_[30]+s13_[31]; \ + s0_[14] = s14_[0]+s14_[1]+s14_[2]+s14_[3]+s14_[4]+s14_[5]+s14_[6]+s14_[7]+s14_[8]+s14_[9]+s14_[10]+s14_[11]+s14_[12]+s14_[13]+s14_[14]+s14_[15]+s14_[16]+s14_[17]+s14_[18]+s14_[19]+s14_[20]+s14_[21]+s14_[22]+s14_[23]+s14_[24]+s14_[25]+s14_[26]+s14_[27]+s14_[28]+s14_[29]+s14_[30]+s14_[31]; \ + s0_[15] = s15_[0]+s15_[1]+s15_[2]+s15_[3]+s15_[4]+s15_[5]+s15_[6]+s15_[7]+s15_[8]+s15_[9]+s15_[10]+s15_[11]+s15_[12]+s15_[13]+s15_[14]+s15_[15]+s15_[16]+s15_[17]+s15_[18]+s15_[19]+s15_[20]+s15_[21]+s15_[22]+s15_[23]+s15_[24]+s15_[25]+s15_[26]+s15_[27]+s15_[28]+s15_[29]+s15_[30]+s15_[31]; \ + s0_[16] = s16_[0]+s16_[1]+s16_[2]+s16_[3]+s16_[4]+s16_[5]+s16_[6]+s16_[7]+s16_[8]+s16_[9]+s16_[10]+s16_[11]+s16_[12]+s16_[13]+s16_[14]+s16_[15]+s16_[16]+s16_[17]+s16_[18]+s16_[19]+s16_[20]+s16_[21]+s16_[22]+s16_[23]+s16_[24]+s16_[25]+s16_[26]+s16_[27]+s16_[28]+s16_[29]+s16_[30]+s16_[31]; \ + s0_[17] = s17_[0]+s17_[1]+s17_[2]+s17_[3]+s17_[4]+s17_[5]+s17_[6]+s17_[7]+s17_[8]+s17_[9]+s17_[10]+s17_[11]+s17_[12]+s17_[13]+s17_[14]+s17_[15]+s17_[16]+s17_[17]+s17_[18]+s17_[19]+s17_[20]+s17_[21]+s17_[22]+s17_[23]+s17_[24]+s17_[25]+s17_[26]+s17_[27]+s17_[28]+s17_[29]+s17_[30]+s17_[31]; \ + s0_[18] = 
s18_[0]+s18_[1]+s18_[2]+s18_[3]+s18_[4]+s18_[5]+s18_[6]+s18_[7]+s18_[8]+s18_[9]+s18_[10]+s18_[11]+s18_[12]+s18_[13]+s18_[14]+s18_[15]+s18_[16]+s18_[17]+s18_[18]+s18_[19]+s18_[20]+s18_[21]+s18_[22]+s18_[23]+s18_[24]+s18_[25]+s18_[26]+s18_[27]+s18_[28]+s18_[29]+s18_[30]+s18_[31]; \ + s0_[19] = s19_[0]+s19_[1]+s19_[2]+s19_[3]+s19_[4]+s19_[5]+s19_[6]+s19_[7]+s19_[8]+s19_[9]+s19_[10]+s19_[11]+s19_[12]+s19_[13]+s19_[14]+s19_[15]+s19_[16]+s19_[17]+s19_[18]+s19_[19]+s19_[20]+s19_[21]+s19_[22]+s19_[23]+s19_[24]+s19_[25]+s19_[26]+s19_[27]+s19_[28]+s19_[29]+s19_[30]+s19_[31]; \ + s0_[20] = s20_[0]+s20_[1]+s20_[2]+s20_[3]+s20_[4]+s20_[5]+s20_[6]+s20_[7]+s20_[8]+s20_[9]+s20_[10]+s20_[11]+s20_[12]+s20_[13]+s20_[14]+s20_[15]+s20_[16]+s20_[17]+s20_[18]+s20_[19]+s20_[20]+s20_[21]+s20_[22]+s20_[23]+s20_[24]+s20_[25]+s20_[26]+s20_[27]+s20_[28]+s20_[29]+s20_[30]+s20_[31]; \ + s0_[21] = s21_[0]+s21_[1]+s21_[2]+s21_[3]+s21_[4]+s21_[5]+s21_[6]+s21_[7]+s21_[8]+s21_[9]+s21_[10]+s21_[11]+s21_[12]+s21_[13]+s21_[14]+s21_[15]+s21_[16]+s21_[17]+s21_[18]+s21_[19]+s21_[20]+s21_[21]+s21_[22]+s21_[23]+s21_[24]+s21_[25]+s21_[26]+s21_[27]+s21_[28]+s21_[29]+s21_[30]+s21_[31]; \ + s0_[22] = s22_[0]+s22_[1]+s22_[2]+s22_[3]+s22_[4]+s22_[5]+s22_[6]+s22_[7]+s22_[8]+s22_[9]+s22_[10]+s22_[11]+s22_[12]+s22_[13]+s22_[14]+s22_[15]+s22_[16]+s22_[17]+s22_[18]+s22_[19]+s22_[20]+s22_[21]+s22_[22]+s22_[23]+s22_[24]+s22_[25]+s22_[26]+s22_[27]+s22_[28]+s22_[29]+s22_[30]+s22_[31]; \ + s0_[23] = s23_[0]+s23_[1]+s23_[2]+s23_[3]+s23_[4]+s23_[5]+s23_[6]+s23_[7]+s23_[8]+s23_[9]+s23_[10]+s23_[11]+s23_[12]+s23_[13]+s23_[14]+s23_[15]+s23_[16]+s23_[17]+s23_[18]+s23_[19]+s23_[20]+s23_[21]+s23_[22]+s23_[23]+s23_[24]+s23_[25]+s23_[26]+s23_[27]+s23_[28]+s23_[29]+s23_[30]+s23_[31]; \ + s0_[24] = 
s24_[0]+s24_[1]+s24_[2]+s24_[3]+s24_[4]+s24_[5]+s24_[6]+s24_[7]+s24_[8]+s24_[9]+s24_[10]+s24_[11]+s24_[12]+s24_[13]+s24_[14]+s24_[15]+s24_[16]+s24_[17]+s24_[18]+s24_[19]+s24_[20]+s24_[21]+s24_[22]+s24_[23]+s24_[24]+s24_[25]+s24_[26]+s24_[27]+s24_[28]+s24_[29]+s24_[30]+s24_[31]; \ + s0_[25] = s25_[0]+s25_[1]+s25_[2]+s25_[3]+s25_[4]+s25_[5]+s25_[6]+s25_[7]+s25_[8]+s25_[9]+s25_[10]+s25_[11]+s25_[12]+s25_[13]+s25_[14]+s25_[15]+s25_[16]+s25_[17]+s25_[18]+s25_[19]+s25_[20]+s25_[21]+s25_[22]+s25_[23]+s25_[24]+s25_[25]+s25_[26]+s25_[27]+s25_[28]+s25_[29]+s25_[30]+s25_[31]; \ + s0_[26] = s26_[0]+s26_[1]+s26_[2]+s26_[3]+s26_[4]+s26_[5]+s26_[6]+s26_[7]+s26_[8]+s26_[9]+s26_[10]+s26_[11]+s26_[12]+s26_[13]+s26_[14]+s26_[15]+s26_[16]+s26_[17]+s26_[18]+s26_[19]+s26_[20]+s26_[21]+s26_[22]+s26_[23]+s26_[24]+s26_[25]+s26_[26]+s26_[27]+s26_[28]+s26_[29]+s26_[30]+s26_[31]; \ + s0_[27] = s27_[0]+s27_[1]+s27_[2]+s27_[3]+s27_[4]+s27_[5]+s27_[6]+s27_[7]+s27_[8]+s27_[9]+s27_[10]+s27_[11]+s27_[12]+s27_[13]+s27_[14]+s27_[15]+s27_[16]+s27_[17]+s27_[18]+s27_[19]+s27_[20]+s27_[21]+s27_[22]+s27_[23]+s27_[24]+s27_[25]+s27_[26]+s27_[27]+s27_[28]+s27_[29]+s27_[30]+s27_[31]; \ + s0_[28] = s28_[0]+s28_[1]+s28_[2]+s28_[3]+s28_[4]+s28_[5]+s28_[6]+s28_[7]+s28_[8]+s28_[9]+s28_[10]+s28_[11]+s28_[12]+s28_[13]+s28_[14]+s28_[15]+s28_[16]+s28_[17]+s28_[18]+s28_[19]+s28_[20]+s28_[21]+s28_[22]+s28_[23]+s28_[24]+s28_[25]+s28_[26]+s28_[27]+s28_[28]+s28_[29]+s28_[30]+s28_[31]; \ + s0_[29] = s29_[0]+s29_[1]+s29_[2]+s29_[3]+s29_[4]+s29_[5]+s29_[6]+s29_[7]+s29_[8]+s29_[9]+s29_[10]+s29_[11]+s29_[12]+s29_[13]+s29_[14]+s29_[15]+s29_[16]+s29_[17]+s29_[18]+s29_[19]+s29_[20]+s29_[21]+s29_[22]+s29_[23]+s29_[24]+s29_[25]+s29_[26]+s29_[27]+s29_[28]+s29_[29]+s29_[30]+s29_[31]; \ + s0_[30] = 
s30_[0]+s30_[1]+s30_[2]+s30_[3]+s30_[4]+s30_[5]+s30_[6]+s30_[7]+s30_[8]+s30_[9]+s30_[10]+s30_[11]+s30_[12]+s30_[13]+s30_[14]+s30_[15]+s30_[16]+s30_[17]+s30_[18]+s30_[19]+s30_[20]+s30_[21]+s30_[22]+s30_[23]+s30_[24]+s30_[25]+s30_[26]+s30_[27]+s30_[28]+s30_[29]+s30_[30]+s30_[31]; \ + s0_[31] = s31_[0]+s31_[1]+s31_[2]+s31_[3]+s31_[4]+s31_[5]+s31_[6]+s31_[7]+s31_[8]+s31_[9]+s31_[10]+s31_[11]+s31_[12]+s31_[13]+s31_[14]+s31_[15]+s31_[16]+s31_[17]+s31_[18]+s31_[19]+s31_[20]+s31_[21]+s31_[22]+s31_[23]+s31_[24]+s31_[25]+s31_[26]+s31_[27]+s31_[28]+s31_[29]+s31_[30]+s31_[31]; \ + } + #endif +#endif +/* + * If it isn't defined already (fast sys-spec vers), define + * vsplatI (0 <= I < VL) using vector indexing. + * This may be horribly slow or great, depending on how smart the compiler is. + */ +#if ATL_VLEN == 2 + #ifndef ATL_vsplat0 + #define ATL_vsplat0(d_, s_) d_[0] = d_[1] = s_[0] + #endif + #ifndef ATL_vsplat1 + #define ATL_vsplat1(d_, s_) d_[0] = d_[1] = s_[1] + #endif +#elif ATL_VLEN == 4 + #ifndef ATL_vsplat0 + #define ATL_vsplat0(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3] = s_[0] + #endif + #ifndef ATL_vsplat1 + #define ATL_vsplat1(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3] = s_[1] + #endif + #ifndef ATL_vsplat2 + #define ATL_vsplat2(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3] = s_[2] + #endif + #ifndef ATL_vsplat3 + #define ATL_vsplat3(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3] = s_[3] + #endif +#elif ATL_VLEN == 8 + #ifndef ATL_vsplat0 + #define ATL_vsplat0(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7] = s_[0] + #endif + #ifndef ATL_vsplat1 + #define ATL_vsplat1(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7] = s_[1] + #endif + #ifndef ATL_vsplat2 + #define ATL_vsplat2(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7] = s_[2] + #endif + #ifndef ATL_vsplat3 + #define ATL_vsplat3(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7] = s_[3] + #endif + #ifndef ATL_vsplat4 + #define ATL_vsplat4(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7] = 
s_[4] + #endif + #ifndef ATL_vsplat5 + #define ATL_vsplat5(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7] = s_[5] + #endif + #ifndef ATL_vsplat6 + #define ATL_vsplat6(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7] = s_[6] + #endif + #ifndef ATL_vsplat7 + #define ATL_vsplat7(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7] = s_[7] + #endif +#elif ATL_VLEN == 16 + #ifndef ATL_vsplat0 + #define ATL_vsplat0(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[0] + #endif + #ifndef ATL_vsplat1 + #define ATL_vsplat1(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[1] + #endif + #ifndef ATL_vsplat2 + #define ATL_vsplat2(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[2] + #endif + #ifndef ATL_vsplat3 + #define ATL_vsplat3(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[3] + #endif + #ifndef ATL_vsplat4 + #define ATL_vsplat4(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[4] + #endif + #ifndef ATL_vsplat5 + #define ATL_vsplat5(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[5] + #endif + #ifndef ATL_vsplat6 + #define ATL_vsplat6(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[6] + #endif + #ifndef ATL_vsplat7 + #define ATL_vsplat7(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[7] + #endif + #ifndef ATL_vsplat8 + #define ATL_vsplat8(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[8] + #endif + #ifndef ATL_vsplat9 + 
#define ATL_vsplat9(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[9] + #endif + #ifndef ATL_vsplat10 + #define ATL_vsplat10(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[10] + #endif + #ifndef ATL_vsplat11 + #define ATL_vsplat11(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[11] + #endif + #ifndef ATL_vsplat12 + #define ATL_vsplat12(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[12] + #endif + #ifndef ATL_vsplat13 + #define ATL_vsplat13(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[13] + #endif + #ifndef ATL_vsplat14 + #define ATL_vsplat14(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[14] + #endif + #ifndef ATL_vsplat15 + #define ATL_vsplat15(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15] = s_[15] + #endif +#elif ATL_VLEN == 32 + #ifndef ATL_vsplat0 + #define ATL_vsplat0(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[0] + #endif + #ifndef ATL_vsplat1 + #define ATL_vsplat1(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[1] + #endif + #ifndef ATL_vsplat2 + #define ATL_vsplat2(d_, s_) \ + 
d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[2] + #endif + #ifndef ATL_vsplat3 + #define ATL_vsplat3(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[3] + #endif + #ifndef ATL_vsplat4 + #define ATL_vsplat4(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[4] + #endif + #ifndef ATL_vsplat5 + #define ATL_vsplat5(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[5] + #endif + #ifndef ATL_vsplat6 + #define ATL_vsplat6(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[6] + #endif + #ifndef ATL_vsplat7 + #define ATL_vsplat7(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[7] + #endif + #ifndef ATL_vsplat8 + #define ATL_vsplat8(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[8] + #endif + #ifndef ATL_vsplat9 + #define 
ATL_vsplat9(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[9] + #endif + #ifndef ATL_vsplat10 + #define ATL_vsplat10(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[10] + #endif + #ifndef ATL_vsplat11 + #define ATL_vsplat11(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[11] + #endif + #ifndef ATL_vsplat12 + #define ATL_vsplat12(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[12] + #endif + #ifndef ATL_vsplat13 + #define ATL_vsplat13(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[13] + #endif + #ifndef ATL_vsplat14 + #define ATL_vsplat14(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[14] + #endif + #ifndef ATL_vsplat15 + #define ATL_vsplat15(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[15] + #endif + 
#ifndef ATL_vsplat16 + #define ATL_vsplat16(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[16] + #endif + #ifndef ATL_vsplat17 + #define ATL_vsplat17(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[17] + #endif + #ifndef ATL_vsplat18 + #define ATL_vsplat18(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[18] + #endif + #ifndef ATL_vsplat19 + #define ATL_vsplat19(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[19] + #endif + #ifndef ATL_vsplat20 + #define ATL_vsplat20(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[20] + #endif + #ifndef ATL_vsplat21 + #define ATL_vsplat21(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[21] + #endif + #ifndef ATL_vsplat22 + #define ATL_vsplat22(d_, s_) \ + 
d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[22] + #endif + #ifndef ATL_vsplat23 + #define ATL_vsplat23(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[23] + #endif + #ifndef ATL_vsplat24 + #define ATL_vsplat24(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[24] + #endif + #ifndef ATL_vsplat25 + #define ATL_vsplat25(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[25] + #endif + #ifndef ATL_vsplat26 + #define ATL_vsplat26(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[26] + #endif + #ifndef ATL_vsplat27 + #define ATL_vsplat27(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[27] + #endif + #ifndef ATL_vsplat28 + #define ATL_vsplat28(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[28] + #endif + #ifndef ATL_vsplat29 + 
#define ATL_vsplat29(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[29] + #endif + #ifndef ATL_vsplat30 + #define ATL_vsplat30(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[30] + #endif + #ifndef ATL_vsplat31 + #define ATL_vsplat31(d_, s_) \ + d_[0]=d_[1]=d_[2]=d_[3]=d_[4]=d_[5]=d_[6]=d_[7]=d_[8]=d_[9]=d_[10]=d_[11]=d_[12]=d_[13]=d_[14]=d_[15]=d_[16]=d_[17]=d_[18]=d_[19]=d_[20]=d_[21]=d_[22]=d_[23]=d_[24]=d_[25]=d_[26]=d_[27]=d_[28]=d_[29]=d_[30]=d_[31] = s_[31] + #endif +#endif +/* + * If we don't have one defined, write slow version that should work with + * any gcc-compatible compiler + */ +#ifndef ATL_vrsum1 + #define ATL_vrsum1(d_, s_) \ + { TYPE mem_[ATL_VLEN] __attribute__ ((aligned (ATL_VLENb)));\ + int i_; \ + ATL_vst(mem_, s_); \ + d_ = *mem_; \ + for (i_=1; i_ < ATL_VLEN; i_++) \ + d_ += mem_[i_]; \ + } +#endif + +#endif /* end multiple-inclusion guard */ diff -Nru atlas-3.10.2/include/atlas_threads.h atlas-3.10.3/include/atlas_threads.h --- atlas-3.10.2/include/atlas_threads.h 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/include/atlas_threads.h 2016-07-28 19:42:59.000000000 +0000 @@ -20,8 +20,10 @@ #if defined(ATL_OS_Win64) || defined(ATL_OS_WinNT) #ifdef ATL_USE64BITS - #define ATL_WIN64THREADS 1 - #define ATL_WINTHREADS 1 + #ifdef __MINGW64__ /* use pthreads if using cygwin gcc */ + #define ATL_WIN64THREADS 1 + #define ATL_WINTHREADS 1 + #endif #else #define ATL_WIN32THREADS 1 #define ATL_WINTHREADS 1 diff -Nru atlas-3.10.2/include/atlas_tlvl3.h atlas-3.10.3/include/atlas_tlvl3.h --- atlas-3.10.2/include/atlas_tlvl3.h 2014-07-10 16:22:01.000000000 +0000 +++ 
atlas-3.10.3/include/atlas_tlvl3.h 2016-07-28 19:42:59.000000000 +0000 @@ -7,9 +7,9 @@ #endif #ifndef ATL_XOVER_L3 #ifdef TREAL - #define ATL_XOVER_L3 2 /* number of NBxNB blocks */ + #define ATL_XOVER_L3 8 /* number of NBxNB blocks */ #else - #define ATL_XOVER_L3 1 + #define ATL_XOVER_L3 4 #endif #endif diff -Nru atlas-3.10.2/include/atlas_tst.h atlas-3.10.3/include/atlas_tst.h --- atlas-3.10.2/include/atlas_tst.h 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/include/atlas_tst.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/include/f77wrap_lapack.h atlas-3.10.3/include/f77wrap_lapack.h --- atlas-3.10.2/include/f77wrap_lapack.h 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/include/f77wrap_lapack.h 2016-07-28 19:42:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/INSTALL.txt atlas-3.10.3/INSTALL.txt --- atlas-3.10.2/INSTALL.txt 2014-07-10 16:22:00.000000000 +0000 +++ atlas-3.10.3/INSTALL.txt 2016-07-28 19:42:59.000000000 +0000 @@ -39,7 +39,7 @@ before installing ATLAS, turn off CPU throttling. For most PCs, you can switch it off in the BIOS (eg., on my Athlon-64 machine, I can say "No" to "Cool and Quiet" under "Power Management"). Most OSes also provide a way -to do switch off CPU throttling, but that varies from OS to OS. Under Fedora, +to switch off CPU throttling, but that varies from OS to OS. 
Under Fedora, at any rate, the following command seemed to work: /usr/bin/cpufreq-selector -g performance On my Core2Duo, cpufreq-selector only changes the parameters of the first CPU, @@ -51,11 +51,11 @@ cp /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor \ /sys/devices/system/cpu/cpu1/cpufreq/scaling_governor -For non-broken systems, you instead issue the above command with -c <#> appended +More modern and non-broken systems use cpufreq-set, with -c <#> appended to change the performance of each core in turn. For example, to speedup both processors of a dual system you would issue: - /usr/bin/cpufreq-selector -g performance -c 0 - /usr/bin/cpufreq-selector -g performance -c 1 + /usr/bin/cpufreq-set -g performance -c 0 + /usr/bin/cpufreq-set -g performance -c 1 On Kubuntu, I had problems with this not working because scaling_max_freq was set to the minimal speed. To fix, I had to first increase the max scaling @@ -77,8 +77,8 @@ ATLAS config tries to detect if CPU throttling is enabled, but it may not always detect it, and sometimes may detect it after you have disabled it. In the latter case, to force the configure to continue regardless of the -results of the CPU throttling probe, pass this flag to configure: - -Si cputhrchk 0 +results of the CPU throttling probe, read the error message you get when +ATLAS's configure dies upon detecting throttling. ********************************** CONFIG ************************************* First, create a directory where you will build ATLAS. It can be anywhere in diff -Nru atlas-3.10.2/interfaces/blas/C/src/catlas_caxpby.c atlas-3.10.3/interfaces/blas/C/src/catlas_caxpby.c --- atlas-3.10.2/interfaces/blas/C/src/catlas_caxpby.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/catlas_caxpby.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/catlas_cset.c atlas-3.10.3/interfaces/blas/C/src/catlas_cset.c --- atlas-3.10.2/interfaces/blas/C/src/catlas_cset.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/catlas_cset.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/catlas_daxpby.c atlas-3.10.3/interfaces/blas/C/src/catlas_daxpby.c --- atlas-3.10.2/interfaces/blas/C/src/catlas_daxpby.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/catlas_daxpby.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/catlas_dset.c atlas-3.10.3/interfaces/blas/C/src/catlas_dset.c --- atlas-3.10.2/interfaces/blas/C/src/catlas_dset.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/catlas_dset.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/catlas_saxpby.c atlas-3.10.3/interfaces/blas/C/src/catlas_saxpby.c --- atlas-3.10.2/interfaces/blas/C/src/catlas_saxpby.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/catlas_saxpby.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/catlas_sset.c atlas-3.10.3/interfaces/blas/C/src/catlas_sset.c --- atlas-3.10.2/interfaces/blas/C/src/catlas_sset.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/catlas_sset.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/catlas_zaxpby.c atlas-3.10.3/interfaces/blas/C/src/catlas_zaxpby.c --- atlas-3.10.2/interfaces/blas/C/src/catlas_zaxpby.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/catlas_zaxpby.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/catlas_zset.c atlas-3.10.3/interfaces/blas/C/src/catlas_zset.c --- atlas-3.10.2/interfaces/blas/C/src/catlas_zset.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/catlas_zset.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_caxpy.c atlas-3.10.3/interfaces/blas/C/src/cblas_caxpy.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_caxpy.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_caxpy.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ccopy.c atlas-3.10.3/interfaces/blas/C/src/cblas_ccopy.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ccopy.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ccopy.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cdotc.c atlas-3.10.3/interfaces/blas/C/src/cblas_cdotc.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cdotc.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cdotc.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cdotu.c atlas-3.10.3/interfaces/blas/C/src/cblas_cdotu.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cdotu.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cdotu.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cgbmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_cgbmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cgbmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cgbmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cgemm.c atlas-3.10.3/interfaces/blas/C/src/cblas_cgemm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cgemm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cgemm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cgemv.c atlas-3.10.3/interfaces/blas/C/src/cblas_cgemv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cgemv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cgemv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cger2c.c atlas-3.10.3/interfaces/blas/C/src/cblas_cger2c.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cger2c.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cger2c.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cger2u.c atlas-3.10.3/interfaces/blas/C/src/cblas_cger2u.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cger2u.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cger2u.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cgerc.c atlas-3.10.3/interfaces/blas/C/src/cblas_cgerc.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cgerc.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cgerc.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cgeru.c atlas-3.10.3/interfaces/blas/C/src/cblas_cgeru.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cgeru.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cgeru.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_chbmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_chbmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_chbmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_chbmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_chemm.c atlas-3.10.3/interfaces/blas/C/src/cblas_chemm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_chemm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_chemm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_chemv.c atlas-3.10.3/interfaces/blas/C/src/cblas_chemv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_chemv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_chemv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cher2.c atlas-3.10.3/interfaces/blas/C/src/cblas_cher2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cher2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cher2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cher2k.c atlas-3.10.3/interfaces/blas/C/src/cblas_cher2k.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cher2k.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cher2k.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cher.c atlas-3.10.3/interfaces/blas/C/src/cblas_cher.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cher.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cher.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cherk.c atlas-3.10.3/interfaces/blas/C/src/cblas_cherk.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cherk.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cherk.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_chpmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_chpmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_chpmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_chpmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_chpr2.c atlas-3.10.3/interfaces/blas/C/src/cblas_chpr2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_chpr2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_chpr2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_chpr.c atlas-3.10.3/interfaces/blas/C/src/cblas_chpr.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_chpr.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_chpr.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_crotg.c atlas-3.10.3/interfaces/blas/C/src/cblas_crotg.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_crotg.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_crotg.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cscal.c atlas-3.10.3/interfaces/blas/C/src/cblas_cscal.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cscal.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cscal.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_csrot.c atlas-3.10.3/interfaces/blas/C/src/cblas_csrot.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_csrot.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_csrot.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_csscal.c atlas-3.10.3/interfaces/blas/C/src/cblas_csscal.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_csscal.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_csscal.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_cswap.c atlas-3.10.3/interfaces/blas/C/src/cblas_cswap.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_cswap.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_cswap.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_csymm.c atlas-3.10.3/interfaces/blas/C/src/cblas_csymm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_csymm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_csymm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_csyr2k.c atlas-3.10.3/interfaces/blas/C/src/cblas_csyr2k.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_csyr2k.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_csyr2k.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_csyrk.c atlas-3.10.3/interfaces/blas/C/src/cblas_csyrk.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_csyrk.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_csyrk.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ctbmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ctbmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ctbmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ctbmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ctbsv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ctbsv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ctbsv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ctbsv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ctpmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ctpmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ctpmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ctpmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ctpsv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ctpsv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ctpsv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ctpsv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ctrmm.c atlas-3.10.3/interfaces/blas/C/src/cblas_ctrmm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ctrmm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ctrmm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ctrmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ctrmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ctrmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ctrmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ctrsm.c atlas-3.10.3/interfaces/blas/C/src/cblas_ctrsm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ctrsm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ctrsm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ctrsv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ctrsv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ctrsv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ctrsv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dasum.c atlas-3.10.3/interfaces/blas/C/src/cblas_dasum.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dasum.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dasum.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_daxpy.c atlas-3.10.3/interfaces/blas/C/src/cblas_daxpy.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_daxpy.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_daxpy.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dcopy.c atlas-3.10.3/interfaces/blas/C/src/cblas_dcopy.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dcopy.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dcopy.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ddot.c atlas-3.10.3/interfaces/blas/C/src/cblas_ddot.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ddot.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ddot.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dgbmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_dgbmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dgbmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dgbmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dgemm.c atlas-3.10.3/interfaces/blas/C/src/cblas_dgemm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dgemm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dgemm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dgemv.c atlas-3.10.3/interfaces/blas/C/src/cblas_dgemv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dgemv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dgemv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dger2.c atlas-3.10.3/interfaces/blas/C/src/cblas_dger2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dger2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dger2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dger.c atlas-3.10.3/interfaces/blas/C/src/cblas_dger.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dger.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dger.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dnrm2.c atlas-3.10.3/interfaces/blas/C/src/cblas_dnrm2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dnrm2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dnrm2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_drot.c atlas-3.10.3/interfaces/blas/C/src/cblas_drot.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_drot.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_drot.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_drotg.c atlas-3.10.3/interfaces/blas/C/src/cblas_drotg.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_drotg.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_drotg.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_drotm.c atlas-3.10.3/interfaces/blas/C/src/cblas_drotm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_drotm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_drotm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_drotmg.c atlas-3.10.3/interfaces/blas/C/src/cblas_drotmg.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_drotmg.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_drotmg.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dsbmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_dsbmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dsbmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dsbmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dscal.c atlas-3.10.3/interfaces/blas/C/src/cblas_dscal.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dscal.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dscal.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dsdot.c atlas-3.10.3/interfaces/blas/C/src/cblas_dsdot.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dsdot.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dsdot.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dspmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_dspmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dspmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dspmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dspr2.c atlas-3.10.3/interfaces/blas/C/src/cblas_dspr2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dspr2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dspr2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dspr.c atlas-3.10.3/interfaces/blas/C/src/cblas_dspr.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dspr.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dspr.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dswap.c atlas-3.10.3/interfaces/blas/C/src/cblas_dswap.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dswap.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dswap.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dsymm.c atlas-3.10.3/interfaces/blas/C/src/cblas_dsymm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dsymm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dsymm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dsymv.c atlas-3.10.3/interfaces/blas/C/src/cblas_dsymv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dsymv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dsymv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dsyr2.c atlas-3.10.3/interfaces/blas/C/src/cblas_dsyr2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dsyr2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dsyr2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dsyr2k.c atlas-3.10.3/interfaces/blas/C/src/cblas_dsyr2k.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dsyr2k.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dsyr2k.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dsyr.c atlas-3.10.3/interfaces/blas/C/src/cblas_dsyr.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dsyr.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dsyr.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dsyrk.c atlas-3.10.3/interfaces/blas/C/src/cblas_dsyrk.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dsyrk.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dsyrk.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dtbmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_dtbmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dtbmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dtbmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dtbsv.c atlas-3.10.3/interfaces/blas/C/src/cblas_dtbsv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dtbsv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dtbsv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dtpmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_dtpmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dtpmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dtpmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dtpsv.c atlas-3.10.3/interfaces/blas/C/src/cblas_dtpsv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dtpsv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dtpsv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dtrmm.c atlas-3.10.3/interfaces/blas/C/src/cblas_dtrmm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dtrmm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dtrmm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dtrmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_dtrmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dtrmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dtrmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dtrsm.c atlas-3.10.3/interfaces/blas/C/src/cblas_dtrsm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dtrsm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dtrsm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dtrsv.c atlas-3.10.3/interfaces/blas/C/src/cblas_dtrsv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dtrsv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dtrsv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dzasum.c atlas-3.10.3/interfaces/blas/C/src/cblas_dzasum.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dzasum.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dzasum.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_dznrm2.c atlas-3.10.3/interfaces/blas/C/src/cblas_dznrm2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_dznrm2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_dznrm2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_errprn.c atlas-3.10.3/interfaces/blas/C/src/cblas_errprn.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_errprn.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_errprn.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_icamax.c atlas-3.10.3/interfaces/blas/C/src/cblas_icamax.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_icamax.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_icamax.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_idamax.c atlas-3.10.3/interfaces/blas/C/src/cblas_idamax.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_idamax.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_idamax.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_isamax.c atlas-3.10.3/interfaces/blas/C/src/cblas_isamax.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_isamax.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_isamax.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_izamax.c atlas-3.10.3/interfaces/blas/C/src/cblas_izamax.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_izamax.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_izamax.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sasum.c atlas-3.10.3/interfaces/blas/C/src/cblas_sasum.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sasum.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sasum.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_saxpy.c atlas-3.10.3/interfaces/blas/C/src/cblas_saxpy.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_saxpy.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_saxpy.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_scasum.c atlas-3.10.3/interfaces/blas/C/src/cblas_scasum.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_scasum.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_scasum.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_scnrm2.c atlas-3.10.3/interfaces/blas/C/src/cblas_scnrm2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_scnrm2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_scnrm2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_scopy.c atlas-3.10.3/interfaces/blas/C/src/cblas_scopy.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_scopy.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_scopy.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sdot.c atlas-3.10.3/interfaces/blas/C/src/cblas_sdot.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sdot.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sdot.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sdsdot.c atlas-3.10.3/interfaces/blas/C/src/cblas_sdsdot.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sdsdot.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sdsdot.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sgbmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_sgbmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sgbmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sgbmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sgemm.c atlas-3.10.3/interfaces/blas/C/src/cblas_sgemm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sgemm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sgemm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sgemv.c atlas-3.10.3/interfaces/blas/C/src/cblas_sgemv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sgemv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sgemv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sger2.c atlas-3.10.3/interfaces/blas/C/src/cblas_sger2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sger2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sger2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sger.c atlas-3.10.3/interfaces/blas/C/src/cblas_sger.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sger.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sger.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_snrm2.c atlas-3.10.3/interfaces/blas/C/src/cblas_snrm2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_snrm2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_snrm2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_srot.c atlas-3.10.3/interfaces/blas/C/src/cblas_srot.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_srot.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_srot.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_srotg.c atlas-3.10.3/interfaces/blas/C/src/cblas_srotg.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_srotg.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_srotg.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_srotm.c atlas-3.10.3/interfaces/blas/C/src/cblas_srotm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_srotm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_srotm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_srotmg.c atlas-3.10.3/interfaces/blas/C/src/cblas_srotmg.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_srotmg.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_srotmg.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ssbmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ssbmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ssbmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ssbmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sscal.c atlas-3.10.3/interfaces/blas/C/src/cblas_sscal.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sscal.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sscal.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sspmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_sspmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sspmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sspmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sspr2.c atlas-3.10.3/interfaces/blas/C/src/cblas_sspr2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sspr2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sspr2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sspr.c atlas-3.10.3/interfaces/blas/C/src/cblas_sspr.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sspr.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sspr.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_sswap.c atlas-3.10.3/interfaces/blas/C/src/cblas_sswap.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_sswap.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_sswap.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ssymm.c atlas-3.10.3/interfaces/blas/C/src/cblas_ssymm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ssymm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ssymm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ssymv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ssymv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ssymv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ssymv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ssyr2.c atlas-3.10.3/interfaces/blas/C/src/cblas_ssyr2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ssyr2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ssyr2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ssyr2k.c atlas-3.10.3/interfaces/blas/C/src/cblas_ssyr2k.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ssyr2k.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ssyr2k.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ssyr.c atlas-3.10.3/interfaces/blas/C/src/cblas_ssyr.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ssyr.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ssyr.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ssyrk.c atlas-3.10.3/interfaces/blas/C/src/cblas_ssyrk.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ssyrk.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ssyrk.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_stbmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_stbmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_stbmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_stbmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_stbsv.c atlas-3.10.3/interfaces/blas/C/src/cblas_stbsv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_stbsv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_stbsv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_stpmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_stpmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_stpmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_stpmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_stpsv.c atlas-3.10.3/interfaces/blas/C/src/cblas_stpsv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_stpsv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_stpsv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_strmm.c atlas-3.10.3/interfaces/blas/C/src/cblas_strmm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_strmm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_strmm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_strmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_strmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_strmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_strmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_strsm.c atlas-3.10.3/interfaces/blas/C/src/cblas_strsm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_strsm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_strsm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_strsv.c atlas-3.10.3/interfaces/blas/C/src/cblas_strsv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_strsv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_strsv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_xerbla.c atlas-3.10.3/interfaces/blas/C/src/cblas_xerbla.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_xerbla.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_xerbla.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zaxpy.c atlas-3.10.3/interfaces/blas/C/src/cblas_zaxpy.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zaxpy.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zaxpy.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zcopy.c atlas-3.10.3/interfaces/blas/C/src/cblas_zcopy.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zcopy.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zcopy.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zdotc.c atlas-3.10.3/interfaces/blas/C/src/cblas_zdotc.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zdotc.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zdotc.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zdotu.c atlas-3.10.3/interfaces/blas/C/src/cblas_zdotu.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zdotu.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zdotu.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zdrot.c atlas-3.10.3/interfaces/blas/C/src/cblas_zdrot.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zdrot.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zdrot.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zdscal.c atlas-3.10.3/interfaces/blas/C/src/cblas_zdscal.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zdscal.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zdscal.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zgbmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_zgbmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zgbmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zgbmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zgemm.c atlas-3.10.3/interfaces/blas/C/src/cblas_zgemm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zgemm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zgemm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zgemv.c atlas-3.10.3/interfaces/blas/C/src/cblas_zgemv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zgemv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zgemv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zger2c.c atlas-3.10.3/interfaces/blas/C/src/cblas_zger2c.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zger2c.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zger2c.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zger2u.c atlas-3.10.3/interfaces/blas/C/src/cblas_zger2u.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zger2u.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zger2u.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zgerc.c atlas-3.10.3/interfaces/blas/C/src/cblas_zgerc.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zgerc.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zgerc.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zgeru.c atlas-3.10.3/interfaces/blas/C/src/cblas_zgeru.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zgeru.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zgeru.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zhbmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_zhbmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zhbmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zhbmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zhemm.c atlas-3.10.3/interfaces/blas/C/src/cblas_zhemm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zhemm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zhemm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zhemv.c atlas-3.10.3/interfaces/blas/C/src/cblas_zhemv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zhemv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zhemv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zher2.c atlas-3.10.3/interfaces/blas/C/src/cblas_zher2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zher2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zher2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zher2k.c atlas-3.10.3/interfaces/blas/C/src/cblas_zher2k.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zher2k.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zher2k.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zher.c atlas-3.10.3/interfaces/blas/C/src/cblas_zher.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zher.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zher.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zherk.c atlas-3.10.3/interfaces/blas/C/src/cblas_zherk.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zherk.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zherk.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zhpmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_zhpmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zhpmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zhpmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zhpr2.c atlas-3.10.3/interfaces/blas/C/src/cblas_zhpr2.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zhpr2.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zhpr2.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zhpr.c atlas-3.10.3/interfaces/blas/C/src/cblas_zhpr.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zhpr.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zhpr.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zrotg.c atlas-3.10.3/interfaces/blas/C/src/cblas_zrotg.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zrotg.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zrotg.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zscal.c atlas-3.10.3/interfaces/blas/C/src/cblas_zscal.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zscal.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zscal.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zswap.c atlas-3.10.3/interfaces/blas/C/src/cblas_zswap.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zswap.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zswap.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zsymm.c atlas-3.10.3/interfaces/blas/C/src/cblas_zsymm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zsymm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zsymm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zsyr2k.c atlas-3.10.3/interfaces/blas/C/src/cblas_zsyr2k.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zsyr2k.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zsyr2k.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_zsyrk.c atlas-3.10.3/interfaces/blas/C/src/cblas_zsyrk.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_zsyrk.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_zsyrk.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ztbmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ztbmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ztbmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ztbmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ztbsv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ztbsv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ztbsv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ztbsv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ztpmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ztpmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ztpmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ztpmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ztpsv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ztpsv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ztpsv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ztpsv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ztrmm.c atlas-3.10.3/interfaces/blas/C/src/cblas_ztrmm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ztrmm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ztrmm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ztrmv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ztrmv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ztrmv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ztrmv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ztrsm.c atlas-3.10.3/interfaces/blas/C/src/cblas_ztrsm.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ztrsm.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ztrsm.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/C/src/cblas_ztrsv.c atlas-3.10.3/interfaces/blas/C/src/cblas_ztrsv.c --- atlas-3.10.2/interfaces/blas/C/src/cblas_ztrsv.c 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/C/src/cblas_ztrsv.c 2016-07-28 19:43:02.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/F77/src/f77wrap/fatlas_axpby.c atlas-3.10.3/interfaces/blas/F77/src/f77wrap/fatlas_axpby.c --- atlas-3.10.2/interfaces/blas/F77/src/f77wrap/fatlas_axpby.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/F77/src/f77wrap/fatlas_axpby.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/F77/src/f77wrap/fatlas_set.c atlas-3.10.3/interfaces/blas/F77/src/f77wrap/fatlas_set.c --- atlas-3.10.2/interfaces/blas/F77/src/f77wrap/fatlas_set.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/F77/src/f77wrap/fatlas_set.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/blas/F77/testing/cblat1.f atlas-3.10.3/interfaces/blas/F77/testing/cblat1.f --- atlas-3.10.2/interfaces/blas/F77/testing/cblat1.f 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/F77/testing/cblat1.f 2016-07-28 19:43:02.000000000 +0000 @@ -1,7 +1,49 @@ +*> \brief \b CBLAT1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM CBLAT1 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX Level 1 BLAS. +*> Based upon the original BLAS test routine together with: +*> +*> F06GAF Example Program Text +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex_blas_testing +* +* ===================================================================== PROGRAM CBLAT1 -* Test program for the COMPLEX Level 1 BLAS. -* Based upon the original BLAS test routine together with: -* F06GAF Example Program Text +* +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* ===================================================================== +* * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) @@ -114,8 +156,8 @@ + (5.0E0,6.0E0), (5.0E0,6.0E0), (0.1E0,0.1E0), + (-0.6E0,0.1E0), (0.1E0,-0.3E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), - + (7.0E0,8.0E0), (0.3E0,0.1E0), (0.1E0,0.4E0), - + (0.4E0,0.1E0), (0.1E0,0.2E0), (2.0E0,3.0E0), + + (7.0E0,8.0E0), (0.3E0,0.1E0), (0.5E0,0.0E0), + + (0.0E0,0.5E0), (0.0E0,0.2E0), (2.0E0,3.0E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/ DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), @@ -129,10 +171,10 @@ + (3.0E0,6.0E0), (-0.6E0,0.1E0), (4.0E0,7.0E0), + (0.1E0,-0.3E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.3E0,0.1E0), (5.0E0,8.0E0), - + (0.1E0,0.4E0), (6.0E0,9.0E0), (0.4E0,0.1E0), - + (8.0E0,3.0E0), (0.1E0,0.2E0), (9.0E0,4.0E0)/ - DATA STRUE2/0.0E0, 0.5E0, 0.6E0, 0.7E0, 0.7E0/ - DATA STRUE4/0.0E0, 0.7E0, 1.0E0, 1.3E0, 1.7E0/ + + (0.5E0,0.0E0), (6.0E0,9.0E0), (0.0E0,0.5E0), + + (8.0E0,3.0E0), (0.0E0,0.2E0), (9.0E0,4.0E0)/ + DATA STRUE2/0.0E0, 0.5E0, 0.6E0, 0.7E0, 0.8E0/ + DATA STRUE4/0.0E0, 0.7E0, 1.0E0, 1.3E0, 1.6E0/ DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), @@ -145,8 +187,8 @@ + (0.11E0,-0.03E0), (-0.17E0,0.46E0), + (-0.17E0,-0.19E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), - + (0.19E0,-0.17E0), (0.32E0,0.09E0), - + (0.23E0,-0.24E0), (0.18E0,0.01E0), + + (0.19E0,-0.17E0), (0.20E0,-0.35E0), + + (0.35E0,0.20E0), (0.14E0,0.08E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0), + (2.0E0,3.0E0)/ DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), @@ -162,9 +204,9 @@ + (-0.17E0,0.46E0), (4.0E0,7.0E0), + (-0.17E0,-0.19E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.19E0,-0.17E0), (5.0E0,8.0E0), - + 
(0.32E0,0.09E0), (6.0E0,9.0E0), - + (0.23E0,-0.24E0), (8.0E0,3.0E0), - + (0.18E0,0.01E0), (9.0E0,4.0E0)/ + + (0.20E0,-0.35E0), (6.0E0,9.0E0), + + (0.35E0,0.20E0), (8.0E0,3.0E0), + + (0.14E0,0.08E0), (9.0E0,4.0E0)/ DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), @@ -177,8 +219,8 @@ + (0.03E0,0.03E0), (-0.18E0,0.03E0), + (0.03E0,-0.09E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), - + (0.09E0,0.03E0), (0.03E0,0.12E0), - + (0.12E0,0.03E0), (0.03E0,0.06E0), (2.0E0,3.0E0), + + (0.09E0,0.03E0), (0.15E0,0.00E0), + + (0.00E0,0.15E0), (0.00E0,0.06E0), (2.0E0,3.0E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/ DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), @@ -193,8 +235,8 @@ + (-0.18E0,0.03E0), (4.0E0,7.0E0), + (0.03E0,-0.09E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.09E0,0.03E0), (5.0E0,8.0E0), - + (0.03E0,0.12E0), (6.0E0,9.0E0), (0.12E0,0.03E0), - + (8.0E0,3.0E0), (0.03E0,0.06E0), (9.0E0,4.0E0)/ + + (0.15E0,0.00E0), (6.0E0,9.0E0), (0.00E0,0.15E0), + + (8.0E0,3.0E0), (0.00E0,0.06E0), (9.0E0,4.0E0)/ DATA ITRUE3/0, 1, 2, 2, 2/ * .. Executable Statements .. DO 60 INCX = 1, 2 @@ -529,7 +571,8 @@ * * .. Parameters .. INTEGER NOUT - PARAMETER (NOUT=6) + REAL ZERO + PARAMETER (NOUT=6, ZERO=0.0E0) * .. Scalar Arguments .. REAL SFAC INTEGER LEN @@ -552,7 +595,7 @@ * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) - IF (SDIFF(ABS(SSIZE(I))+ABS(SFAC*SD),ABS(SSIZE(I))).EQ.0.0E0) + IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). 
diff -Nru atlas-3.10.2/interfaces/blas/F77/testing/dblat1.f atlas-3.10.3/interfaces/blas/F77/testing/dblat1.f --- atlas-3.10.2/interfaces/blas/F77/testing/dblat1.f 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/F77/testing/dblat1.f 2016-07-28 19:43:02.000000000 +0000 @@ -1,12 +1,54 @@ +*> \brief \b DBLAT1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM DBLAT1 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the DOUBLE PRECISION Level 1 BLAS. +*> +*> Based upon the original BLAS test routine together with: +*> F06EAF Example Program Text +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup double_blas_testing +* +* ===================================================================== PROGRAM DBLAT1 -* Test program for the DOUBLE PRECISION Level 1 BLAS. -* Based upon the original BLAS test routine together with: -* F06EAF Example Program Text +* +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* ===================================================================== +* * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SFAC @@ -14,31 +56,30 @@ * .. External Subroutines .. EXTERNAL CHECK0, CHECK1, CHECK2, CHECK3, HEADER * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA SFAC/9.765625D-4/ * .. 
Executable Statements .. WRITE (NOUT,99999) - DO 20 IC = 1, 10 + DO 20 IC = 1, 13 ICASE = IC CALL HEADER * -* .. Initialize PASS, INCX, INCY, and MODE for a new case. .. -* .. the value 9999 for INCX, INCY or MODE will appear in the .. +* .. Initialize PASS, INCX, and INCY for a new case. .. +* .. the value 9999 for INCX or INCY will appear in the .. * .. detailed output, if any, for cases that do not involve .. * .. these parameters .. * PASS = .TRUE. INCX = 9999 INCY = 9999 - MODE = 9999 - IF (ICASE.EQ.3) THEN + IF (ICASE.EQ.3 .OR. ICASE.EQ.11) THEN CALL CHECK0(SFAC) ELSE IF (ICASE.EQ.7 .OR. ICASE.EQ.8 .OR. ICASE.EQ.9 .OR. + ICASE.EQ.10) THEN CALL CHECK1(SFAC) ELSE IF (ICASE.EQ.1 .OR. ICASE.EQ.2 .OR. ICASE.EQ.5 .OR. - + ICASE.EQ.6) THEN + + ICASE.EQ.6 .OR. ICASE.EQ.12 .OR. ICASE.EQ.13) THEN CALL CHECK2(SFAC) ELSE IF (ICASE.EQ.4) THEN CALL CHECK3(SFAC) @@ -56,12 +97,12 @@ INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Arrays .. - CHARACTER*6 L(10) + CHARACTER*6 L(13) * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA L(1)/' DDOT '/ DATA L(2)/'DAXPY '/ @@ -73,6 +114,9 @@ DATA L(8)/'DASUM '/ DATA L(9)/'DSCAL '/ DATA L(10)/'IDAMAX'/ + DATA L(11)/'DROTMG'/ + DATA L(12)/'DROTM '/ + DATA L(13)/'DSDOT '/ * .. Executable Statements .. WRITE (NOUT,99999) ICASE, L(ICASE) RETURN @@ -86,18 +130,18 @@ * .. Scalar Arguments .. DOUBLE PRECISION SFAC * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. - DOUBLE PRECISION D12, SA, SB, SC, SS - INTEGER K + DOUBLE PRECISION SA, SB, SC, SS, D12 + INTEGER I, K * .. Local Arrays .. DOUBLE PRECISION DA1(8), DATRUE(8), DB1(8), DBTRUE(8), DC1(8), - + DS1(8) + $ DS1(8), DAB(4,9), DTEMP(9), DTRUE(9,9) * .. External Subroutines .. 
- EXTERNAL DROTG, STEST1 + EXTERNAL DROTG, DROTMG, STEST1 * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA DA1/0.3D0, 0.4D0, -0.3D0, -0.4D0, -0.3D0, 0.0D0, + 0.0D0, 1.0D0/ @@ -111,7 +155,52 @@ + 0.0D0, 1.0D0, 1.0D0/ DATA DBTRUE/0.0D0, 0.6D0, 0.0D0, -0.6D0, 0.0D0, + 0.0D0, 1.0D0, 0.0D0/ - DATA D12/4096.0D0/ +* INPUT FOR MODIFIED GIVENS + DATA DAB/ .1D0,.3D0,1.2D0,.2D0, + A .7D0, .2D0, .6D0, 4.2D0, + B 0.D0,0.D0,0.D0,0.D0, + C 4.D0, -1.D0, 2.D0, 4.D0, + D 6.D-10, 2.D-2, 1.D5, 10.D0, + E 4.D10, 2.D-2, 1.D-5, 10.D0, + F 2.D-10, 4.D-2, 1.D5, 10.D0, + G 2.D10, 4.D-2, 1.D-5, 10.D0, + H 4.D0, -2.D0, 8.D0, 4.D0 / +* TRUE RESULTS FOR MODIFIED GIVENS + DATA DTRUE/0.D0,0.D0, 1.3D0, .2D0, 0.D0,0.D0,0.D0, .5D0, 0.D0, + A 0.D0,0.D0, 4.5D0, 4.2D0, 1.D0, .5D0, 0.D0,0.D0,0.D0, + B 0.D0,0.D0,0.D0,0.D0, -2.D0, 0.D0,0.D0,0.D0,0.D0, + C 0.D0,0.D0,0.D0, 4.D0, -1.D0, 0.D0,0.D0,0.D0,0.D0, + D 0.D0, 15.D-3, 0.D0, 10.D0, -1.D0, 0.D0, -1.D-4, + E 0.D0, 1.D0, + F 0.D0,0.D0, 6144.D-5, 10.D0, -1.D0, 4096.D0, -1.D6, + G 0.D0, 1.D0, + H 0.D0,0.D0,15.D0,10.D0,-1.D0, 5.D-5, 0.D0,1.D0,0.D0, + I 0.D0,0.D0, 15.D0, 10.D0, -1. 
D0, 5.D5, -4096.D0, + J 1.D0, 4096.D-6, + K 0.D0,0.D0, 7.D0, 4.D0, 0.D0,0.D0, -.5D0, -.25D0, 0.D0/ +* 4096 = 2 ** 12 + DATA D12 /4096.D0/ + DTRUE(1,1) = 12.D0 / 130.D0 + DTRUE(2,1) = 36.D0 / 130.D0 + DTRUE(7,1) = -1.D0 / 6.D0 + DTRUE(1,2) = 14.D0 / 75.D0 + DTRUE(2,2) = 49.D0 / 75.D0 + DTRUE(9,2) = 1.D0 / 7.D0 + DTRUE(1,5) = 45.D-11 * (D12 * D12) + DTRUE(3,5) = 4.D5 / (3.D0 * D12) + DTRUE(6,5) = 1.D0 / D12 + DTRUE(8,5) = 1.D4 / (3.D0 * D12) + DTRUE(1,6) = 4.D10 / (1.5D0 * D12 * D12) + DTRUE(2,6) = 2.D-2 / 1.5D0 + DTRUE(8,6) = 5.D-7 * D12 + DTRUE(1,7) = 4.D0 / 150.D0 + DTRUE(2,7) = (2.D-10 / 1.5D0) * (D12 * D12) + DTRUE(7,7) = -DTRUE(6,5) + DTRUE(9,7) = 1.D4 / D12 + DTRUE(1,8) = DTRUE(1,7) + DTRUE(2,8) = 2.D10 / (1.5D0 * D12 * D12) + DTRUE(1,9) = 32.D0 / 7.D0 + DTRUE(2,9) = -16.D0 / 7.D0 * .. Executable Statements .. * * Compute true values which cannot be prestored @@ -134,6 +223,15 @@ CALL STEST1(SB,DBTRUE(K),DBTRUE(K),SFAC) CALL STEST1(SC,DC1(K),DC1(K),SFAC) CALL STEST1(SS,DS1(K),DS1(K),SFAC) + ELSEIF (ICASE.EQ.11) THEN +* .. DROTMG .. + DO I=1,4 + DTEMP(I)= DAB(I,K) + DTEMP(I+4) = 0.0 + END DO + DTEMP(9) = 0.0 + CALL DROTMG(DTEMP(1),DTEMP(2),DTEMP(3),DTEMP(4),DTEMP(5)) + CALL STEST(9,DTEMP,DTRUE(1,K),DTRUE(1,K),SFAC) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK0' STOP @@ -148,7 +246,7 @@ * .. Scalar Arguments .. DOUBLE PRECISION SFAC * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. INTEGER I, LEN, NP1 @@ -165,7 +263,7 @@ * .. Intrinsic Functions .. INTRINSIC MAX * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA SA/0.3D0, -1.0D0, 0.0D0, 1.0D0, 0.3D0, 0.3D0, + 0.3D0, 0.3D0, 0.3D0, 0.3D0/ @@ -212,11 +310,11 @@ IF (ICASE.EQ.7) THEN * .. DNRM2 .. 
STEMP(1) = DTRUE1(NP1) - CALL STEST1(DNRM2(N,SX,INCX),STEMP,STEMP,SFAC) + CALL STEST1(DNRM2(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.8) THEN * .. DASUM .. STEMP(1) = DTRUE3(NP1) - CALL STEST1(DASUM(N,SX,INCX),STEMP,STEMP,SFAC) + CALL STEST1(DASUM(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.9) THEN * .. DSCAL .. CALL DSCAL(N,SA((INCX-1)*5+NP1),SX,INCX) @@ -242,27 +340,39 @@ * .. Scalar Arguments .. DOUBLE PRECISION SFAC * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. - DOUBLE PRECISION SA, SC, SS - INTEGER I, J, KI, KN, KSIZE, LENX, LENY, MX, MY + DOUBLE PRECISION SA + INTEGER I, J, KI, KN, KNI, KPAR, KSIZE, LENX, LENY, + $ MX, MY * .. Local Arrays .. DOUBLE PRECISION DT10X(7,4,4), DT10Y(7,4,4), DT7(4,4), - + DT8(7,4,4), DT9X(7,4,4), DT9Y(7,4,4), DX1(7), - + DY1(7), SSIZE1(4), SSIZE2(14,2), STX(7), STY(7), - + SX(7), SY(7) + $ DT8(7,4,4), DX1(7), + $ DY1(7), SSIZE1(4), SSIZE2(14,2), SSIZE(7), + $ STX(7), STY(7), SX(7), SY(7), + $ DPAR(5,4), DT19X(7,4,16),DT19XA(7,4,4), + $ DT19XB(7,4,4), DT19XC(7,4,4),DT19XD(7,4,4), + $ DT19Y(7,4,16), DT19YA(7,4,4),DT19YB(7,4,4), + $ DT19YC(7,4,4), DT19YD(7,4,4), DTEMP(5) INTEGER INCXS(4), INCYS(4), LENS(4,2), NS(4) * .. External Functions .. - DOUBLE PRECISION DDOT - EXTERNAL DDOT + DOUBLE PRECISION DDOT, DSDOT + EXTERNAL DDOT, DSDOT * .. External Subroutines .. - EXTERNAL DAXPY, DCOPY, DSWAP, STEST, STEST1 + EXTERNAL DAXPY, DCOPY, DROTM, DSWAP, STEST, STEST1 * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. 
+ EQUIVALENCE (DT19X(1,1,1),DT19XA(1,1,1)),(DT19X(1,1,5), + A DT19XB(1,1,1)),(DT19X(1,1,9),DT19XC(1,1,1)), + B (DT19X(1,1,13),DT19XD(1,1,1)) + EQUIVALENCE (DT19Y(1,1,1),DT19YA(1,1,1)),(DT19Y(1,1,5), + A DT19YB(1,1,1)),(DT19Y(1,1,9),DT19YC(1,1,1)), + B (DT19Y(1,1,13),DT19YD(1,1,1)) + DATA SA/0.3D0/ DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ @@ -272,7 +382,6 @@ + -0.4D0/ DATA DY1/0.5D0, -0.9D0, 0.3D0, 0.7D0, -0.6D0, 0.2D0, + 0.8D0/ - DATA SC, SS/0.8D0, 0.6D0/ DATA DT7/0.0D0, 0.30D0, 0.21D0, 0.62D0, 0.0D0, + 0.30D0, -0.07D0, 0.85D0, 0.0D0, 0.30D0, -0.79D0, + -0.74D0, 0.0D0, 0.30D0, 0.33D0, 1.27D0/ @@ -295,44 +404,6 @@ + 0.0D0, 0.68D0, -0.9D0, 0.33D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.68D0, -0.9D0, 0.33D0, 0.7D0, + -0.75D0, 0.2D0, 1.04D0/ - DATA DT9X/0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, - + 0.0D0, 0.78D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, - + 0.0D0, 0.0D0, 0.78D0, -0.46D0, 0.0D0, 0.0D0, - + 0.0D0, 0.0D0, 0.0D0, 0.78D0, -0.46D0, -0.22D0, - + 1.06D0, 0.0D0, 0.0D0, 0.0D0, 0.6D0, 0.0D0, - + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.78D0, - + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, - + 0.66D0, 0.1D0, -0.1D0, 0.0D0, 0.0D0, 0.0D0, - + 0.0D0, 0.96D0, 0.1D0, -0.76D0, 0.8D0, 0.90D0, - + -0.3D0, -0.02D0, 0.6D0, 0.0D0, 0.0D0, 0.0D0, - + 0.0D0, 0.0D0, 0.0D0, 0.78D0, 0.0D0, 0.0D0, - + 0.0D0, 0.0D0, 0.0D0, 0.0D0, -0.06D0, 0.1D0, - + -0.1D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.90D0, - + 0.1D0, -0.22D0, 0.8D0, 0.18D0, -0.3D0, -0.02D0, - + 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, - + 0.78D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, - + 0.0D0, 0.78D0, 0.26D0, 0.0D0, 0.0D0, 0.0D0, - + 0.0D0, 0.0D0, 0.78D0, 0.26D0, -0.76D0, 1.12D0, - + 0.0D0, 0.0D0, 0.0D0/ - DATA DT9Y/0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, - + 0.0D0, 0.04D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, - + 0.0D0, 0.0D0, 0.04D0, -0.78D0, 0.0D0, 0.0D0, - + 0.0D0, 0.0D0, 0.0D0, 0.04D0, -0.78D0, 0.54D0, - + 0.08D0, 0.0D0, 0.0D0, 0.0D0, 0.5D0, 0.0D0, - + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.04D0, - + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 
0.7D0, - + -0.9D0, -0.12D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, - + 0.64D0, -0.9D0, -0.30D0, 0.7D0, -0.18D0, 0.2D0, - + 0.28D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, - + 0.0D0, 0.0D0, 0.04D0, 0.0D0, 0.0D0, 0.0D0, - + 0.0D0, 0.0D0, 0.0D0, 0.7D0, -1.08D0, 0.0D0, - + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.64D0, -1.26D0, - + 0.54D0, 0.20D0, 0.0D0, 0.0D0, 0.0D0, 0.5D0, - + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, - + 0.04D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, - + 0.0D0, 0.04D0, -0.9D0, 0.18D0, 0.0D0, 0.0D0, - + 0.0D0, 0.0D0, 0.04D0, -0.9D0, 0.18D0, 0.7D0, - + -0.18D0, 0.2D0, 0.16D0/ DATA DT10X/0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.5D0, -0.9D0, 0.0D0, 0.0D0, 0.0D0, @@ -375,6 +446,150 @@ + 0.0D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, + 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, + 1.17D0, 1.17D0, 1.17D0/ +* +* FOR DROTM +* + DATA DPAR/-2.D0, 0.D0,0.D0,0.D0,0.D0, + A -1.D0, 2.D0, -3.D0, -4.D0, 5.D0, + B 0.D0, 0.D0, 2.D0, -3.D0, 0.D0, + C 1.D0, 5.D0, 2.D0, 0.D0, -4.D0/ +* TRUE X RESULTS F0R ROTATIONS DROTM + DATA DT19XA/.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + A .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + B .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + C .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + D .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + E -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + F -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + G 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + H .6D0, .1D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + I -.8D0, 3.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + J -.9D0, 2.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + K 3.5D0, -.4D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + L .6D0, .1D0, -.5D0, .8D0, 0.D0,0.D0,0.D0, + M -.8D0, 3.8D0, -2.2D0, -1.2D0, 0.D0,0.D0,0.D0, + N -.9D0, 2.8D0, -1.4D0, -1.3D0, 0.D0,0.D0,0.D0, + O 3.5D0, -.4D0, -2.2D0, 4.7D0, 0.D0,0.D0,0.D0/ +* + DATA DT19XB/.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + A .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + B .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + C .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + D .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + E 
-.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + F -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + G 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + H .6D0, .1D0, -.5D0, 0.D0,0.D0,0.D0,0.D0, + I 0.D0, .1D0, -3.0D0, 0.D0,0.D0,0.D0,0.D0, + J -.3D0, .1D0, -2.0D0, 0.D0,0.D0,0.D0,0.D0, + K 3.3D0, .1D0, -2.0D0, 0.D0,0.D0,0.D0,0.D0, + L .6D0, .1D0, -.5D0, .8D0, .9D0, -.3D0, -.4D0, + M -2.0D0, .1D0, 1.4D0, .8D0, .6D0, -.3D0, -2.8D0, + N -1.8D0, .1D0, 1.3D0, .8D0, 0.D0, -.3D0, -1.9D0, + O 3.8D0, .1D0, -3.1D0, .8D0, 4.8D0, -.3D0, -1.5D0 / +* + DATA DT19XC/.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + A .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + B .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + C .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + D .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + E -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + F -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + G 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + H .6D0, .1D0, -.5D0, 0.D0,0.D0,0.D0,0.D0, + I 4.8D0, .1D0, -3.0D0, 0.D0,0.D0,0.D0,0.D0, + J 3.3D0, .1D0, -2.0D0, 0.D0,0.D0,0.D0,0.D0, + K 2.1D0, .1D0, -2.0D0, 0.D0,0.D0,0.D0,0.D0, + L .6D0, .1D0, -.5D0, .8D0, .9D0, -.3D0, -.4D0, + M -1.6D0, .1D0, -2.2D0, .8D0, 5.4D0, -.3D0, -2.8D0, + N -1.5D0, .1D0, -1.4D0, .8D0, 3.6D0, -.3D0, -1.9D0, + O 3.7D0, .1D0, -2.2D0, .8D0, 3.6D0, -.3D0, -1.5D0 / +* + DATA DT19XD/.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + A .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + B .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + C .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + D .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + E -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + F -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + G 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + H .6D0, .1D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + I -.8D0, -1.0D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + J -.9D0, -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + K 3.5D0, .8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + L .6D0, .1D0, -.5D0, .8D0, 0.D0,0.D0,0.D0, + M -.8D0, -1.0D0, 1.4D0, -1.6D0, 0.D0,0.D0,0.D0, + N -.9D0, -.8D0, 1.3D0, -1.6D0, 0.D0,0.D0,0.D0, + O 3.5D0, .8D0, -3.1D0, 4.8D0, 0.D0,0.D0,0.D0/ +* TRUE Y RESULTS FOR ROTATIONS DROTM + 
DATA DT19YA/.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + A .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + B .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + C .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + D .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + E .7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + F 1.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + G -2.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + H .5D0, -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + I .7D0, -4.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + J 1.7D0, -.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + K -2.6D0, 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + L .5D0, -.9D0, .3D0, .7D0, 0.D0,0.D0,0.D0, + M .7D0, -4.8D0, 3.0D0, 1.1D0, 0.D0,0.D0,0.D0, + N 1.7D0, -.7D0, -.7D0, 2.3D0, 0.D0,0.D0,0.D0, + O -2.6D0, 3.5D0, -.7D0, -3.6D0, 0.D0,0.D0,0.D0/ +* + DATA DT19YB/.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + A .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + B .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + C .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + D .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + E .7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + F 1.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + G -2.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + H .5D0, -.9D0, .3D0, 0.D0,0.D0,0.D0,0.D0, + I 4.0D0, -.9D0, -.3D0, 0.D0,0.D0,0.D0,0.D0, + J -.5D0, -.9D0, 1.5D0, 0.D0,0.D0,0.D0,0.D0, + K -1.5D0, -.9D0, -1.8D0, 0.D0,0.D0,0.D0,0.D0, + L .5D0, -.9D0, .3D0, .7D0, -.6D0, .2D0, .8D0, + M 3.7D0, -.9D0, -1.2D0, .7D0, -1.5D0, .2D0, 2.2D0, + N -.3D0, -.9D0, 2.1D0, .7D0, -1.6D0, .2D0, 2.0D0, + O -1.6D0, -.9D0, -2.1D0, .7D0, 2.9D0, .2D0, -3.8D0 / +* + DATA DT19YC/.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + A .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + B .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + C .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + D .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + E .7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + F 1.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + G -2.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + H .5D0, -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + I 4.0D0, -6.3D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + J -.5D0, .3D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + K -1.5D0, 3.0D0, 0.D0,0.D0,0.D0,0.D0,0.D0, + L .5D0, -.9D0, .3D0, .7D0, 
0.D0,0.D0,0.D0, + M 3.7D0, -7.2D0, 3.0D0, 1.7D0, 0.D0,0.D0,0.D0, + N -.3D0, .9D0, -.7D0, 1.9D0, 0.D0,0.D0,0.D0, + O -1.6D0, 2.7D0, -.7D0, -3.4D0, 0.D0,0.D0,0.D0/ +* + DATA DT19YD/.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + A .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + B .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + C .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + D .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + E .7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + F 1.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + G -2.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, + H .5D0, -.9D0, .3D0, 0.D0,0.D0,0.D0,0.D0, + I .7D0, -.9D0, 1.2D0, 0.D0,0.D0,0.D0,0.D0, + J 1.7D0, -.9D0, .5D0, 0.D0,0.D0,0.D0,0.D0, + K -2.6D0, -.9D0, -1.3D0, 0.D0,0.D0,0.D0,0.D0, + L .5D0, -.9D0, .3D0, .7D0, -.6D0, .2D0, .8D0, + M .7D0, -.9D0, 1.2D0, .7D0, -1.5D0, .2D0, 1.6D0, + N 1.7D0, -.9D0, .5D0, .7D0, -1.6D0, .2D0, 2.4D0, + O -2.6D0, -.9D0, -1.3D0, .7D0, 2.9D0, .2D0, -4.0D0 / +* * .. Executable Statements .. * DO 120 KI = 1, 4 @@ -421,6 +636,39 @@ 80 CONTINUE CALL STEST(LENX,SX,STX,SSIZE2(1,1),1.0D0) CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0D0) + ELSE IF (ICASE.EQ.12) THEN +* .. DROTM .. + KNI=KN+4*(KI-1) + DO KPAR=1,4 + DO I=1,7 + SX(I) = DX1(I) + SY(I) = DY1(I) + STX(I)= DT19X(I,KPAR,KNI) + STY(I)= DT19Y(I,KPAR,KNI) + END DO +* + DO I=1,5 + DTEMP(I) = DPAR(I,KPAR) + END DO +* + DO I=1,LENX + SSIZE(I)=STX(I) + END DO +* SEE REMARK ABOVE ABOUT DT11X(1,2,7) +* AND DT11X(5,3,8). + IF ((KPAR .EQ. 2) .AND. (KNI .EQ. 7)) + $ SSIZE(1) = 2.4D0 + IF ((KPAR .EQ. 3) .AND. (KNI .EQ. 8)) + $ SSIZE(5) = 1.8D0 +* + CALL DROTM(N,SX,INCX,SY,INCY,DTEMP) + CALL STEST(LENX,SX,STX,SSIZE,SFAC) + CALL STEST(LENY,SY,STY,STY,SFAC) + END DO + ELSE IF (ICASE.EQ.13) THEN +* .. DSDOT .. + CALL TESTDSDOT(REAL(DSDOT(N,REAL(SX),INCX,REAL(SY),INCY)), + $ REAL(DT7(KN,KI)),REAL(SSIZE1(KN)), .3125E-1) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' STOP @@ -436,10 +684,10 @@ * .. Scalar Arguments .. DOUBLE PRECISION SFAC * .. Scalars in Common .. 
- INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. - DOUBLE PRECISION SA, SC, SS + DOUBLE PRECISION SC, SS INTEGER I, K, KI, KN, KSIZE, LENX, LENY, MX, MY * .. Local Arrays .. DOUBLE PRECISION COPYX(5), COPYY(5), DT9X(7,4,4), DT9Y(7,4,4), @@ -454,9 +702,8 @@ * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. - DATA SA/0.3D0/ DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ @@ -647,14 +894,15 @@ * * .. Parameters .. INTEGER NOUT - PARAMETER (NOUT=6) + DOUBLE PRECISION ZERO + PARAMETER (NOUT=6, ZERO=0.0D0) * .. Scalar Arguments .. DOUBLE PRECISION SFAC INTEGER LEN * .. Array Arguments .. DOUBLE PRECISION SCOMP(LEN), SSIZE(LEN), STRUE(LEN) * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SD @@ -665,12 +913,12 @@ * .. Intrinsic Functions .. INTRINSIC ABS * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) - IF (SDIFF(ABS(SSIZE(I))+ABS(SFAC*SD),ABS(SSIZE(I))).EQ.0.0D0) + IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). @@ -680,16 +928,64 @@ PASS = .FALSE. 
WRITE (NOUT,99999) WRITE (NOUT,99998) - 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, MODE, I, SCOMP(I), + 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, I, SCOMP(I), + STRUE(I), SD, SSIZE(I) 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') -99998 FORMAT (/' CASE N INCX INCY MODE I ', +99998 FORMAT (/' CASE N INCX INCY I ', + + ' COMP(I) TRUE(I) DIFFERENCE', + + ' SIZE(I)',/1X) +99997 FORMAT (1X,I4,I3,2I5,I3,2D36.8,2D12.4) + END + SUBROUTINE TESTDSDOT(SCOMP,STRUE,SSIZE,SFAC) +* ********************************* STEST ************************** +* +* THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO +* SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE +* NEGLIGIBLE. +* +* C. L. LAWSON, JPL, 1974 DEC 10 +* +* .. Parameters .. + INTEGER NOUT + REAL ZERO + PARAMETER (NOUT=6, ZERO=0.0E0) +* .. Scalar Arguments .. + REAL SFAC, SCOMP, SSIZE, STRUE +* .. Scalars in Common .. + INTEGER ICASE, INCX, INCY, N + LOGICAL PASS +* .. Local Scalars .. + REAL SD +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. Common blocks .. + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS +* .. Executable Statements .. +* + SD = SCOMP - STRUE + IF (ABS(SFAC*SD) .LE. ABS(SSIZE) * EPSILON(ZERO)) + + GO TO 40 +* +* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). +* + IF ( .NOT. PASS) GO TO 20 +* PRINT FAIL MESSAGE AND HEADER. + PASS = .FALSE. + WRITE (NOUT,99999) + WRITE (NOUT,99998) + 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, SCOMP, + + STRUE, SD, SSIZE + 40 CONTINUE + RETURN +* +99999 FORMAT (' FAIL') +99998 FORMAT (/' CASE N INCX INCY ', + ' COMP(I) TRUE(I) DIFFERENCE', + ' SIZE(I)',/1X) -99997 FORMAT (1X,I4,I3,3I5,I3,2D36.8,2D12.4) +99997 FORMAT (1X,I4,I3,1I5,I3,2E36.8,2E12.4) END SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) * ************************* STEST1 ***************************** @@ -739,12 +1035,12 @@ * .. Scalar Arguments .. INTEGER ICOMP, ITRUE * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. 
INTEGER ID * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * IF (ICOMP.EQ.ITRUE) GO TO 40 @@ -757,13 +1053,13 @@ WRITE (NOUT,99999) WRITE (NOUT,99998) 20 ID = ICOMP - ITRUE - WRITE (NOUT,99997) ICASE, N, INCX, INCY, MODE, ICOMP, ITRUE, ID + WRITE (NOUT,99997) ICASE, N, INCX, INCY, ICOMP, ITRUE, ID 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') -99998 FORMAT (/' CASE N INCX INCY MODE ', +99998 FORMAT (/' CASE N INCX INCY ', + ' COMP TRUE DIFFERENCE', + /1X) -99997 FORMAT (1X,I4,I3,3I5,2I36,I12) +99997 FORMAT (1X,I4,I3,2I5,2I36,I12) END diff -Nru atlas-3.10.2/interfaces/blas/F77/testing/sblat1.f atlas-3.10.3/interfaces/blas/F77/testing/sblat1.f --- atlas-3.10.2/interfaces/blas/F77/testing/sblat1.f 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/F77/testing/sblat1.f 2016-07-28 19:43:02.000000000 +0000 @@ -1,12 +1,54 @@ +*> \brief \b SBLAT1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM SBLAT1 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the REAL Level 1 BLAS. +*> +*> Based upon the original BLAS test routine together with: +*> F06EAF Example Program Text +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup single_blas_testing +* +* ===================================================================== PROGRAM SBLAT1 -* Test program for the REAL Level 1 BLAS. -* Based upon the original BLAS test routine together with: -* F06EAF Example Program Text +* +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* ===================================================================== +* * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL SFAC @@ -14,31 +56,30 @@ * .. External Subroutines .. EXTERNAL CHECK0, CHECK1, CHECK2, CHECK3, HEADER * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA SFAC/9.765625E-4/ * .. Executable Statements .. WRITE (NOUT,99999) - DO 20 IC = 1, 10 + DO 20 IC = 1, 13 ICASE = IC CALL HEADER * -* .. Initialize PASS, INCX, INCY, and MODE for a new case. .. -* .. the value 9999 for INCX, INCY or MODE will appear in the .. +* .. Initialize PASS, INCX, and INCY for a new case. .. +* .. the value 9999 for INCX or INCY will appear in the .. * .. detailed output, if any, for cases that do not involve .. * .. these parameters .. * PASS = .TRUE. INCX = 9999 INCY = 9999 - MODE = 9999 - IF (ICASE.EQ.3) THEN + IF (ICASE.EQ.3 .OR. ICASE.EQ.11) THEN CALL CHECK0(SFAC) ELSE IF (ICASE.EQ.7 .OR. ICASE.EQ.8 .OR. ICASE.EQ.9 .OR. + ICASE.EQ.10) THEN CALL CHECK1(SFAC) ELSE IF (ICASE.EQ.1 .OR. ICASE.EQ.2 .OR. ICASE.EQ.5 .OR. - + ICASE.EQ.6) THEN + + ICASE.EQ.6 .OR. ICASE.EQ.12 .OR. ICASE.EQ.13) THEN CALL CHECK2(SFAC) ELSE IF (ICASE.EQ.4) THEN CALL CHECK3(SFAC) @@ -56,12 +97,12 @@ INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Arrays .. - CHARACTER*6 L(10) + CHARACTER*6 L(13) * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. 
DATA L(1)/' SDOT '/ DATA L(2)/'SAXPY '/ @@ -73,6 +114,9 @@ DATA L(8)/'SASUM '/ DATA L(9)/'SSCAL '/ DATA L(10)/'ISAMAX'/ + DATA L(11)/'SROTMG'/ + DATA L(12)/'SROTM '/ + DATA L(13)/'SDSDOT'/ * .. Executable Statements .. WRITE (NOUT,99999) ICASE, L(ICASE) RETURN @@ -86,18 +130,18 @@ * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL D12, SA, SB, SC, SS - INTEGER K + INTEGER I, K * .. Local Arrays .. REAL DA1(8), DATRUE(8), DB1(8), DBTRUE(8), DC1(8), - + DS1(8) + + DS1(8), DAB(4,9), DTEMP(9), DTRUE(9,9) * .. External Subroutines .. - EXTERNAL SROTG, STEST1 + EXTERNAL SROTG, SROTMG, STEST1 * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA DA1/0.3E0, 0.4E0, -0.3E0, -0.4E0, -0.3E0, 0.0E0, + 0.0E0, 1.0E0/ @@ -111,7 +155,52 @@ + 0.0E0, 1.0E0, 1.0E0/ DATA DBTRUE/0.0E0, 0.6E0, 0.0E0, -0.6E0, 0.0E0, + 0.0E0, 1.0E0, 0.0E0/ - DATA D12/4096.0E0/ +* INPUT FOR MODIFIED GIVENS + DATA DAB/ .1E0,.3E0,1.2E0,.2E0, + A .7E0, .2E0, .6E0, 4.2E0, + B 0.E0,0.E0,0.E0,0.E0, + C 4.E0, -1.E0, 2.E0, 4.E0, + D 6.E-10, 2.E-2, 1.E5, 10.E0, + E 4.E10, 2.E-2, 1.E-5, 10.E0, + F 2.E-10, 4.E-2, 1.E5, 10.E0, + G 2.E10, 4.E-2, 1.E-5, 10.E0, + H 4.E0, -2.E0, 8.E0, 4.E0 / +* TRUE RESULTS FOR MODIFIED GIVENS + DATA DTRUE/0.E0,0.E0, 1.3E0, .2E0, 0.E0,0.E0,0.E0, .5E0, 0.E0, + A 0.E0,0.E0, 4.5E0, 4.2E0, 1.E0, .5E0, 0.E0,0.E0,0.E0, + B 0.E0,0.E0,0.E0,0.E0, -2.E0, 0.E0,0.E0,0.E0,0.E0, + C 0.E0,0.E0,0.E0, 4.E0, -1.E0, 0.E0,0.E0,0.E0,0.E0, + D 0.E0, 15.E-3, 0.E0, 10.E0, -1.E0, 0.E0, -1.E-4, + E 0.E0, 1.E0, + F 0.E0,0.E0, 6144.E-5, 10.E0, -1.E0, 4096.E0, -1.E6, + G 0.E0, 1.E0, + H 0.E0,0.E0,15.E0,10.E0,-1.E0, 5.E-5, 0.E0,1.E0,0.E0, + I 0.E0,0.E0, 15.E0, 10.E0, -1. 
E0, 5.E5, -4096.E0, + J 1.E0, 4096.E-6, + K 0.E0,0.E0, 7.E0, 4.E0, 0.E0,0.E0, -.5E0, -.25E0, 0.E0/ +* 4096 = 2 ** 12 + DATA D12 /4096.E0/ + DTRUE(1,1) = 12.E0 / 130.E0 + DTRUE(2,1) = 36.E0 / 130.E0 + DTRUE(7,1) = -1.E0 / 6.E0 + DTRUE(1,2) = 14.E0 / 75.E0 + DTRUE(2,2) = 49.E0 / 75.E0 + DTRUE(9,2) = 1.E0 / 7.E0 + DTRUE(1,5) = 45.E-11 * (D12 * D12) + DTRUE(3,5) = 4.E5 / (3.E0 * D12) + DTRUE(6,5) = 1.E0 / D12 + DTRUE(8,5) = 1.E4 / (3.E0 * D12) + DTRUE(1,6) = 4.E10 / (1.5E0 * D12 * D12) + DTRUE(2,6) = 2.E-2 / 1.5E0 + DTRUE(8,6) = 5.E-7 * D12 + DTRUE(1,7) = 4.E0 / 150.E0 + DTRUE(2,7) = (2.E-10 / 1.5E0) * (D12 * D12) + DTRUE(7,7) = -DTRUE(6,5) + DTRUE(9,7) = 1.E4 / D12 + DTRUE(1,8) = DTRUE(1,7) + DTRUE(2,8) = 2.E10 / (1.5E0 * D12 * D12) + DTRUE(1,9) = 32.E0 / 7.E0 + DTRUE(2,9) = -16.E0 / 7.E0 * .. Executable Statements .. * * Compute true values which cannot be prestored @@ -134,6 +223,15 @@ CALL STEST1(SB,DBTRUE(K),DBTRUE(K),SFAC) CALL STEST1(SC,DC1(K),DC1(K),SFAC) CALL STEST1(SS,DS1(K),DS1(K),SFAC) + ELSEIF (ICASE.EQ.11) THEN +* .. SROTMG .. + DO I=1,4 + DTEMP(I)= DAB(I,K) + DTEMP(I+4) = 0.0 + END DO + DTEMP(9) = 0.0 + CALL SROTMG(DTEMP(1),DTEMP(2),DTEMP(3),DTEMP(4),DTEMP(5)) + CALL STEST(9,DTEMP,DTRUE(1,K),DTRUE(1,K),SFAC) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK0' STOP @@ -148,7 +246,7 @@ * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. INTEGER I, LEN, NP1 @@ -165,7 +263,7 @@ * .. Intrinsic Functions .. INTRINSIC MAX * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA SA/0.3E0, -1.0E0, 0.0E0, 1.0E0, 0.3E0, 0.3E0, + 0.3E0, 0.3E0, 0.3E0, 0.3E0/ @@ -212,11 +310,11 @@ IF (ICASE.EQ.7) THEN * .. SNRM2 .. STEMP(1) = DTRUE1(NP1) - CALL STEST1(SNRM2(N,SX,INCX),STEMP,STEMP,SFAC) + CALL STEST1(SNRM2(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.8) THEN * .. 
SASUM .. STEMP(1) = DTRUE3(NP1) - CALL STEST1(SASUM(N,SX,INCX),STEMP,STEMP,SFAC) + CALL STEST1(SASUM(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.9) THEN * .. SSCAL .. CALL SSCAL(N,SA((INCX-1)*5+NP1),SX,INCX) @@ -242,27 +340,40 @@ * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. - REAL SA, SC, SS - INTEGER I, J, KI, KN, KSIZE, LENX, LENY, MX, MY + REAL SA + INTEGER I, J, KI, KN, KNI, KPAR, KSIZE, LENX, LENY, + $ MX, MY * .. Local Arrays .. REAL DT10X(7,4,4), DT10Y(7,4,4), DT7(4,4), - + DT8(7,4,4), DT9X(7,4,4), DT9Y(7,4,4), DX1(7), - + DY1(7), SSIZE1(4), SSIZE2(14,2), STX(7), STY(7), - + SX(7), SY(7) + $ DT8(7,4,4), DX1(7), + $ DY1(7), SSIZE1(4), SSIZE2(14,2), SSIZE3(4), + $ SSIZE(7), STX(7), STY(7), SX(7), SY(7), + $ DPAR(5,4), DT19X(7,4,16),DT19XA(7,4,4), + $ DT19XB(7,4,4), DT19XC(7,4,4),DT19XD(7,4,4), + $ DT19Y(7,4,16), DT19YA(7,4,4),DT19YB(7,4,4), + $ DT19YC(7,4,4), DT19YD(7,4,4), DTEMP(5), + $ ST7B(4,4) INTEGER INCXS(4), INCYS(4), LENS(4,2), NS(4) * .. External Functions .. - REAL SDOT - EXTERNAL SDOT + REAL SDOT, SDSDOT + EXTERNAL SDOT, SDSDOT * .. External Subroutines .. - EXTERNAL SAXPY, SCOPY, SSWAP, STEST, STEST1 + EXTERNAL SAXPY, SCOPY, SROTM, SSWAP, STEST, STEST1 * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. 
+ EQUIVALENCE (DT19X(1,1,1),DT19XA(1,1,1)),(DT19X(1,1,5), + A DT19XB(1,1,1)),(DT19X(1,1,9),DT19XC(1,1,1)), + B (DT19X(1,1,13),DT19XD(1,1,1)) + EQUIVALENCE (DT19Y(1,1,1),DT19YA(1,1,1)),(DT19Y(1,1,5), + A DT19YB(1,1,1)),(DT19Y(1,1,9),DT19YC(1,1,1)), + B (DT19Y(1,1,13),DT19YD(1,1,1)) + DATA SA/0.3E0/ DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ @@ -272,10 +383,11 @@ + -0.4E0/ DATA DY1/0.5E0, -0.9E0, 0.3E0, 0.7E0, -0.6E0, 0.2E0, + 0.8E0/ - DATA SC, SS/0.8E0, 0.6E0/ DATA DT7/0.0E0, 0.30E0, 0.21E0, 0.62E0, 0.0E0, + 0.30E0, -0.07E0, 0.85E0, 0.0E0, 0.30E0, -0.79E0, + -0.74E0, 0.0E0, 0.30E0, 0.33E0, 1.27E0/ + DATA ST7B/ .1, .4, .31, .72, .1, .4, .03, .95, + + .1, .4, -.69, -.64, .1, .4, .43, 1.37/ DATA DT8/0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.68E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.68E0, -0.87E0, 0.0E0, 0.0E0, @@ -295,44 +407,6 @@ + 0.0E0, 0.68E0, -0.9E0, 0.33E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.68E0, -0.9E0, 0.33E0, 0.7E0, + -0.75E0, 0.2E0, 1.04E0/ - DATA DT9X/0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, - + 0.0E0, 0.78E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, - + 0.0E0, 0.0E0, 0.78E0, -0.46E0, 0.0E0, 0.0E0, - + 0.0E0, 0.0E0, 0.0E0, 0.78E0, -0.46E0, -0.22E0, - + 1.06E0, 0.0E0, 0.0E0, 0.0E0, 0.6E0, 0.0E0, - + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.78E0, - + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, - + 0.66E0, 0.1E0, -0.1E0, 0.0E0, 0.0E0, 0.0E0, - + 0.0E0, 0.96E0, 0.1E0, -0.76E0, 0.8E0, 0.90E0, - + -0.3E0, -0.02E0, 0.6E0, 0.0E0, 0.0E0, 0.0E0, - + 0.0E0, 0.0E0, 0.0E0, 0.78E0, 0.0E0, 0.0E0, - + 0.0E0, 0.0E0, 0.0E0, 0.0E0, -0.06E0, 0.1E0, - + -0.1E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.90E0, - + 0.1E0, -0.22E0, 0.8E0, 0.18E0, -0.3E0, -0.02E0, - + 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, - + 0.78E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, - + 0.0E0, 0.78E0, 0.26E0, 0.0E0, 0.0E0, 0.0E0, - + 0.0E0, 0.0E0, 0.78E0, 0.26E0, -0.76E0, 1.12E0, - + 0.0E0, 0.0E0, 0.0E0/ - DATA DT9Y/0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, - + 0.0E0, 0.04E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, - + 
0.0E0, 0.0E0, 0.04E0, -0.78E0, 0.0E0, 0.0E0, - + 0.0E0, 0.0E0, 0.0E0, 0.04E0, -0.78E0, 0.54E0, - + 0.08E0, 0.0E0, 0.0E0, 0.0E0, 0.5E0, 0.0E0, - + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.04E0, - + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.7E0, - + -0.9E0, -0.12E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, - + 0.64E0, -0.9E0, -0.30E0, 0.7E0, -0.18E0, 0.2E0, - + 0.28E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, - + 0.0E0, 0.0E0, 0.04E0, 0.0E0, 0.0E0, 0.0E0, - + 0.0E0, 0.0E0, 0.0E0, 0.7E0, -1.08E0, 0.0E0, - + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.64E0, -1.26E0, - + 0.54E0, 0.20E0, 0.0E0, 0.0E0, 0.0E0, 0.5E0, - + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, - + 0.04E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, - + 0.0E0, 0.04E0, -0.9E0, 0.18E0, 0.0E0, 0.0E0, - + 0.0E0, 0.0E0, 0.04E0, -0.9E0, 0.18E0, 0.7E0, - + -0.18E0, 0.2E0, 0.16E0/ DATA DT10X/0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.5E0, -0.9E0, 0.0E0, 0.0E0, 0.0E0, @@ -375,6 +449,151 @@ + 0.0E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, + 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, + 1.17E0, 1.17E0, 1.17E0/ + DATA SSIZE3/ .1, .4, 1.7, 3.3 / +* +* FOR DROTM +* + DATA DPAR/-2.E0, 0.E0,0.E0,0.E0,0.E0, + A -1.E0, 2.E0, -3.E0, -4.E0, 5.E0, + B 0.E0, 0.E0, 2.E0, -3.E0, 0.E0, + C 1.E0, 5.E0, 2.E0, 0.E0, -4.E0/ +* TRUE X RESULTS F0R ROTATIONS DROTM + DATA DT19XA/.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + A .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + B .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + C .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + D .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + E -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + F -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + G 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + H .6E0, .1E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + I -.8E0, 3.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + J -.9E0, 2.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + K 3.5E0, -.4E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + L .6E0, .1E0, -.5E0, .8E0, 0.E0,0.E0,0.E0, + M -.8E0, 3.8E0, -2.2E0, -1.2E0, 0.E0,0.E0,0.E0, + N -.9E0, 2.8E0, -1.4E0, -1.3E0, 0.E0,0.E0,0.E0, + O 
3.5E0, -.4E0, -2.2E0, 4.7E0, 0.E0,0.E0,0.E0/ +* + DATA DT19XB/.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + A .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + B .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + C .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + D .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + E -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + F -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + G 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + H .6E0, .1E0, -.5E0, 0.E0,0.E0,0.E0,0.E0, + I 0.E0, .1E0, -3.0E0, 0.E0,0.E0,0.E0,0.E0, + J -.3E0, .1E0, -2.0E0, 0.E0,0.E0,0.E0,0.E0, + K 3.3E0, .1E0, -2.0E0, 0.E0,0.E0,0.E0,0.E0, + L .6E0, .1E0, -.5E0, .8E0, .9E0, -.3E0, -.4E0, + M -2.0E0, .1E0, 1.4E0, .8E0, .6E0, -.3E0, -2.8E0, + N -1.8E0, .1E0, 1.3E0, .8E0, 0.E0, -.3E0, -1.9E0, + O 3.8E0, .1E0, -3.1E0, .8E0, 4.8E0, -.3E0, -1.5E0 / +* + DATA DT19XC/.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + A .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + B .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + C .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + D .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + E -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + F -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + G 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + H .6E0, .1E0, -.5E0, 0.E0,0.E0,0.E0,0.E0, + I 4.8E0, .1E0, -3.0E0, 0.E0,0.E0,0.E0,0.E0, + J 3.3E0, .1E0, -2.0E0, 0.E0,0.E0,0.E0,0.E0, + K 2.1E0, .1E0, -2.0E0, 0.E0,0.E0,0.E0,0.E0, + L .6E0, .1E0, -.5E0, .8E0, .9E0, -.3E0, -.4E0, + M -1.6E0, .1E0, -2.2E0, .8E0, 5.4E0, -.3E0, -2.8E0, + N -1.5E0, .1E0, -1.4E0, .8E0, 3.6E0, -.3E0, -1.9E0, + O 3.7E0, .1E0, -2.2E0, .8E0, 3.6E0, -.3E0, -1.5E0 / +* + DATA DT19XD/.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + A .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + B .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + C .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + D .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + E -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + F -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + G 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + H .6E0, .1E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + I -.8E0, -1.0E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + J -.9E0, -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + K 3.5E0, 
.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + L .6E0, .1E0, -.5E0, .8E0, 0.E0,0.E0,0.E0, + M -.8E0, -1.0E0, 1.4E0, -1.6E0, 0.E0,0.E0,0.E0, + N -.9E0, -.8E0, 1.3E0, -1.6E0, 0.E0,0.E0,0.E0, + O 3.5E0, .8E0, -3.1E0, 4.8E0, 0.E0,0.E0,0.E0/ +* TRUE Y RESULTS FOR ROTATIONS DROTM + DATA DT19YA/.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + A .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + B .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + C .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + D .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + E .7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + F 1.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + G -2.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + H .5E0, -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + I .7E0, -4.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + J 1.7E0, -.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + K -2.6E0, 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + L .5E0, -.9E0, .3E0, .7E0, 0.E0,0.E0,0.E0, + M .7E0, -4.8E0, 3.0E0, 1.1E0, 0.E0,0.E0,0.E0, + N 1.7E0, -.7E0, -.7E0, 2.3E0, 0.E0,0.E0,0.E0, + O -2.6E0, 3.5E0, -.7E0, -3.6E0, 0.E0,0.E0,0.E0/ +* + DATA DT19YB/.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + A .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + B .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + C .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + D .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + E .7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + F 1.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + G -2.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + H .5E0, -.9E0, .3E0, 0.E0,0.E0,0.E0,0.E0, + I 4.0E0, -.9E0, -.3E0, 0.E0,0.E0,0.E0,0.E0, + J -.5E0, -.9E0, 1.5E0, 0.E0,0.E0,0.E0,0.E0, + K -1.5E0, -.9E0, -1.8E0, 0.E0,0.E0,0.E0,0.E0, + L .5E0, -.9E0, .3E0, .7E0, -.6E0, .2E0, .8E0, + M 3.7E0, -.9E0, -1.2E0, .7E0, -1.5E0, .2E0, 2.2E0, + N -.3E0, -.9E0, 2.1E0, .7E0, -1.6E0, .2E0, 2.0E0, + O -1.6E0, -.9E0, -2.1E0, .7E0, 2.9E0, .2E0, -3.8E0 / +* + DATA DT19YC/.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + A .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + B .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + C .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + D .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + E .7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + F 1.7E0, 
0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + G -2.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + H .5E0, -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + I 4.0E0, -6.3E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + J -.5E0, .3E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + K -1.5E0, 3.0E0, 0.E0,0.E0,0.E0,0.E0,0.E0, + L .5E0, -.9E0, .3E0, .7E0, 0.E0,0.E0,0.E0, + M 3.7E0, -7.2E0, 3.0E0, 1.7E0, 0.E0,0.E0,0.E0, + N -.3E0, .9E0, -.7E0, 1.9E0, 0.E0,0.E0,0.E0, + O -1.6E0, 2.7E0, -.7E0, -3.4E0, 0.E0,0.E0,0.E0/ +* + DATA DT19YD/.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + A .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + B .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + C .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + D .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + E .7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + F 1.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + G -2.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, + H .5E0, -.9E0, .3E0, 0.E0,0.E0,0.E0,0.E0, + I .7E0, -.9E0, 1.2E0, 0.E0,0.E0,0.E0,0.E0, + J 1.7E0, -.9E0, .5E0, 0.E0,0.E0,0.E0,0.E0, + K -2.6E0, -.9E0, -1.3E0, 0.E0,0.E0,0.E0,0.E0, + L .5E0, -.9E0, .3E0, .7E0, -.6E0, .2E0, .8E0, + M .7E0, -.9E0, 1.2E0, .7E0, -1.5E0, .2E0, 1.6E0, + N 1.7E0, -.9E0, .5E0, .7E0, -1.6E0, .2E0, 2.4E0, + O -2.6E0, -.9E0, -1.3E0, .7E0, 2.9E0, .2E0, -4.0E0 / +* * .. Executable Statements .. * DO 120 KI = 1, 4 @@ -421,6 +640,39 @@ 80 CONTINUE CALL STEST(LENX,SX,STX,SSIZE2(1,1),1.0E0) CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0E0) + ELSEIF (ICASE.EQ.12) THEN +* .. SROTM .. + KNI=KN+4*(KI-1) + DO KPAR=1,4 + DO I=1,7 + SX(I) = DX1(I) + SY(I) = DY1(I) + STX(I)= DT19X(I,KPAR,KNI) + STY(I)= DT19Y(I,KPAR,KNI) + END DO +* + DO I=1,5 + DTEMP(I) = DPAR(I,KPAR) + END DO +* + DO I=1,LENX + SSIZE(I)=STX(I) + END DO +* SEE REMARK ABOVE ABOUT DT11X(1,2,7) +* AND DT11X(5,3,8). + IF ((KPAR .EQ. 2) .AND. (KNI .EQ. 7)) + $ SSIZE(1) = 2.4E0 + IF ((KPAR .EQ. 3) .AND. (KNI .EQ. 8)) + $ SSIZE(5) = 1.8E0 +* + CALL SROTM(N,SX,INCX,SY,INCY,DTEMP) + CALL STEST(LENX,SX,STX,SSIZE,SFAC) + CALL STEST(LENY,SY,STY,STY,SFAC) + END DO + ELSEIF (ICASE.EQ.13) THEN +* .. SDSROT .. 
+ CALL STEST1 (SDSDOT(N,.1,SX,INCX,SY,INCY), + $ ST7B(KN,KI),SSIZE3(KN),SFAC) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' STOP @@ -436,10 +688,10 @@ * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. - REAL SA, SC, SS + REAL SC, SS INTEGER I, K, KI, KN, KSIZE, LENX, LENY, MX, MY * .. Local Arrays .. REAL COPYX(5), COPYY(5), DT9X(7,4,4), DT9Y(7,4,4), @@ -454,9 +706,8 @@ * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. - DATA SA/0.3E0/ DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ @@ -647,14 +898,15 @@ * * .. Parameters .. INTEGER NOUT - PARAMETER (NOUT=6) + REAL ZERO + PARAMETER (NOUT=6, ZERO=0.0E0) * .. Scalar Arguments .. REAL SFAC INTEGER LEN * .. Array Arguments .. REAL SCOMP(LEN), SSIZE(LEN), STRUE(LEN) * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL SD @@ -665,12 +917,12 @@ * .. Intrinsic Functions .. INTRINSIC ABS * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) - IF (SDIFF(ABS(SSIZE(I))+ABS(SFAC*SD),ABS(SSIZE(I))).EQ.0.0E0) + IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). @@ -680,16 +932,16 @@ PASS = .FALSE. 
WRITE (NOUT,99999) WRITE (NOUT,99998) - 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, MODE, I, SCOMP(I), + 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, I, SCOMP(I), + STRUE(I), SD, SSIZE(I) 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') -99998 FORMAT (/' CASE N INCX INCY MODE I ', +99998 FORMAT (/' CASE N INCX INCY I ', + ' COMP(I) TRUE(I) DIFFERENCE', + ' SIZE(I)',/1X) -99997 FORMAT (1X,I4,I3,3I5,I3,2E36.8,2E12.4) +99997 FORMAT (1X,I4,I3,2I5,I3,2E36.8,2E12.4) END SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) * ************************* STEST1 ***************************** @@ -739,12 +991,12 @@ * .. Scalar Arguments .. INTEGER ICOMP, ITRUE * .. Scalars in Common .. - INTEGER ICASE, INCX, INCY, MODE, N + INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. INTEGER ID * .. Common blocks .. - COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS + COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * IF (ICOMP.EQ.ITRUE) GO TO 40 @@ -757,13 +1009,13 @@ WRITE (NOUT,99999) WRITE (NOUT,99998) 20 ID = ICOMP - ITRUE - WRITE (NOUT,99997) ICASE, N, INCX, INCY, MODE, ICOMP, ITRUE, ID + WRITE (NOUT,99997) ICASE, N, INCX, INCY, ICOMP, ITRUE, ID 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') -99998 FORMAT (/' CASE N INCX INCY MODE ', +99998 FORMAT (/' CASE N INCX INCY ', + ' COMP TRUE DIFFERENCE', + /1X) -99997 FORMAT (1X,I4,I3,3I5,2I36,I12) +99997 FORMAT (1X,I4,I3,2I5,2I36,I12) END diff -Nru atlas-3.10.2/interfaces/blas/F77/testing/zblat1.f atlas-3.10.3/interfaces/blas/F77/testing/zblat1.f --- atlas-3.10.2/interfaces/blas/F77/testing/zblat1.f 2014-07-10 16:22:03.000000000 +0000 +++ atlas-3.10.3/interfaces/blas/F77/testing/zblat1.f 2016-07-28 19:43:02.000000000 +0000 @@ -1,7 +1,49 @@ +*> \brief \b ZBLAT1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM ZBLAT1 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test 
program for the COMPLEX*16 Level 1 BLAS. +*> +*> Based upon the original BLAS test routine together with: +*> F06GAF Example Program Text +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16_blas_testing +* +* ===================================================================== PROGRAM ZBLAT1 -* Test program for the COMPLEX*16 Level 1 BLAS. -* Based upon the original BLAS test routine together with: -* F06GAF Example Program Text +* +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* ===================================================================== +* * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) @@ -114,8 +156,8 @@ + (5.0D0,6.0D0), (5.0D0,6.0D0), (0.1D0,0.1D0), + (-0.6D0,0.1D0), (0.1D0,-0.3D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), - + (7.0D0,8.0D0), (0.3D0,0.1D0), (0.1D0,0.4D0), - + (0.4D0,0.1D0), (0.1D0,0.2D0), (2.0D0,3.0D0), + + (7.0D0,8.0D0), (0.3D0,0.1D0), (0.5D0,0.0D0), + + (0.0D0,0.5D0), (0.0D0,0.2D0), (2.0D0,3.0D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/ DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), @@ -129,10 +171,10 @@ + (3.0D0,6.0D0), (-0.6D0,0.1D0), (4.0D0,7.0D0), + (0.1D0,-0.3D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.3D0,0.1D0), (5.0D0,8.0D0), - + (0.1D0,0.4D0), (6.0D0,9.0D0), (0.4D0,0.1D0), - + (8.0D0,3.0D0), (0.1D0,0.2D0), (9.0D0,4.0D0)/ - DATA STRUE2/0.0D0, 0.5D0, 0.6D0, 0.7D0, 0.7D0/ - DATA STRUE4/0.0D0, 0.7D0, 1.0D0, 1.3D0, 1.7D0/ + + (0.5D0,0.0D0), (6.0D0,9.0D0), (0.0D0,0.5D0), + + (8.0D0,3.0D0), (0.0D0,0.2D0), (9.0D0,4.0D0)/ + DATA STRUE2/0.0D0, 0.5D0, 0.6D0, 0.7D0, 0.8D0/ + DATA STRUE4/0.0D0, 0.7D0, 
1.0D0, 1.3D0, 1.6D0/ DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), @@ -145,8 +187,8 @@ + (0.11D0,-0.03D0), (-0.17D0,0.46D0), + (-0.17D0,-0.19D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), - + (0.19D0,-0.17D0), (0.32D0,0.09D0), - + (0.23D0,-0.24D0), (0.18D0,0.01D0), + + (0.19D0,-0.17D0), (0.20D0,-0.35D0), + + (0.35D0,0.20D0), (0.14D0,0.08D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0), + (2.0D0,3.0D0)/ DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), @@ -162,9 +204,9 @@ + (-0.17D0,0.46D0), (4.0D0,7.0D0), + (-0.17D0,-0.19D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.19D0,-0.17D0), (5.0D0,8.0D0), - + (0.32D0,0.09D0), (6.0D0,9.0D0), - + (0.23D0,-0.24D0), (8.0D0,3.0D0), - + (0.18D0,0.01D0), (9.0D0,4.0D0)/ + + (0.20D0,-0.35D0), (6.0D0,9.0D0), + + (0.35D0,0.20D0), (8.0D0,3.0D0), + + (0.14D0,0.08D0), (9.0D0,4.0D0)/ DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), @@ -177,8 +219,8 @@ + (0.03D0,0.03D0), (-0.18D0,0.03D0), + (0.03D0,-0.09D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), - + (0.09D0,0.03D0), (0.03D0,0.12D0), - + (0.12D0,0.03D0), (0.03D0,0.06D0), (2.0D0,3.0D0), + + (0.09D0,0.03D0), (0.15D0,0.00D0), + + (0.00D0,0.15D0), (0.00D0,0.06D0), (2.0D0,3.0D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/ DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), @@ -193,8 +235,8 @@ + (-0.18D0,0.03D0), (4.0D0,7.0D0), + (0.03D0,-0.09D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.09D0,0.03D0), (5.0D0,8.0D0), - + (0.03D0,0.12D0), (6.0D0,9.0D0), (0.12D0,0.03D0), - + (8.0D0,3.0D0), (0.03D0,0.06D0), (9.0D0,4.0D0)/ + + (0.15D0,0.00D0), (6.0D0,9.0D0), (0.00D0,0.15D0), + + (8.0D0,3.0D0), (0.00D0,0.06D0), (9.0D0,4.0D0)/ DATA ITRUE3/0, 1, 2, 2, 2/ * .. 
Executable Statements .. DO 60 INCX = 1, 2 @@ -529,7 +571,8 @@ * * .. Parameters .. INTEGER NOUT - PARAMETER (NOUT=6) + DOUBLE PRECISION ZERO + PARAMETER (NOUT=6, ZERO=0.0D0) * .. Scalar Arguments .. DOUBLE PRECISION SFAC INTEGER LEN @@ -552,7 +595,7 @@ * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) - IF (SDIFF(ABS(SSIZE(I))+ABS(SFAC*SD),ABS(SSIZE(I))).EQ.0.0D0) + IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cgelqf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cgelqf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cgelqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cgelqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cgels.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cgels.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cgels.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cgels.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cgeqlf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cgeqlf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cgeqlf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cgeqlf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cgeqrf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cgeqrf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cgeqrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cgeqrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cgerqf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cgerqf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cgerqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cgerqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cgesv.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cgesv.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cgesv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cgesv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cgetrf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cgetrf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cgetrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cgetrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cgetri.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cgetri.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cgetri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cgetri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cgetrs.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cgetrs.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cgetrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cgetrs.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_clauum.c atlas-3.10.3/interfaces/lapack/C/src/clapack_clauum.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_clauum.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_clauum.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cposv.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cposv.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cposv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cposv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cpotrf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cpotrf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cpotrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cpotrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cpotri.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cpotri.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cpotri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cpotri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_cpotrs.c atlas-3.10.3/interfaces/lapack/C/src/clapack_cpotrs.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_cpotrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_cpotrs.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_ctrtri.c atlas-3.10.3/interfaces/lapack/C/src/clapack_ctrtri.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_ctrtri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_ctrtri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 Peter Soendergaard * * Code contributers : Peter Soendergaard, R. Clint Whaley diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dgelqf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dgelqf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dgelqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dgelqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dgels.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dgels.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dgels.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dgels.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dgeqlf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dgeqlf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dgeqlf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dgeqlf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dgeqrf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dgeqrf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dgeqrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dgeqrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dgerqf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dgerqf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dgerqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dgerqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dgesv.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dgesv.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dgesv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dgesv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dgetrf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dgetrf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dgetrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dgetrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dgetri.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dgetri.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dgetri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dgetri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dgetrs.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dgetrs.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dgetrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dgetrs.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dlamch.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dlamch.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dlamch.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dlamch.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -28,7 +28,7 @@ * */ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dlauum.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dlauum.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dlauum.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dlauum.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dposv.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dposv.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dposv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dposv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dpotrf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dpotrf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dpotrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dpotrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dpotri.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dpotri.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dpotri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dpotri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dpotrs.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dpotrs.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dpotrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dpotrs.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_dtrtri.c atlas-3.10.3/interfaces/lapack/C/src/clapack_dtrtri.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_dtrtri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_dtrtri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 Peter Soendergaard * * Code contributers : Peter Soendergaard, R. Clint Whaley diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_ilaenv.c atlas-3.10.3/interfaces/lapack/C/src/clapack_ilaenv.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_ilaenv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_ilaenv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_sgelqf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_sgelqf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_sgelqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_sgelqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_sgels.c atlas-3.10.3/interfaces/lapack/C/src/clapack_sgels.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_sgels.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_sgels.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_sgeqlf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_sgeqlf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_sgeqlf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_sgeqlf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_sgeqrf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_sgeqrf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_sgeqrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_sgeqrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_sgerqf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_sgerqf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_sgerqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_sgerqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_sgesv.c atlas-3.10.3/interfaces/lapack/C/src/clapack_sgesv.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_sgesv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_sgesv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_sgetrf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_sgetrf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_sgetrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_sgetrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_sgetri.c atlas-3.10.3/interfaces/lapack/C/src/clapack_sgetri.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_sgetri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_sgetri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_sgetrs.c atlas-3.10.3/interfaces/lapack/C/src/clapack_sgetrs.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_sgetrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_sgetrs.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_slamch.c atlas-3.10.3/interfaces/lapack/C/src/clapack_slamch.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_slamch.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_slamch.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -28,7 +28,7 @@ * */ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_slauum.c atlas-3.10.3/interfaces/lapack/C/src/clapack_slauum.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_slauum.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_slauum.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_sposv.c atlas-3.10.3/interfaces/lapack/C/src/clapack_sposv.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_sposv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_sposv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_spotrf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_spotrf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_spotrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_spotrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_spotri.c atlas-3.10.3/interfaces/lapack/C/src/clapack_spotri.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_spotri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_spotri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_spotrs.c atlas-3.10.3/interfaces/lapack/C/src/clapack_spotrs.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_spotrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_spotrs.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_strtri.c atlas-3.10.3/interfaces/lapack/C/src/clapack_strtri.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_strtri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_strtri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 Peter Soendergaard * * Code contributers : Peter Soendergaard, R. Clint Whaley diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zgelqf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zgelqf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zgelqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zgelqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zgels.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zgels.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zgels.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zgels.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zgeqlf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zgeqlf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zgeqlf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zgeqlf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zgeqrf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zgeqrf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zgeqrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zgeqrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zgerqf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zgerqf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zgerqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zgerqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2014, 2009 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zgesv.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zgesv.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zgesv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zgesv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zgetrf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zgetrf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zgetrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zgetrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zgetri.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zgetri.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zgetri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zgetri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zgetrs.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zgetrs.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zgetrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zgetrs.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zlauum.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zlauum.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zlauum.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zlauum.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zposv.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zposv.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zposv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zposv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zpotrf.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zpotrf.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zpotrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zpotrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zpotri.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zpotri.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zpotri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zpotri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_zpotrs.c atlas-3.10.3/interfaces/lapack/C/src/clapack_zpotrs.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_zpotrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_zpotrs.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C/src/clapack_ztrtri.c atlas-3.10.3/interfaces/lapack/C/src/clapack_ztrtri.c --- atlas-3.10.2/interfaces/lapack/C/src/clapack_ztrtri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C/src/clapack_ztrtri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 Peter Soendergaard * * Code contributers : Peter Soendergaard, R. Clint Whaley diff -Nru atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Fgels.c atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Fgels.c --- atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Fgels.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Fgels.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Formlq.c atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Formlq.c --- atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Formlq.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Formlq.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Formql.c atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Formql.c --- atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Formql.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Formql.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Formqr.c atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Formqr.c --- atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Formqr.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Formqr.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Formrq.c atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Formrq.c --- atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Formrq.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Formrq.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Funmlq.c atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Funmlq.c --- atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Funmlq.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Funmlq.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Funmql.c atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Funmql.c --- atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Funmql.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Funmql.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Funmqr.c atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Funmqr.c --- atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Funmqr.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Funmqr.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Funmrq.c atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Funmrq.c --- atlas-3.10.2/interfaces/lapack/C2F/src/ATL_C2Funmrq.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/C2F/src/ATL_C2Funmrq.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gelqf.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gelqf.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gelqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gelqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gels.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gels.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gels.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gels.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. 
Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_geqlf.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_geqlf.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_geqlf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_geqlf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_geqrf.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_geqrf.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_geqrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_geqrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gerqf.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gerqf.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gerqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gerqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. 
Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gesv.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gesv.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gesv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_gesv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getnb.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getnb.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getnb.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getnb.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getrf.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getrf.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. 
Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getri.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getri.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getrs.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getrs.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_getrs.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_ilaenv.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_ilaenv.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_ilaenv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_ilaenv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. 
Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_larfb.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_larfb.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_larfb.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_larfb.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_larft.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_larft.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_larft.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_larft.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_lauum.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_lauum.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_lauum.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_lauum.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. 
Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_posv.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_posv.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_posv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_posv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_potrf.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_potrf.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_potrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_potrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_potri.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_potri.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_potri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_potri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. 
Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_potrs.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_potrs.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_potrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_potrs.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_trtri.c atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_trtri.c --- atlas-3.10.2/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_trtri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/interfaces/lapack/F77/src/f77wrap/ATL_f77wrap_trtri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. 
Petitet diff -Nru atlas-3.10.2/makes/Make.lib atlas-3.10.3/makes/Make.lib --- atlas-3.10.2/makes/Make.lib 2014-07-10 16:22:01.000000000 +0000 +++ atlas-3.10.3/makes/Make.lib 2016-07-28 19:43:00.000000000 +0000 @@ -9,7 +9,7 @@ CDYNlibs = liblapack.so libcblas.so libatlas.so CPTDYNlibs = liblapack.so libptcblas.so libatlas.so -VER=3.10.2 +VER=3.10.3 tmpd = RCW_tMp tarnam = atlas$(VER)_$(ARCH) tar : tarfile @@ -51,7 +51,7 @@ --whole-archive $(libas) --no-whole-archive $(LIBS) GCCTRY_WIN: $(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \ - -Wl,"rpath-link $(LIBINSTdir)" \ + -Wl,"-rpath-link $(LIBINSTdir)" \ -Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS) GCCTRY_norp_WIN: $(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \ @@ -117,7 +117,7 @@ --whole-archive $(libas) --no-whole-archive $(LIBS) GCCTRY: $(GOODGCC) -shared -o $(outso) \ - -Wl,"rpath-link $(LIBINSTdir)" \ + -Wl,"-rpath-link $(LIBINSTdir)" \ -Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS) GCCTRY_norp: $(GOODGCC) -shared -o $(outso) \ diff -Nru atlas-3.10.2/makes/Make.sysinfo atlas-3.10.3/makes/Make.sysinfo --- atlas-3.10.2/makes/Make.sysinfo 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/makes/Make.sysinfo 2016-07-28 19:43:00.000000000 +0000 @@ -81,7 +81,8 @@ ./xmasrch -p $(pre) -o res/$(pre)MULADD RunLamch : xemit_lamch - $(ATLRUN) $(SYSdir) xemit_lamch $(INCAdir) + $(ATLRUN) $(SYSdir) xemit_lamch + cp res/atlas_?lamch.h $(INCAdir)/. 
RunTyp: xemit_typ $(ATLRUN) $(SYSdir) xemit_typ > $(INCAdir)/atlas_type.h diff -Nru atlas-3.10.2/makes/Make.ttune atlas-3.10.3/makes/Make.ttune --- atlas-3.10.2/makes/Make.ttune 2014-07-10 16:22:02.000000000 +0000 +++ atlas-3.10.3/makes/Make.ttune 2016-07-28 19:43:00.000000000 +0000 @@ -43,9 +43,11 @@ IRun_nthr : IRun_aff xprobe_nthr rm -f $(INCAdir)/atlas_pthreads.h - ./xprobe_nthr -o $(INCAdir)/atlas_pthreads.h + ./xprobe_nthr -o res/atlas_pthreads.h + mv res/atlas_pthreads.h $(INCAdir)/atlas_pthreads.h ITune_aff : IRun_nthr xtune_aff - ./xtune_aff -o $(INCAdir)/atlas_taffinity.h + ./xtune_aff -o res/atlas_taffinity.h + mv res/atlas_taffinity.h $(INCAdir)/. tlib : cd $(BLDdir)/src/threads ; $(MAKE) xprobe_aff : probe_aff.o atlconf_misc.o diff -Nru atlas-3.10.2/src/auxil/ATL_axpby.c atlas-3.10.3/src/auxil/ATL_axpby.c --- atlas-3.10.2/src/auxil/ATL_axpby.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_axpby.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_axpy.c atlas-3.10.3/src/auxil/ATL_axpy.c --- atlas-3.10.2/src/auxil/ATL_axpy.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_axpy.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_copy.c atlas-3.10.3/src/auxil/ATL_copy.c --- atlas-3.10.2/src/auxil/ATL_copy.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_copy.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_cplxdivide.c atlas-3.10.3/src/auxil/ATL_cplxdivide.c --- atlas-3.10.2/src/auxil/ATL_cplxdivide.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_cplxdivide.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_cplxinvert.c atlas-3.10.3/src/auxil/ATL_cplxinvert.c --- atlas-3.10.2/src/auxil/ATL_cplxinvert.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_cplxinvert.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_flushCacheByAddr.c atlas-3.10.3/src/auxil/ATL_flushCacheByAddr.c --- atlas-3.10.2/src/auxil/ATL_flushCacheByAddr.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_flushCacheByAddr.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_flushcache.c atlas-3.10.3/src/auxil/ATL_flushcache.c --- atlas-3.10.2/src/auxil/ATL_flushcache.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_flushcache.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_geadd.c atlas-3.10.3/src/auxil/ATL_geadd.c --- atlas-3.10.2/src/auxil/ATL_geadd.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_geadd.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_geApBt_NB.c atlas-3.10.3/src/auxil/ATL_geApBt_NB.c --- atlas-3.10.2/src/auxil/ATL_geApBt_NB.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_geApBt_NB.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_gecollapse.c atlas-3.10.3/src/auxil/ATL_gecollapse.c --- atlas-3.10.2/src/auxil/ATL_gecollapse.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_gecollapse.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_gecopy.c atlas-3.10.3/src/auxil/ATL_gecopy.c --- atlas-3.10.2/src/auxil/ATL_gecopy.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_gecopy.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_gemaxnrm.c atlas-3.10.3/src/auxil/ATL_gemaxnrm.c --- atlas-3.10.2/src/auxil/ATL_gemaxnrm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_gemaxnrm.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_gemove.c atlas-3.10.3/src/auxil/ATL_gemove.c --- atlas-3.10.2/src/auxil/ATL_gemove.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_gemove.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_gemoveT.c atlas-3.10.3/src/auxil/ATL_gemoveT.c --- atlas-3.10.2/src/auxil/ATL_gemoveT.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_gemoveT.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_gescal.c atlas-3.10.3/src/auxil/ATL_gescal.c --- atlas-3.10.2/src/auxil/ATL_gescal.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_gescal.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_geset.c atlas-3.10.3/src/auxil/ATL_geset.c --- atlas-3.10.2/src/auxil/ATL_geset.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_geset.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011, 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_geswapT.c atlas-3.10.3/src/auxil/ATL_geswapT.c --- atlas-3.10.2/src/auxil/ATL_geswapT.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_geswapT.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_gezero.c atlas-3.10.3/src/auxil/ATL_gezero.c --- atlas-3.10.2/src/auxil/ATL_gezero.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_gezero.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_hereflect.c atlas-3.10.3/src/auxil/ATL_hereflect.c --- atlas-3.10.2/src/auxil/ATL_hereflect.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_hereflect.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_hescal.c atlas-3.10.3/src/auxil/ATL_hescal.c --- atlas-3.10.2/src/auxil/ATL_hescal.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_hescal.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_lcm.c atlas-3.10.3/src/auxil/ATL_lcm.c --- atlas-3.10.2/src/auxil/ATL_lcm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_lcm.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/src/auxil/ATL_move.c atlas-3.10.3/src/auxil/ATL_move.c --- atlas-3.10.2/src/auxil/ATL_move.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_move.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_ptflushcache.c atlas-3.10.3/src/auxil/ATL_ptflushcache.c --- atlas-3.10.2/src/auxil/ATL_ptflushcache.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_ptflushcache.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_scal.c atlas-3.10.3/src/auxil/ATL_scal.c --- atlas-3.10.2/src/auxil/ATL_scal.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_scal.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_set.c atlas-3.10.3/src/auxil/ATL_set.c --- atlas-3.10.2/src/auxil/ATL_set.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_set.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_sqtrans.c atlas-3.10.3/src/auxil/ATL_sqtrans.c --- atlas-3.10.2/src/auxil/ATL_sqtrans.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_sqtrans.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_syApAt.c atlas-3.10.3/src/auxil/ATL_syApAt.c --- atlas-3.10.2/src/auxil/ATL_syApAt.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_syApAt.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_syApAt_NB.c atlas-3.10.3/src/auxil/ATL_syApAt_NB.c --- atlas-3.10.2/src/auxil/ATL_syApAt_NB.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_syApAt_NB.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_syreflect.c atlas-3.10.3/src/auxil/ATL_syreflect.c --- atlas-3.10.2/src/auxil/ATL_syreflect.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_syreflect.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_tradd.c atlas-3.10.3/src/auxil/ATL_tradd.c --- atlas-3.10.2/src/auxil/ATL_tradd.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_tradd.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_trcollapse.c atlas-3.10.3/src/auxil/ATL_trcollapse.c --- atlas-3.10.2/src/auxil/ATL_trcollapse.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_trcollapse.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_trscal.c atlas-3.10.3/src/auxil/ATL_trscal.c --- atlas-3.10.2/src/auxil/ATL_trscal.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_trscal.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_trsetL.c atlas-3.10.3/src/auxil/ATL_trsetL.c --- atlas-3.10.2/src/auxil/ATL_trsetL.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_trsetL.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_trsetU.c atlas-3.10.3/src/auxil/ATL_trsetU.c --- atlas-3.10.2/src/auxil/ATL_trsetU.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_trsetU.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_xerbla.c atlas-3.10.3/src/auxil/ATL_xerbla.c --- atlas-3.10.2/src/auxil/ATL_xerbla.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_xerbla.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/ATL_zero.c atlas-3.10.3/src/auxil/ATL_zero.c --- atlas-3.10.2/src/auxil/ATL_zero.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/ATL_zero.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/kernel/ATL_axpby.c atlas-3.10.3/src/auxil/kernel/ATL_axpby.c --- atlas-3.10.2/src/auxil/kernel/ATL_axpby.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/kernel/ATL_axpby.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009, 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/kernel/ATL_axpy_x1_y1.c atlas-3.10.3/src/auxil/kernel/ATL_axpy_x1_y1.c --- atlas-3.10.2/src/auxil/kernel/ATL_axpy_x1_y1.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/kernel/ATL_axpy_x1_y1.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009, 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/kernel/ATL_caxpy_x1_y1.c atlas-3.10.3/src/auxil/kernel/ATL_caxpy_x1_y1.c --- atlas-3.10.2/src/auxil/kernel/ATL_caxpy_x1_y1.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/kernel/ATL_caxpy_x1_y1.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009, 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/kernel/ATL_geadd.c atlas-3.10.3/src/auxil/kernel/ATL_geadd.c --- atlas-3.10.2/src/auxil/kernel/ATL_geadd.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/kernel/ATL_geadd.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009, 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/kernel/ATL_gemove.c atlas-3.10.3/src/auxil/kernel/ATL_gemove.c --- atlas-3.10.2/src/auxil/kernel/ATL_gemove.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/kernel/ATL_gemove.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009, 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/auxil/kernel/ATL_gescal.c atlas-3.10.3/src/auxil/kernel/ATL_gescal.c --- atlas-3.10.2/src/auxil/kernel/ATL_gescal.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/auxil/kernel/ATL_gescal.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009, 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_AgemmXX.c atlas-3.10.3/src/blas/gemm/ATL_AgemmXX.c --- atlas-3.10.2/src/blas/gemm/ATL_AgemmXX.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_AgemmXX.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_ccol2blk.c atlas-3.10.3/src/blas/gemm/ATL_ccol2blk.c --- atlas-3.10.2/src/blas/gemm/ATL_ccol2blk.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_ccol2blk.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_cmmIJK.c atlas-3.10.3/src/blas/gemm/ATL_cmmIJK.c --- atlas-3.10.2/src/blas/gemm/ATL_cmmIJK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_cmmIJK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_cmmJIK.c atlas-3.10.3/src/blas/gemm/ATL_cmmJIK.c --- atlas-3.10.2/src/blas/gemm/ATL_cmmJIK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_cmmJIK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_cmmJITcp.c atlas-3.10.3/src/blas/gemm/ATL_cmmJITcp.c --- atlas-3.10.2/src/blas/gemm/ATL_cmmJITcp.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_cmmJITcp.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_cmmJKI.c atlas-3.10.3/src/blas/gemm/ATL_cmmJKI.c --- atlas-3.10.2/src/blas/gemm/ATL_cmmJKI.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_cmmJKI.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2006 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_CNBmm_b0.c atlas-3.10.3/src/blas/gemm/ATL_CNBmm_b0.c --- atlas-3.10.2/src/blas/gemm/ATL_CNBmm_b0.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_CNBmm_b0.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_CNBmm_b1.c atlas-3.10.3/src/blas/gemm/ATL_CNBmm_b1.c --- atlas-3.10.2/src/blas/gemm/ATL_CNBmm_b1.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_CNBmm_b1.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_CNBmm_bX.c atlas-3.10.3/src/blas/gemm/ATL_CNBmm_bX.c --- atlas-3.10.2/src/blas/gemm/ATL_CNBmm_bX.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_CNBmm_bX.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_cNCmmIJK.c atlas-3.10.3/src/blas/gemm/ATL_cNCmmIJK.c --- atlas-3.10.2/src/blas/gemm/ATL_cNCmmIJK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_cNCmmIJK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_cNCmmIJK_c.c atlas-3.10.3/src/blas/gemm/ATL_cNCmmIJK_c.c --- atlas-3.10.2/src/blas/gemm/ATL_cNCmmIJK_c.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_cNCmmIJK_c.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_cNCmmJIK.c atlas-3.10.3/src/blas/gemm/ATL_cNCmmJIK.c --- atlas-3.10.2/src/blas/gemm/ATL_cNCmmJIK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_cNCmmJIK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_cNCmmJIK_c.c atlas-3.10.3/src/blas/gemm/ATL_cNCmmJIK_c.c --- atlas-3.10.2/src/blas/gemm/ATL_cNCmmJIK_c.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_cNCmmJIK_c.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_col2blk.c atlas-3.10.3/src/blas/gemm/ATL_col2blk.c --- atlas-3.10.2/src/blas/gemm/ATL_col2blk.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_col2blk.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_crow2blkT.c atlas-3.10.3/src/blas/gemm/ATL_crow2blkT.c --- atlas-3.10.2/src/blas/gemm/ATL_crow2blkT.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_crow2blkT.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_gemm.c atlas-3.10.3/src/blas/gemm/ATL_gemm.c --- atlas-3.10.2/src/blas/gemm/ATL_gemm.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_gemm.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_gemmXX.c atlas-3.10.3/src/blas/gemm/ATL_gemmXX.c --- atlas-3.10.2/src/blas/gemm/ATL_gemmXX.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_gemmXX.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_gereal2cplx.c atlas-3.10.3/src/blas/gemm/ATL_gereal2cplx.c --- atlas-3.10.2/src/blas/gemm/ATL_gereal2cplx.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_gereal2cplx.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_GetNB.c atlas-3.10.3/src/blas/gemm/ATL_GetNB.c --- atlas-3.10.2/src/blas/gemm/ATL_GetNB.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_GetNB.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_GetNCNB.c atlas-3.10.3/src/blas/gemm/ATL_GetNCNB.c --- atlas-3.10.2/src/blas/gemm/ATL_GetNCNB.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_GetNCNB.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_IBJBmm.c atlas-3.10.3/src/blas/gemm/ATL_IBJBmm.c --- atlas-3.10.2/src/blas/gemm/ATL_IBJBmm.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_IBJBmm.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_IBNBmm.c atlas-3.10.3/src/blas/gemm/ATL_IBNBmm.c --- atlas-3.10.2/src/blas/gemm/ATL_IBNBmm.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_IBNBmm.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_MBJBmm.c atlas-3.10.3/src/blas/gemm/ATL_MBJBmm.c --- atlas-3.10.2/src/blas/gemm/ATL_MBJBmm.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_MBJBmm.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_mmBPP.c atlas-3.10.3/src/blas/gemm/ATL_mmBPP.c --- atlas-3.10.2/src/blas/gemm/ATL_mmBPP.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_mmBPP.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_mmIJK.c atlas-3.10.3/src/blas/gemm/ATL_mmIJK.c --- atlas-3.10.2/src/blas/gemm/ATL_mmIJK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_mmIJK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_mmJIK.c atlas-3.10.3/src/blas/gemm/ATL_mmJIK.c --- atlas-3.10.2/src/blas/gemm/ATL_mmJIK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_mmJIK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_mmJITcp.c atlas-3.10.3/src/blas/gemm/ATL_mmJITcp.c --- atlas-3.10.2/src/blas/gemm/ATL_mmJITcp.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_mmJITcp.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_mmJKI.c atlas-3.10.3/src/blas/gemm/ATL_mmJKI.c --- atlas-3.10.2/src/blas/gemm/ATL_mmJKI.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_mmJKI.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2006 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_mmK.c atlas-3.10.3/src/blas/gemm/ATL_mmK.c --- atlas-3.10.2/src/blas/gemm/ATL_mmK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_mmK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_NCmmIJK.c atlas-3.10.3/src/blas/gemm/ATL_NCmmIJK.c --- atlas-3.10.2/src/blas/gemm/ATL_NCmmIJK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_NCmmIJK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_NCmmIJK_c.c atlas-3.10.3/src/blas/gemm/ATL_NCmmIJK_c.c --- atlas-3.10.2/src/blas/gemm/ATL_NCmmIJK_c.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_NCmmIJK_c.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_NCmmJIK.c atlas-3.10.3/src/blas/gemm/ATL_NCmmJIK.c --- atlas-3.10.2/src/blas/gemm/ATL_NCmmJIK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_NCmmJIK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_NCmmJIK_c.c atlas-3.10.3/src/blas/gemm/ATL_NCmmJIK_c.c --- atlas-3.10.2/src/blas/gemm/ATL_NCmmJIK_c.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_NCmmJIK_c.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_putblk.c atlas-3.10.3/src/blas/gemm/ATL_putblk.c --- atlas-3.10.2/src/blas/gemm/ATL_putblk.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_putblk.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/ATL_row2blkT.c atlas-3.10.3/src/blas/gemm/ATL_row2blkT.c --- atlas-3.10.2/src/blas/gemm/ATL_row2blkT.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/ATL_row2blkT.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemm/UMMEXAMPLE/ATLU_usergemm.c atlas-3.10.3/src/blas/gemm/UMMEXAMPLE/ATLU_usergemm.c --- atlas-3.10.2/src/blas/gemm/UMMEXAMPLE/ATLU_usergemm.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemm/UMMEXAMPLE/ATLU_usergemm.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemv/ATL_gemv.c atlas-3.10.3/src/blas/gemv/ATL_gemv.c --- atlas-3.10.2/src/blas/gemv/ATL_gemv.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemv/ATL_gemv.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemv/ATL_gemvCN.c atlas-3.10.3/src/blas/gemv/ATL_gemvCN.c --- atlas-3.10.2/src/blas/gemv/ATL_gemvCN.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemv/ATL_gemvCN.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemv/ATL_gemvCT.c atlas-3.10.3/src/blas/gemv/ATL_gemvCT.c --- atlas-3.10.2/src/blas/gemv/ATL_gemvCT.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemv/ATL_gemvCT.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemv/ATL_gemvN.c atlas-3.10.3/src/blas/gemv/ATL_gemvN.c --- atlas-3.10.2/src/blas/gemv/ATL_gemvN.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemv/ATL_gemvN.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemv/ATL_gemvT.c atlas-3.10.3/src/blas/gemv/ATL_gemvT.c --- atlas-3.10.2/src/blas/gemv/ATL_gemvT.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemv/ATL_gemvT.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemv/ATL_mvnk_Mlt16.c atlas-3.10.3/src/blas/gemv/ATL_mvnk_Mlt16.c --- atlas-3.10.2/src/blas/gemv/ATL_mvnk_Mlt16.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemv/ATL_mvnk_Mlt16.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemv/ATL_mvnk_smallN.c atlas-3.10.3/src/blas/gemv/ATL_mvnk_smallN.c --- atlas-3.10.2/src/blas/gemv/ATL_mvnk_smallN.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemv/ATL_mvnk_smallN.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemv/ATL_mvtk_Mlt16.c atlas-3.10.3/src/blas/gemv/ATL_mvtk_Mlt16.c --- atlas-3.10.2/src/blas/gemv/ATL_mvtk_Mlt16.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemv/ATL_mvtk_Mlt16.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/gemv/ATL_mvtk_smallN.c atlas-3.10.3/src/blas/gemv/ATL_mvtk_smallN.c --- atlas-3.10.2/src/blas/gemv/ATL_mvtk_smallN.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/gemv/ATL_mvtk_smallN.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/ger/ATL_ger2.c atlas-3.10.3/src/blas/ger/ATL_ger2.c --- atlas-3.10.2/src/blas/ger/ATL_ger2.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/blas/ger/ATL_ger2.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/ger/ATL_ger.c atlas-3.10.3/src/blas/ger/ATL_ger.c --- atlas-3.10.2/src/blas/ger/ATL_ger.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/blas/ger/ATL_ger.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level1/ATL_dsdot.c atlas-3.10.3/src/blas/level1/ATL_dsdot.c --- atlas-3.10.2/src/blas/level1/ATL_dsdot.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/level1/ATL_dsdot.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level1/ATL_rot.c atlas-3.10.3/src/blas/level1/ATL_rot.c --- atlas-3.10.2/src/blas/level1/ATL_rot.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/level1/ATL_rot.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level1/ATL_rotg.c atlas-3.10.3/src/blas/level1/ATL_rotg.c --- atlas-3.10.2/src/blas/level1/ATL_rotg.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/level1/ATL_rotg.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level1/ATL_rotm.c atlas-3.10.3/src/blas/level1/ATL_rotm.c --- atlas-3.10.2/src/blas/level1/ATL_rotm.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/level1/ATL_rotm.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level1/ATL_rotmg.c atlas-3.10.3/src/blas/level1/ATL_rotmg.c --- atlas-3.10.2/src/blas/level1/ATL_rotmg.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/level1/ATL_rotmg.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -30,6 +30,7 @@ #include "atlas_misc.h" #include "atlas_level1.h" +/* #define ATL_DODIV 1 */ void Mjoin(PATL,rotmg)(TYPE *D1, TYPE *D2, TYPE *X1, const TYPE y1, TYPE *P) { @@ -52,7 +53,7 @@ p2 = d2 * y1; if (p2 == ATL_rzero) { - *P = -2.0; + *P = ATL_typify(-2.0); return; } @@ -64,16 +65,13 @@ h21 = -y1 / x1; h12 = p2 / p1; u = ATL_rone - h12 * h21; - if (u <= ATL_rzero) + if (u > ATL_rzero) { - *P = ATL_rnone; - *D1 = *D2 = *X1 = P[1] = P[2] = P[3] = P[4] = ATL_rzero; - return; + flag = ATL_rzero; + d1 /= u; + d2 /= u; + x1 *= u; } - flag = ATL_rzero; - d1 = d1 / u; - d2 = d2 / u; - x1 = x1 * u; } else { @@ -84,13 +82,14 @@ return; } flag = ATL_rone; - h11 = p1 / p2; - h22 = x1 / y1; - u = ATL_rone + h11 * h22; + h11 = p1/p2; + h22 = x1/y1; + u = ATL_rone + h11*h22; tmp = d2 / u; d2 = d1 / u; d1 = tmp; - x1 = y1 * u; + x1 = y1*u; + } if (d1 <= rgamsq) @@ -98,29 +97,45 @@ if (d1 != ATL_rzero) { if (flag == ATL_rzero) 
{ flag = ATL_rnone; h11 = h22 = ATL_rone; } - else if (flag > ATL_rzero) { flag = h21 = ATL_rnone; h12 = ATL_rone; } - do + else { flag = h21 = ATL_rnone; h12 = ATL_rone; } + while(1) { d1 *= gamsq; - x1 *= rgam; - h11 *= rgam; - h12 *= rgam; + #ifndef ATL_DODIV + x1 *= rgam; + h11 *= rgam; + h12 *= rgam; + #else + x1 /= gam; + h11 /= gam; + h12 /= gam; + #endif + if (d1 > rgamsq) + break; + h21 = ATL_rnone; + h12 = ATL_rone; } - while (d1 <= gamsq); } } else if (d1 >= gamsq) { if (flag == ATL_rzero) { flag = ATL_rnone; h11 = h22 = ATL_rone; } - else if (flag > ATL_rzero) { flag = h21 = ATL_rnone; h12 = ATL_rone; } - do + else { flag = h21 = ATL_rnone; h12 = ATL_rone; } + while (1) { - d1 *= rgamsq; + #ifndef ATL_DODIV + d1 *= rgamsq; + #else + d1 /= gamsq; + #endif x1 *= gam; h11 *= gam; h12 *= gam; + if (d1 < gamsq) + break; + h21 = ATL_rnone; + h12 = ATL_rone; } - while (d1 >= gamsq); } tmp = Mabs(d2); @@ -129,54 +144,82 @@ if (d2 != ATL_rzero) { if (flag == ATL_rzero) { flag = ATL_rnone; h11 = h22 = ATL_rone; } - else if (flag > ATL_rzero) { flag = h21 = ATL_rnone; h12 = ATL_rone; } - if (d2 > ATL_rzero) + else { flag = h21 = ATL_rnone; h12 = ATL_rone; } + if (d2 >= ATL_rzero) + while (1) { - do - { - d2 *= gamsq; + d2 *= gamsq; + #ifndef ATL_DODIV h21 *= rgam; h22 *= rgam; - } - while(d2 <= rgamsq); + #else + h21 /= gam; + h22 /= gam; + #endif + if (d2 > rgamsq) + break; + h21 = ATL_rnone; + h12 = ATL_rone; } else /* d2 < ATL_rzero */ { tmp = -rgamsq; - do + while(1) { d2 *= gamsq; - h21 *= rgam; - h22 *= rgam; + #ifndef ATL_DODIV + h21 *= rgam; + h22 *= rgam; + #else + h21 /= gam; + h22 /= gam; + #endif + if (d2 < tmp) + break; + h21 = ATL_rnone; + h12 = ATL_rone; } - while(d2 >= tmp); } } } else if (tmp >= gamsq) { if (flag == ATL_rzero) { flag = ATL_rnone; h11 = h22 = ATL_rone; } - else if (flag > ATL_rzero) { flag = h21 = ATL_rnone; h12 = ATL_rone; } + else { flag = h21 = ATL_rnone; h12 = ATL_rone; } if (d2 > ATL_rzero) { - do + while (1) { - d2 *= rgamsq; 
+ #ifndef ATL_DODIV + d2 *= rgamsq; + #else + d2 /= gamsq; + #endif h21 *= gam; h22 *= gam; + if (d2 < gamsq) + break; + h21 = ATL_rnone; + h12 = ATL_rone; } - while(d2 >= gamsq); } else /* d2 < ATL_rzero */ { tmp = -gamsq; - do + while(1) { - d2 *= rgamsq; + #ifndef ATL_DODIV + d2 *= rgamsq; + #else + d2 /= gamsq; + #endif h21 *= gam; h22 *= gam; + if (d2 > tmp) + break; + h21 = ATL_rnone; + h12 = ATL_rone; } - while(d2 <= tmp); } } *D1 = d1; diff -Nru atlas-3.10.2/src/blas/level1/ATL_sdsdot.c atlas-3.10.3/src/blas/level1/ATL_sdsdot.c --- atlas-3.10.2/src/blas/level1/ATL_sdsdot.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/level1/ATL_sdsdot.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level2/ATL_hemv.c atlas-3.10.3/src/blas/level2/ATL_hemv.c --- atlas-3.10.2/src/blas/level2/ATL_hemv.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/level2/ATL_hemv.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2012 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/src/blas/level2/ATL_symv.c atlas-3.10.3/src/blas/level2/ATL_symv.c --- atlas-3.10.2/src/blas/level2/ATL_symv.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/level2/ATL_symv.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2012 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. 
Petitet diff -Nru atlas-3.10.2/src/blas/level2/ATL_trmv.c atlas-3.10.3/src/blas/level2/ATL_trmv.c --- atlas-3.10.2/src/blas/level2/ATL_trmv.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/level2/ATL_trmv.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2012 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/src/blas/level2/ATL_trsv.c atlas-3.10.3/src/blas/level2/ATL_trsv.c --- atlas-3.10.2/src/blas/level2/ATL_trsv.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/level2/ATL_trsv.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2012 R. Clint Whaley * * Code contributers : R. Clint Whaley, Antoine P. Petitet diff -Nru atlas-3.10.2/src/blas/level2/kernel/ATL_trmvUN.c atlas-3.10.3/src/blas/level2/kernel/ATL_trmvUN.c --- atlas-3.10.2/src/blas/level2/kernel/ATL_trmvUN.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/level2/kernel/ATL_trmvUN.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_CtrsmK.c atlas-3.10.3/src/blas/level3/kernel/ATL_CtrsmK.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_CtrsmK.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_CtrsmK.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_hemmL.c atlas-3.10.3/src/blas/level3/kernel/ATL_hemmL.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_hemmL.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_hemmL.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_hemmR.c atlas-3.10.3/src/blas/level3/kernel/ATL_hemmR.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_hemmR.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_hemmR.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_her2k.c atlas-3.10.3/src/blas/level3/kernel/ATL_her2k.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_her2k.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_her2k.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_herk_N.c atlas-3.10.3/src/blas/level3/kernel/ATL_herk_N.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_herk_N.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_herk_N.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_herk_T.c atlas-3.10.3/src/blas/level3/kernel/ATL_herk_T.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_herk_T.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_herk_T.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_sycopyL.c atlas-3.10.3/src/blas/level3/kernel/ATL_sycopyL.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_sycopyL.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_sycopyL.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_sycopyU.c atlas-3.10.3/src/blas/level3/kernel/ATL_sycopyU.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_sycopyU.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_sycopyU.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_symmL.c atlas-3.10.3/src/blas/level3/kernel/ATL_symmL.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_symmL.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_symmL.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_symmR.c atlas-3.10.3/src/blas/level3/kernel/ATL_symmR.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_symmR.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_symmR.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_syr2k_N.c atlas-3.10.3/src/blas/level3/kernel/ATL_syr2k_N.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_syr2k_N.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_syr2k_N.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_syr2k_putL.c atlas-3.10.3/src/blas/level3/kernel/ATL_syr2k_putL.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_syr2k_putL.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_syr2k_putL.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_syr2k_putU.c atlas-3.10.3/src/blas/level3/kernel/ATL_syr2k_putU.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_syr2k_putU.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_syr2k_putU.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_syr2k_T.c atlas-3.10.3/src/blas/level3/kernel/ATL_syr2k_T.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_syr2k_T.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_syr2k_T.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_syrk_N.c atlas-3.10.3/src/blas/level3/kernel/ATL_syrk_N.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_syrk_N.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_syrk_N.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_syrk_T.c atlas-3.10.3/src/blas/level3/kernel/ATL_syrk_T.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_syrk_T.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_syrk_T.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trcopyL2L.c atlas-3.10.3/src/blas/level3/kernel/ATL_trcopyL2L.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trcopyL2L.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trcopyL2L.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trcopyL2U.c atlas-3.10.3/src/blas/level3/kernel/ATL_trcopyL2U.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trcopyL2U.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trcopyL2U.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trcopyU2L.c atlas-3.10.3/src/blas/level3/kernel/ATL_trcopyU2L.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trcopyU2L.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trcopyU2L.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trcopyU2U.c atlas-3.10.3/src/blas/level3/kernel/ATL_trcopyU2U.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trcopyU2U.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trcopyU2U.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trinvert.c atlas-3.10.3/src/blas/level3/kernel/ATL_trinvert.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trinvert.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trinvert.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trmmL.c atlas-3.10.3/src/blas/level3/kernel/ATL_trmmL.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trmmL.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trmmL.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trmmR.c atlas-3.10.3/src/blas/level3/kernel/ATL_trmmR.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trmmR.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trmmR.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trmv_scal.c atlas-3.10.3/src/blas/level3/kernel/ATL_trmv_scal.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trmv_scal.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trmv_scal.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trputL.c atlas-3.10.3/src/blas/level3/kernel/ATL_trputL.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trputL.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trputL.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trputU.c atlas-3.10.3/src/blas/level3/kernel/ATL_trputU.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trputU.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trputU.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trsmKL.c atlas-3.10.3/src/blas/level3/kernel/ATL_trsmKL.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trsmKL.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trsmKL.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trsmKR.c atlas-3.10.3/src/blas/level3/kernel/ATL_trsmKR.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trsmKR.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trsmKR.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. 
Clint Whaley diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trsmL.c atlas-3.10.3/src/blas/level3/kernel/ATL_trsmL.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trsmL.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trsmL.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/level3/kernel/ATL_trsmR.c atlas-3.10.3/src/blas/level3/kernel/ATL_trsmR.c --- atlas-3.10.2/src/blas/level3/kernel/ATL_trsmR.c 2014-07-10 16:22:08.000000000 +0000 +++ atlas-3.10.3/src/blas/level3/kernel/ATL_trsmR.c 2016-07-28 19:43:06.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_cpcol2blk.c atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_cpcol2blk.c --- atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_cpcol2blk.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_cpcol2blk.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_cpmmJIK.c atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_cpmmJIK.c --- atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_cpmmJIK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_cpmmJIK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_cpputblk.c atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_cpputblk.c --- atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_cpputblk.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_cpputblk.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_cprow2blkT.c atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_cprow2blkT.c --- atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_cprow2blkT.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_cprow2blkT.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_gpmm.c atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_gpmm.c --- atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_gpmm.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_gpmm.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_pcol2blk.c atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_pcol2blk.c --- atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_pcol2blk.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_pcol2blk.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_pmmJIK.c atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_pmmJIK.c --- atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_pmmJIK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_pmmJIK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -147,7 +147,8 @@ if (jb != NB || ib != MB) { pNBmm0 = pNBmm = ATL_gNBmm; - if (ib != NB && jb != NB) Mjoin(PATL,gezero)(MB, NB, pC, MB); + if ((!nKb && kb != NB) || (ib != NB && jb != NB)) + Mjoin(PATL,gezero)(MB, NB, pC, MB); } else { @@ -304,7 +305,8 @@ if (jb != NB || ib != MB) { pNBmm0 = pNBmm = ATL_gNBmm; - if (ib != NB && jb != NB) Mjoin(PATL,gezero)(MB, NB, pC, MB); + if ((!nKb && kb != NB) || (ib != NB && jb != NB)) + Mjoin(PATL,gezero)(MB, NB, pC, MB); } else { diff -Nru atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_pputblk.c atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_pputblk.c --- atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_pputblk.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_pputblk.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_prankK.c atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_prankK.c --- atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_prankK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_prankK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_prow2blkT.c atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_prow2blkT.c --- atlas-3.10.2/src/blas/pklevel3/gpmm/ATL_prow2blkT.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/gpmm/ATL_prow2blkT.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/sprk/ATL_cpputblk_aX.c atlas-3.10.3/src/blas/pklevel3/sprk/ATL_cpputblk_aX.c --- atlas-3.10.2/src/blas/pklevel3/sprk/ATL_cpputblk_aX.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/sprk/ATL_cpputblk_aX.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -48,56 +48,131 @@ { ldc += ldc; ldcinc += ldcinc; - for (j=0; j < N; j++) + if (ibeta == ATL_rzero) /* real beta */ { - for (i=0; i <= j; i++) + if (rbeta == ATL_rzero) /* simple assignment to C */ { - k = i+i; - rc = C[k]; - ic = C[k+1]; - t0 = rv = V[i+mn]; - iv = V[i]; - rv = ralpha * rv - ialpha *iv; - iv = ialpha * t0 + ralpha * iv; - t0 = rc; - rc = rbeta * rc - ibeta * ic; - ic = ibeta * t0 + rbeta * ic; - rc += rv; - ic += iv; - C[k] = rc; - C[k+1] = ic; + for (j=0; j < N; j++) + { + for (i=0; i <= j; i++) + { + k = i+i; + t0 = rv = V[i+mn]; + iv = V[i]; + rv = ralpha * rv - ialpha *iv; + iv = ialpha * t0 + ralpha * iv; + C[k] = rv; + C[k+1] = iv; + } + C += ldc; + V += M; + ldc += ldcinc; + } + } + else + { + for (j=0; j < N; j++) + { + for (i=0; i <= j; i++) + { + k = i+i; + rc = C[k]; + ic = C[k+1]; + t0 = rv = V[i+mn]; + iv = V[i]; + rv = ralpha * rv - ialpha *iv; + iv = ialpha * t0 + ralpha * iv; + t0 = rc; + rc = rbeta * rc; + ic = rbeta * ic; + rc += rv; + ic += iv; + C[k] = rc; + C[k+1] = ic; + } + C += ldc; + V += M; + ldc += ldcinc; + } + } + } + else + { + for (j=0; j < N; j++) + { + for (i=0; i <= j; i++) + { + k = i+i; + rc = C[k]; + ic = C[k+1]; + t0 = rv = V[i+mn]; + iv = V[i]; + rv = ralpha * rv - ialpha *iv; + iv = ialpha * t0 + ralpha * iv; + t0 = rc; + rc = rbeta * rc - ibeta * ic; + ic = ibeta * t0 + rbeta * ic; + rc += rv; + ic += iv; + C[k] = rc; + C[k+1] = ic; + } + C += ldc; + V += M; + ldc += ldcinc; } - C += ldc; - V += M; - ldc += ldcinc; } } - else + else /* Lower triangular */ { - ldc += ldc; - ldcinc += ldcinc; - for (j=0; j < N; j++) + if (ibeta == ATL_rzero && rbeta == ATL_rzero) { - ldc += ldcinc; - for (i=j; i < M; i++) + ldc += ldc; + ldcinc += ldcinc; + for (j=0; j < N; j++) { - k = i+i; - rc = C[k]; - ic = C[k+1]; - t0 = rv = V[i+mn]; - iv = V[i]; - rv = ralpha * rv - ialpha *iv; - iv = ialpha * t0 + ralpha * iv; - t0 = rc; - rc = 
rbeta * rc - ibeta * ic; - ic = ibeta * t0 + rbeta * ic; - rc += rv; - ic += iv; - C[k] = rc; - C[k+1] = ic; + ldc += ldcinc; + for (i=j; i < M; i++) + { + k = i+i; + t0 = rv = V[i+mn]; + iv = V[i]; + rv = ralpha * rv - ialpha *iv; + iv = ialpha * t0 + ralpha * iv; + C[k] = rv; + C[k+1] = iv; + } + C += ldc; + V += M; + } + } + else + { + ldc += ldc; + ldcinc += ldcinc; + for (j=0; j < N; j++) + { + ldc += ldcinc; + for (i=j; i < M; i++) + { + k = i+i; + rc = C[k]; + ic = C[k+1]; + t0 = rv = V[i+mn]; + iv = V[i]; + rv = ralpha * rv - ialpha *iv; + iv = ialpha * t0 + ralpha * iv; + t0 = rc; + rc = rbeta * rc - ibeta * ic; + ic = ibeta * t0 + rbeta * ic; + rc += rv; + ic += iv; + C[k] = rc; + C[k+1] = ic; + } + C += ldc; + V += M; } - C += ldc; - V += M; } } } @@ -123,25 +198,45 @@ if (ldcinc == -1) ldc--; ldc += ldc; ldcinc += ldcinc; - for (j=N; j; j--) + if (rbeta == ATL_rzero && ibeta == ATL_rzero) + { + for (j=N; j; j--) + { + for (i=M; i; i--, C += 2, V++) + { + t0 = rv = V[mn]; + iv = *V; + rv = ralpha * rv - ialpha *iv; + iv = ialpha * t0 + ralpha * iv; + *C = rv; + C[1] = iv; + } + C += ldc; + ldc += ldcinc; + } + } + else { - for (i=M; i; i--, C += 2, V++) + for (j=N; j; j--) { - rc = *C; - ic = C[1]; - t0 = rv = V[mn]; - iv = *V; - rv = ralpha * rv - ialpha *iv; - iv = ialpha * t0 + ralpha * iv; - t0 = rc; - rc = rbeta * rc - ibeta * ic; - ic = ibeta * t0 + rbeta * ic; - rc += rv; - ic += iv; - *C = rc; - C[1] = ic; + for (i=M; i; i--, C += 2, V++) + { + rc = *C; + ic = C[1]; + t0 = rv = V[mn]; + iv = *V; + rv = ralpha * rv - ialpha *iv; + iv = ialpha * t0 + ralpha * iv; + t0 = rc; + rc = rbeta * rc - ibeta * ic; + ic = ibeta * t0 + rbeta * ic; + rc += rv; + ic += iv; + *C = rc; + C[1] = ic; + } + C += ldc; + ldc += ldcinc; } - C += ldc; - ldc += ldcinc; } } diff -Nru atlas-3.10.2/src/blas/pklevel3/sprk/ATL_cprk_kmm.c atlas-3.10.3/src/blas/pklevel3/sprk/ATL_cprk_kmm.c --- atlas-3.10.2/src/blas/pklevel3/sprk/ATL_cprk_kmm.c 2014-07-10 16:22:07.000000000 
+0000 +++ atlas-3.10.3/src/blas/pklevel3/sprk/ATL_cprk_kmm.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -325,7 +325,7 @@ pA = pC + NBNB2 + i*(K SHIFT); ib = N-i; ib = Mmin(ib, NB); - if (ib != NB || jb != NB) + if ((!nKb && kb != NB) || (ib != NB || jb != NB)) Mjoin(PATL,gezero)(ib, jb, pC, ib); if (nKb) { diff -Nru atlas-3.10.2/src/blas/pklevel3/sprk/ATL_hprk.c atlas-3.10.3/src/blas/pklevel3/sprk/ATL_hprk.c --- atlas-3.10.2/src/blas/pklevel3/sprk/ATL_hprk.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/sprk/ATL_hprk.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/sprk/ATL_hprk_rK.c atlas-3.10.3/src/blas/pklevel3/sprk/ATL_hprk_rK.c --- atlas-3.10.2/src/blas/pklevel3/sprk/ATL_hprk_rK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/sprk/ATL_hprk_rK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/sprk/ATL_phk_kmm.c atlas-3.10.3/src/blas/pklevel3/sprk/ATL_phk_kmm.c --- atlas-3.10.2/src/blas/pklevel3/sprk/ATL_phk_kmm.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/sprk/ATL_phk_kmm.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -344,7 +344,7 @@ pA = pC + NBNB2 + incK + i*K2; ib = N-i; ib = Mmin(ib, NB); - if (ib != NB || jb != NB) + if ((!nKb && kb != NB) || (ib != NB || jb != NB)) Mjoin(PATL,gezero)(ib, jb, pC, ib); if (nKb) { diff -Nru atlas-3.10.2/src/blas/pklevel3/sprk/ATL_pputblk_aX.c atlas-3.10.3/src/blas/pklevel3/sprk/ATL_pputblk_aX.c --- atlas-3.10.2/src/blas/pklevel3/sprk/ATL_pputblk_aX.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/sprk/ATL_pputblk_aX.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/sprk/ATL_prk_kmm.c atlas-3.10.3/src/blas/pklevel3/sprk/ATL_prk_kmm.c --- atlas-3.10.2/src/blas/pklevel3/sprk/ATL_prk_kmm.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/sprk/ATL_prk_kmm.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/sprk/ATL_sprk.c atlas-3.10.3/src/blas/pklevel3/sprk/ATL_sprk.c --- atlas-3.10.2/src/blas/pklevel3/sprk/ATL_sprk.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/sprk/ATL_sprk.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/blas/pklevel3/sprk/ATL_sprk_rK.c atlas-3.10.3/src/blas/pklevel3/sprk/ATL_sprk_rK.c --- atlas-3.10.2/src/blas/pklevel3/sprk/ATL_sprk_rK.c 2014-07-10 16:22:07.000000000 +0000 +++ atlas-3.10.3/src/blas/pklevel3/sprk/ATL_sprk_rK.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_gelq2.c atlas-3.10.3/src/lapack/ATL_gelq2.c --- atlas-3.10.2/src/lapack/ATL_gelq2.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_gelq2.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. 
Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_gelqf.c atlas-3.10.3/src/lapack/ATL_gelqf.c --- atlas-3.10.2/src/lapack/ATL_gelqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_gelqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,7 +1,7 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_gelqr.c atlas-3.10.3/src/lapack/ATL_gelqr.c --- atlas-3.10.2/src/lapack/ATL_gelqr.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_gelqr.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,6 +1,6 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_gels.c atlas-3.10.3/src/lapack/ATL_gels.c --- atlas-3.10.2/src/lapack/ATL_gels.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_gels.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_geql2.c atlas-3.10.3/src/lapack/ATL_geql2.c --- atlas-3.10.2/src/lapack/ATL_geql2.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_geql2.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. 
Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_geqlf.c atlas-3.10.3/src/lapack/ATL_geqlf.c --- atlas-3.10.2/src/lapack/ATL_geqlf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_geqlf.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,6 +1,6 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_geqlr.c atlas-3.10.3/src/lapack/ATL_geqlr.c --- atlas-3.10.2/src/lapack/ATL_geqlr.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_geqlr.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_geqr2.c atlas-3.10.3/src/lapack/ATL_geqr2.c --- atlas-3.10.2/src/lapack/ATL_geqr2.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_geqr2.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_geqrf.c atlas-3.10.3/src/lapack/ATL_geqrf.c --- atlas-3.10.2/src/lapack/ATL_geqrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_geqrf.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,6 +1,6 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. 
Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_geqrr.c atlas-3.10.3/src/lapack/ATL_geqrr.c --- atlas-3.10.2/src/lapack/ATL_geqrr.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_geqrr.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_gerq2.c atlas-3.10.3/src/lapack/ATL_gerq2.c --- atlas-3.10.2/src/lapack/ATL_gerq2.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_gerq2.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_gerqf.c atlas-3.10.3/src/lapack/ATL_gerqf.c --- atlas-3.10.2/src/lapack/ATL_gerqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_gerqf.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,6 +1,6 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_gerqr.c atlas-3.10.3/src/lapack/ATL_gerqr.c --- atlas-3.10.2/src/lapack/ATL_gerqr.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_gerqr.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. 
Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_getrf.c atlas-3.10.3/src/lapack/ATL_getrf.c --- atlas-3.10.2/src/lapack/ATL_getrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_getrf.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_getrfC.c atlas-3.10.3/src/lapack/ATL_getrfC.c --- atlas-3.10.2/src/lapack/ATL_getrfC.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_getrfC.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_getrfR.c atlas-3.10.3/src/lapack/ATL_getrfR.c --- atlas-3.10.2/src/lapack/ATL_getrfR.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_getrfR.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_getri.c atlas-3.10.3/src/lapack/ATL_getri.c --- atlas-3.10.2/src/lapack/ATL_getri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_getri.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_getriC.c atlas-3.10.3/src/lapack/ATL_getriC.c --- atlas-3.10.2/src/lapack/ATL_getriC.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_getriC.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_getriR.c atlas-3.10.3/src/lapack/ATL_getriR.c --- atlas-3.10.2/src/lapack/ATL_getriR.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_getriR.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_getrs.c atlas-3.10.3/src/lapack/ATL_getrs.c --- atlas-3.10.2/src/lapack/ATL_getrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_getrs.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_lacgv.c atlas-3.10.3/src/lapack/ATL_lacgv.c --- atlas-3.10.2/src/lapack/ATL_lacgv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_lacgv.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. 
Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_ladiv.c atlas-3.10.3/src/lapack/ATL_ladiv.c --- atlas-3.10.2/src/lapack/ATL_ladiv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_ladiv.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_lapy2.c atlas-3.10.3/src/lapack/ATL_lapy2.c --- atlas-3.10.2/src/lapack/ATL_lapy2.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_lapy2.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2012, 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_lapy3.c atlas-3.10.3/src/lapack/ATL_lapy3.c --- atlas-3.10.2/src/lapack/ATL_lapy3.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_lapy3.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2012, 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_larfb.c atlas-3.10.3/src/lapack/ATL_larfb.c --- atlas-3.10.2/src/lapack/ATL_larfb.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_larfb.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. 
Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_larf.c atlas-3.10.3/src/lapack/ATL_larf.c --- atlas-3.10.2/src/lapack/ATL_larf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_larf.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_larfg.c atlas-3.10.3/src/lapack/ATL_larfg.c --- atlas-3.10.2/src/lapack/ATL_larfg.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_larfg.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_larft.c atlas-3.10.3/src/lapack/ATL_larft.c --- atlas-3.10.2/src/lapack/ATL_larft.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_larft.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_laswp.c atlas-3.10.3/src/lapack/ATL_laswp.c --- atlas-3.10.2/src/lapack/ATL_laswp.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_laswp.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_lauum.c atlas-3.10.3/src/lapack/ATL_lauum.c --- atlas-3.10.2/src/lapack/ATL_lauum.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_lauum.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_lauumL.c atlas-3.10.3/src/lapack/ATL_lauumL.c --- atlas-3.10.2/src/lapack/ATL_lauumL.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_lauumL.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_lauumU.c atlas-3.10.3/src/lapack/ATL_lauumU.c --- atlas-3.10.2/src/lapack/ATL_lauumU.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_lauumU.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_ormqr.c atlas-3.10.3/src/lapack/ATL_ormqr.c --- atlas-3.10.2/src/lapack/ATL_ormqr.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_ormqr.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. 
Clint Whaley diff -Nru atlas-3.10.2/src/lapack/ATL_potrf.c atlas-3.10.3/src/lapack/ATL_potrf.c --- atlas-3.10.2/src/lapack/ATL_potrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_potrf.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_potrfL.c atlas-3.10.3/src/lapack/ATL_potrfL.c --- atlas-3.10.2/src/lapack/ATL_potrfL.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_potrfL.c 2016-07-28 19:43:05.000000000 +0000 @@ -26,7 +26,7 @@ #define llt_trans AtlasTrans #endif /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_potrfRL.c atlas-3.10.3/src/lapack/ATL_potrfRL.c --- atlas-3.10.2/src/lapack/ATL_potrfRL.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_potrfRL.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_potrfRU.c atlas-3.10.3/src/lapack/ATL_potrfRU.c --- atlas-3.10.2/src/lapack/ATL_potrfRU.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_potrfRU.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_potrfU.c atlas-3.10.3/src/lapack/ATL_potrfU.c --- atlas-3.10.2/src/lapack/ATL_potrfU.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_potrfU.c 2016-07-28 19:43:05.000000000 +0000 @@ -26,7 +26,7 @@ #define llt_trans AtlasTrans #endif /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_potrs.c atlas-3.10.3/src/lapack/ATL_potrs.c --- atlas-3.10.2/src/lapack/ATL_potrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_potrs.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_trtri.c atlas-3.10.3/src/lapack/ATL_trtri.c --- atlas-3.10.2/src/lapack/ATL_trtri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_trtri.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_trtriCL.c atlas-3.10.3/src/lapack/ATL_trtriCL.c --- atlas-3.10.2/src/lapack/ATL_trtriCL.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_trtriCL.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 Peter Soendergaard * * Redistribution and use in source 
and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_trtriCU.c atlas-3.10.3/src/lapack/ATL_trtriCU.c --- atlas-3.10.2/src/lapack/ATL_trtriCU.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_trtriCU.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_trtriRL.c atlas-3.10.3/src/lapack/ATL_trtriRL.c --- atlas-3.10.2/src/lapack/ATL_trtriRL.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_trtriRL.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_trtriRU.c atlas-3.10.3/src/lapack/ATL_trtriRU.c --- atlas-3.10.2/src/lapack/ATL_trtriRU.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_trtriRU.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/lapack/ATL_trtrs.c atlas-3.10.3/src/lapack/ATL_trtrs.c --- atlas-3.10.2/src/lapack/ATL_trtrs.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/lapack/ATL_trtrs.c 2016-07-28 19:43:05.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_dsf77dot.c atlas-3.10.3/src/testing/ATL_dsf77dot.c --- atlas-3.10.2/src/testing/ATL_dsf77dot.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_dsf77dot.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_epsilon.c atlas-3.10.3/src/testing/ATL_epsilon.c --- atlas-3.10.2/src/testing/ATL_epsilon.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_epsilon.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1998 Jeff Horner * * Code contributers : Jeff Horner, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77amax.c atlas-3.10.3/src/testing/ATL_f77amax.c --- atlas-3.10.2/src/testing/ATL_f77amax.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77amax.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77asum.c atlas-3.10.3/src/testing/ATL_f77asum.c --- atlas-3.10.2/src/testing/ATL_f77asum.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77asum.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77axpy.c atlas-3.10.3/src/testing/ATL_f77axpy.c --- atlas-3.10.2/src/testing/ATL_f77axpy.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77axpy.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77copy.c atlas-3.10.3/src/testing/ATL_f77copy.c --- atlas-3.10.2/src/testing/ATL_f77copy.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77copy.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77dot.c atlas-3.10.3/src/testing/ATL_f77dot.c --- atlas-3.10.2/src/testing/ATL_f77dot.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77dot.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77dotc_sub.c atlas-3.10.3/src/testing/ATL_f77dotc_sub.c --- atlas-3.10.2/src/testing/ATL_f77dotc_sub.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77dotc_sub.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77dotu_sub.c atlas-3.10.3/src/testing/ATL_f77dotu_sub.c --- atlas-3.10.2/src/testing/ATL_f77dotu_sub.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77dotu_sub.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77gbmv.c atlas-3.10.3/src/testing/ATL_f77gbmv.c --- atlas-3.10.2/src/testing/ATL_f77gbmv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77gbmv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77gelqf.c atlas-3.10.3/src/testing/ATL_f77gelqf.c --- atlas-3.10.2/src/testing/ATL_f77gelqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77gelqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77gels.c atlas-3.10.3/src/testing/ATL_f77gels.c --- atlas-3.10.2/src/testing/ATL_f77gels.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77gels.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2006 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77gemm.c atlas-3.10.3/src/testing/ATL_f77gemm.c --- atlas-3.10.2/src/testing/ATL_f77gemm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77gemm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77gemv.c atlas-3.10.3/src/testing/ATL_f77gemv.c --- atlas-3.10.2/src/testing/ATL_f77gemv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77gemv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77geqlf.c atlas-3.10.3/src/testing/ATL_f77geqlf.c --- atlas-3.10.2/src/testing/ATL_f77geqlf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77geqlf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77geqrf.c atlas-3.10.3/src/testing/ATL_f77geqrf.c --- atlas-3.10.2/src/testing/ATL_f77geqrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77geqrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77ger.c atlas-3.10.3/src/testing/ATL_f77ger.c --- atlas-3.10.2/src/testing/ATL_f77ger.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77ger.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77gerc.c atlas-3.10.3/src/testing/ATL_f77gerc.c --- atlas-3.10.2/src/testing/ATL_f77gerc.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77gerc.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77gerqf.c atlas-3.10.3/src/testing/ATL_f77gerqf.c --- atlas-3.10.2/src/testing/ATL_f77gerqf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77gerqf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77geru.c atlas-3.10.3/src/testing/ATL_f77geru.c --- atlas-3.10.2/src/testing/ATL_f77geru.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77geru.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77gesv.c atlas-3.10.3/src/testing/ATL_f77gesv.c --- atlas-3.10.2/src/testing/ATL_f77gesv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77gesv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77getrf.c atlas-3.10.3/src/testing/ATL_f77getrf.c --- atlas-3.10.2/src/testing/ATL_f77getrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77getrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77getri.c atlas-3.10.3/src/testing/ATL_f77getri.c --- atlas-3.10.2/src/testing/ATL_f77getri.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77getri.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -28,7 +28,7 @@ * */ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77hbmv.c atlas-3.10.3/src/testing/ATL_f77hbmv.c --- atlas-3.10.2/src/testing/ATL_f77hbmv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77hbmv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77hemm.c atlas-3.10.3/src/testing/ATL_f77hemm.c --- atlas-3.10.2/src/testing/ATL_f77hemm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77hemm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77hemv.c atlas-3.10.3/src/testing/ATL_f77hemv.c --- atlas-3.10.2/src/testing/ATL_f77hemv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77hemv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77her2.c atlas-3.10.3/src/testing/ATL_f77her2.c --- atlas-3.10.2/src/testing/ATL_f77her2.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77her2.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77her2k.c atlas-3.10.3/src/testing/ATL_f77her2k.c --- atlas-3.10.2/src/testing/ATL_f77her2k.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77her2k.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77her.c atlas-3.10.3/src/testing/ATL_f77her.c --- atlas-3.10.2/src/testing/ATL_f77her.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77her.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77herk.c atlas-3.10.3/src/testing/ATL_f77herk.c --- atlas-3.10.2/src/testing/ATL_f77herk.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77herk.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77hpmv.c atlas-3.10.3/src/testing/ATL_f77hpmv.c --- atlas-3.10.2/src/testing/ATL_f77hpmv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77hpmv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77hpr2.c atlas-3.10.3/src/testing/ATL_f77hpr2.c --- atlas-3.10.2/src/testing/ATL_f77hpr2.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77hpr2.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77hpr.c atlas-3.10.3/src/testing/ATL_f77hpr.c --- atlas-3.10.2/src/testing/ATL_f77hpr.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77hpr.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77lauum.c atlas-3.10.3/src/testing/ATL_f77lauum.c --- atlas-3.10.2/src/testing/ATL_f77lauum.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77lauum.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77nrm2.c atlas-3.10.3/src/testing/ATL_f77nrm2.c --- atlas-3.10.2/src/testing/ATL_f77nrm2.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77nrm2.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77posv.c atlas-3.10.3/src/testing/ATL_f77posv.c --- atlas-3.10.2/src/testing/ATL_f77posv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77posv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77potrf.c atlas-3.10.3/src/testing/ATL_f77potrf.c --- atlas-3.10.2/src/testing/ATL_f77potrf.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77potrf.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77rot.c atlas-3.10.3/src/testing/ATL_f77rot.c --- atlas-3.10.2/src/testing/ATL_f77rot.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77rot.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77rotg.c atlas-3.10.3/src/testing/ATL_f77rotg.c --- atlas-3.10.2/src/testing/ATL_f77rotg.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77rotg.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77rotm.c atlas-3.10.3/src/testing/ATL_f77rotm.c --- atlas-3.10.2/src/testing/ATL_f77rotm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77rotm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77rotmg.c atlas-3.10.3/src/testing/ATL_f77rotmg.c --- atlas-3.10.2/src/testing/ATL_f77rotmg.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77rotmg.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77rscal.c atlas-3.10.3/src/testing/ATL_f77rscal.c --- atlas-3.10.2/src/testing/ATL_f77rscal.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77rscal.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77sbmv.c atlas-3.10.3/src/testing/ATL_f77sbmv.c --- atlas-3.10.2/src/testing/ATL_f77sbmv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77sbmv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77scal.c atlas-3.10.3/src/testing/ATL_f77scal.c --- atlas-3.10.2/src/testing/ATL_f77scal.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77scal.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77spmv.c atlas-3.10.3/src/testing/ATL_f77spmv.c --- atlas-3.10.2/src/testing/ATL_f77spmv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77spmv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77spr2.c atlas-3.10.3/src/testing/ATL_f77spr2.c --- atlas-3.10.2/src/testing/ATL_f77spr2.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77spr2.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77spr.c atlas-3.10.3/src/testing/ATL_f77spr.c --- atlas-3.10.2/src/testing/ATL_f77spr.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77spr.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77swap.c atlas-3.10.3/src/testing/ATL_f77swap.c --- atlas-3.10.2/src/testing/ATL_f77swap.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77swap.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77symm.c atlas-3.10.3/src/testing/ATL_f77symm.c --- atlas-3.10.2/src/testing/ATL_f77symm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77symm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77symv.c atlas-3.10.3/src/testing/ATL_f77symv.c --- atlas-3.10.2/src/testing/ATL_f77symv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77symv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77syr2.c atlas-3.10.3/src/testing/ATL_f77syr2.c --- atlas-3.10.2/src/testing/ATL_f77syr2.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77syr2.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77syr2k.c atlas-3.10.3/src/testing/ATL_f77syr2k.c --- atlas-3.10.2/src/testing/ATL_f77syr2k.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77syr2k.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77syr.c atlas-3.10.3/src/testing/ATL_f77syr.c --- atlas-3.10.2/src/testing/ATL_f77syr.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77syr.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_f77syrk.c atlas-3.10.3/src/testing/ATL_f77syrk.c --- atlas-3.10.2/src/testing/ATL_f77syrk.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77syrk.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77tbmv.c atlas-3.10.3/src/testing/ATL_f77tbmv.c --- atlas-3.10.2/src/testing/ATL_f77tbmv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77tbmv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. 
Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77tbsv.c atlas-3.10.3/src/testing/ATL_f77tbsv.c --- atlas-3.10.2/src/testing/ATL_f77tbsv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77tbsv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77tpmv.c atlas-3.10.3/src/testing/ATL_f77tpmv.c --- atlas-3.10.2/src/testing/ATL_f77tpmv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77tpmv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77tpsv.c atlas-3.10.3/src/testing/ATL_f77tpsv.c --- atlas-3.10.2/src/testing/ATL_f77tpsv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77tpsv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77trmm.c atlas-3.10.3/src/testing/ATL_f77trmm.c --- atlas-3.10.2/src/testing/ATL_f77trmm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77trmm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. 
Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77trmv.c atlas-3.10.3/src/testing/ATL_f77trmv.c --- atlas-3.10.2/src/testing/ATL_f77trmv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77trmv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77trsm.c atlas-3.10.3/src/testing/ATL_f77trsm.c --- atlas-3.10.2/src/testing/ATL_f77trsm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77trsm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_f77trsv.c atlas-3.10.3/src/testing/ATL_f77trsv.c --- atlas-3.10.2/src/testing/ATL_f77trsv.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_f77trsv.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. Clint Whaley diff -Nru atlas-3.10.2/src/testing/ATL_gbnrm1.c atlas-3.10.3/src/testing/ATL_gbnrm1.c --- atlas-3.10.2/src/testing/ATL_gbnrm1.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_gbnrm1.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without @@ -55,6 +55,8 @@ t0 += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); #endif } + if (t0 != t0) + return(t0); if (t0 > max) max = t0; } return(max); diff -Nru atlas-3.10.2/src/testing/ATL_gediff.c atlas-3.10.3/src/testing/ATL_gediff.c --- atlas-3.10.2/src/testing/ATL_gediff.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_gediff.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_gediffnrm1.c atlas-3.10.3/src/testing/ATL_gediffnrm1.c --- atlas-3.10.2/src/testing/ATL_gediffnrm1.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_gediffnrm1.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -47,6 +47,8 @@ t0 = ATL_rzero; for (i=0; i != M2; i++) t0 += Mabs(A[i] - B[i]); if (t0 > max) max = t0; + if (t0 != t0) /* if we've got a NaN */ + return(t0); /* report it as norm! */ A += lda2; B += ldb2; } diff -Nru atlas-3.10.2/src/testing/ATL_gegen.c atlas-3.10.3/src/testing/ATL_gegen.c --- atlas-3.10.2/src/testing/ATL_gegen.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_gegen.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_genrm1.c atlas-3.10.3/src/testing/ATL_genrm1.c --- atlas-3.10.2/src/testing/ATL_genrm1.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_genrm1.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -43,6 +43,8 @@ for (j=0; j < N; j++) { t0 = Mjoin(PATL,asum)(M, A, 1); + if (t0 != t0) + return(t0); if (t0 > max) max = t0; A += lda2; } diff -Nru atlas-3.10.2/src/testing/ATL_geprint.c atlas-3.10.3/src/testing/ATL_geprint.c --- atlas-3.10.2/src/testing/ATL_geprint.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_geprint.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_hbnrm.c atlas-3.10.3/src/testing/ATL_hbnrm.c --- atlas-3.10.2/src/testing/ATL_hbnrm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_hbnrm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without @@ -84,8 +84,9 @@ } max = work[0]; - for( j = 1; j < N; j++ ) if( max < work[j] ) max = work[j]; - + for( j = 1; j < N; j++ ) + if (work[j] != work[j] || max < work[j]) + max = work[j]; if( work ) free( work ); return( max ); diff -Nru atlas-3.10.2/src/testing/ATL_hediffnrm.c atlas-3.10.3/src/testing/ATL_hediffnrm.c --- atlas-3.10.2/src/testing/ATL_hediffnrm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_hediffnrm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_henrm.c atlas-3.10.3/src/testing/ATL_henrm.c --- atlas-3.10.2/src/testing/ATL_henrm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_henrm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without @@ -75,7 +75,9 @@ } max = work[0]; - for( j = 1; j < N; j++ ) if( max < work[j] ) max = work[j]; + for(j = 1; j < N; j++) + if (work[j] != work[j] || max < work[j]) + max = work[j]; if( work ) free( work ); diff -Nru atlas-3.10.2/src/testing/ATL_hpnrm.c atlas-3.10.3/src/testing/ATL_hpnrm.c --- atlas-3.10.2/src/testing/ATL_hpnrm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_hpnrm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without @@ -77,7 +77,9 @@ } max = work[0]; - for( j = 1; j < N; j++ ) if( max < work[j] ) max = work[j]; + for(j = 1; j < N; j++) + if (work[j] != work[j] || max < work[j]) + max = work[j]; if( work ) free( work ); diff -Nru atlas-3.10.2/src/testing/ATL_infnrm.c atlas-3.10.3/src/testing/ATL_infnrm.c --- atlas-3.10.2/src/testing/ATL_infnrm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_infnrm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -30,15 +30,32 @@ #include "atlas_misc.h" #include "atlas_tst.h" #include "atlas_level1.h" - TYPE Mjoin(PATL,infnrm)(const int N, const TYPE *X, const int incX) { - int i; - i = Mjoin(Mjoin(ATL_i,PRE),amax)(N, X, incX); -#ifdef TREAL - return(Mabs(X[i*incX])); -#else - i *= (incX<<1); - return(Mabs(X[i]) + Mabs(X[i+1])); -#endif + register TYPE max=0.0; + register int i; + if (N > 0) + { + #ifdef TCPLX + const int incX2=incX+incX; + for (i=0; i < N; i++, X += incX2) + { + register TYPE t0; + t0 = Mabs(*X) + Mabs(X[1]); + if (t0 != t0) + return(t0); + max = (max >= t0) ? max : t0; + } + #else + for (i=0; i < N; i++, X += incX) + { + register TYPE t0; + t0 = Mabs(*X); + if (t0 != t0) + return(t0); + max = (max >= t0) ? max : t0; + } + #endif + } + return(max); } diff -Nru atlas-3.10.2/src/testing/ATL_rand.c atlas-3.10.3/src/testing/ATL_rand.c --- atlas-3.10.2/src/testing/ATL_rand.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_rand.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_sbnrm.c atlas-3.10.3/src/testing/ATL_sbnrm.c --- atlas-3.10.2/src/testing/ATL_sbnrm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_sbnrm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without @@ -102,8 +102,9 @@ } max = work[0]; - for( j = 1; j < N; j++ ) if( max < work[j] ) max = work[j]; - + for( j = 1; j < N; j++ ) + if (work[j] != work[j] || max < work[j]) + max = work[j]; if( work ) free( work ); return( max ); diff -Nru atlas-3.10.2/src/testing/ATL_sdsf77dot.c atlas-3.10.3/src/testing/ATL_sdsf77dot.c --- atlas-3.10.2/src/testing/ATL_sdsf77dot.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_sdsf77dot.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_spnrm.c atlas-3.10.3/src/testing/ATL_spnrm.c --- atlas-3.10.2/src/testing/ATL_spnrm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_spnrm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without @@ -95,7 +95,9 @@ } max = work[0]; - for( j = 1; j < N; j++ ) if( max < work[j] ) max = work[j]; + for(j = 1; j < N; j++) + if (work[j] != work[j] || max < work[j]) + max = work[j]; if( work ) free( work ); diff -Nru atlas-3.10.2/src/testing/ATL_synrm.c atlas-3.10.3/src/testing/ATL_synrm.c --- atlas-3.10.2/src/testing/ATL_synrm.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_synrm.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without @@ -93,7 +93,9 @@ } max = work[0]; - for( j = 1; j < N; j++ ) if( max < work[j] ) max = work[j]; + for(j = 1; j < N; j++) + if (work[j] != work[j] || max < work[j]) + max = work[j]; if( work ) free( work ); diff -Nru atlas-3.10.2/src/testing/ATL_tbnrm1.c atlas-3.10.3/src/testing/ATL_tbnrm1.c --- atlas-3.10.2/src/testing/ATL_tbnrm1.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_tbnrm1.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. 
Petitet * * Redistribution and use in source and binary forms, with or without @@ -56,6 +56,8 @@ t0 += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); #endif } + if (t0 != t0) + return(t0); if( DIAG == AtlasNonUnit ) t0 += ATL_rone; if (t0 > max) max = t0; @@ -77,6 +79,8 @@ t0 += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); #endif } + if (t0 != t0) + return(t0); if (t0 > max) max = t0; } } diff -Nru atlas-3.10.2/src/testing/ATL_tpnrm1.c atlas-3.10.3/src/testing/ATL_tpnrm1.c --- atlas-3.10.2/src/testing/ATL_tpnrm1.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_tpnrm1.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without @@ -53,6 +53,8 @@ t0 += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); #endif } + if (t0 != t0) + return(t0); if( DIAG == AtlasNonUnit ) t0 += ATL_rone; if (t0 > max) max = t0; iaij += (1 SHIFT); @@ -73,6 +75,8 @@ t0 += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); #endif } + if (t0 != t0) + return(t0); if (t0 > max) max = t0; iaij -= ( ( N - j ) << (1 SHIFT) ) + (1 SHIFT); diff -Nru atlas-3.10.2/src/testing/ATL_trgen.c atlas-3.10.3/src/testing/ATL_trgen.c --- atlas-3.10.2/src/testing/ATL_trgen.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_trgen.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_trnrm1.c atlas-3.10.3/src/testing/ATL_trnrm1.c --- atlas-3.10.2/src/testing/ATL_trnrm1.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_trnrm1.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 Antoine P. Petitet * * Redistribution and use in source and binary forms, with or without @@ -47,6 +47,8 @@ for (j=0; j < N; j++) { t0 = Mjoin(PATL,asum)(j+ioff, A, 1); + if (t0 != t0) + return(t0); if (Diag == AtlasUnit) t0 += ATL_rone; if (t0 > max) max = t0; A += incA; @@ -58,6 +60,8 @@ for (j=N; j; j--) { t0 = Mjoin(PATL,asum)(j+ioff-1, A, 1); + if (t0 != t0) + return(t0); if (Diag == AtlasUnit) t0 += ATL_rone; if (t0 > max) max = t0; A += incA; diff -Nru atlas-3.10.2/src/testing/ATL_tstsqtran.c atlas-3.10.3/src/testing/ATL_tstsqtran.c --- atlas-3.10.2/src/testing/ATL_tstsqtran.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_tstsqtran.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/testing/ATL_vdiff.c atlas-3.10.3/src/testing/ATL_vdiff.c --- atlas-3.10.2/src/testing/ATL_vdiff.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/testing/ATL_vdiff.c 2016-07-28 19:43:04.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/src/threads/ATL_DecAtomicCount_amd64.S atlas-3.10.3/src/threads/ATL_DecAtomicCount_amd64.S --- atlas-3.10.2/src/threads/ATL_DecAtomicCount_amd64.S 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/threads/ATL_DecAtomicCount_amd64.S 2016-07-28 19:43:04.000000000 +0000 @@ -1,19 +1,26 @@ +#ifdef ATL_GAS_WOW64 + #define vp %rcx + #define cnt %edx +#else + #define vp %rdi + #define cnt %ecx +#endif #include "atlas_asm.h" /* rax %rdi/rcx/4 */ /* int ATL_DecAtomicCount(void *vp) */ .text .global ATL_asmdecor(ATL_DecAtomicCount) ATL_asmdecor(ATL_DecAtomicCount): - sub $-128, %rdi /* skip false sharing guard zone */ + sub $-128, vp /* skip false sharing guard zone */ ATOMIC_LOOP: - movl (%rdi), %eax /* read cnt from memory */ - movl %eax, %ecx /* ecx = cnt */ - subl $1, %ecx /* ecx = cnt-1 */ - jl ZERO_RET /* return 0 if count already below 1 */ - lock /* make cmpxchg atomic */ - cmpxchg %ecx, (%rdi) /* put cnt-1 in mem if mem still == cnt in eax */ - je DONE /* ZF set if cmpxchg wrote to mem */ - jmp ATOMIC_LOOP /* ZF=0 means cmpxch failed, try again */ + movl (vp), %eax /* read cnt from memory */ + movl %eax, cnt /* cnt = count */ + subl $1, cnt /* cnt = count-1 */ + jl ZERO_RET /* return 0 if count already below 1 */ + lock /* make cmpxchg atomic */ + cmpxchg cnt, (vp) /* put cnt-1 in mem if mem still == cnt in eax */ + je DONE /* ZF set if cmpxchg wrote to mem */ + jmp ATOMIC_LOOP /* ZF=0 means cmpxch failed, try again */ ZERO_RET: xor %rax, %rax diff -Nru atlas-3.10.2/src/threads/ATL_DecAtomicCount_ia32.S atlas-3.10.3/src/threads/ATL_DecAtomicCount_ia32.S --- atlas-3.10.2/src/threads/ATL_DecAtomicCount_ia32.S 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/threads/ATL_DecAtomicCount_ia32.S 2016-07-28 19:43:04.000000000 +0000 @@ -1,3 +1,4 @@ +#define cnt %ecx #include "atlas_asm.h" /* rax %rdi/rcx/4 */ /* int ATL_DecAtomicCount(void *vp) */ @@ -8,13 
+9,13 @@ sub $-128, %edx /* skip false sharing guard zone */ ATOMIC_LOOP: movl (%edx), %eax /* read cnt from memory */ - movl %eax, %ecx /* ecx = cnt */ - subl $1, %ecx /* ecx = cnt-1 */ - jl ZERO_RET /* return 0 if count already below 1 */ - lock /* make cmpxchg atomic */ - cmpxchg %ecx, (%edx) /* put cnt-1 in mem if mem still == cnt in eax */ - je DONE /* ZF set if cmpxchg wrote to mem */ - jmp ATOMIC_LOOP /* ZF=0 means cmpxch failed, try again */ + movl %eax, cnt /* cnt = count */ + subl $1, cnt /* cnt = count-1 */ + jl ZERO_RET /* return 0 if count already below 1 */ + lock /* make cmpxchg atomic */ + cmpxchg cnt, (%edx) /* put cnt-1 in mem if mem still == cnt in eax */ + je DONE /* ZF set if cmpxchg wrote to mem */ + jmp ATOMIC_LOOP /* ZF=0 means cmpxch failed, try again */ ZERO_RET: xor %eax, %eax diff -Nru atlas-3.10.2/src/threads/ATL_DecAtomicCount_win64.S atlas-3.10.3/src/threads/ATL_DecAtomicCount_win64.S --- atlas-3.10.2/src/threads/ATL_DecAtomicCount_win64.S 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/threads/ATL_DecAtomicCount_win64.S 2016-07-28 19:43:04.000000000 +0000 @@ -1,3 +1,10 @@ +#ifdef ATL_GAS_WOW64 + #define vp %rcx + #define cnt %edx +#else + #define vp %rdi + #define cnt %ecx +#endif #include "atlas_asm.h" /* rax %rdi/rcx/4 */ /* int ATL_DecAtomicCount(void *vp) */ @@ -8,13 +15,13 @@ sub $-128, %rdx /* skip false sharing guard zone */ ATOMIC_LOOP: movl (%rdx), %eax /* read cnt from memory */ - movl %eax, %ecx /* ecx = cnt */ - subl $1, %ecx /* ecx = cnt-1 */ - jl ZERO_RET /* return 0 if count already below 1 */ - lock /* make cmpxchg atomic */ - cmpxchg %ecx, (%rdx) /* put cnt-1 in mem if mem still == cnt in eax */ - je DONE /* ZF set if cmpxchg wrote to mem */ - jmp ATOMIC_LOOP /* ZF=0 means cmpxch failed, try again */ + movl %eax, cnt /* cnt = count */ + subl $1, cnt /* cnt = count-1 */ + jl ZERO_RET /* return 0 if count already below 1 */ + lock /* make cmpxchg atomic */ + cmpxchg cnt, (%rdx) /* put cnt-1 in mem if mem still 
== cnt in eax */ + je DONE /* ZF set if cmpxchg wrote to mem */ + jmp ATOMIC_LOOP /* ZF=0 means cmpxch failed, try again */ ZERO_RET: xor %rax, %rax diff -Nru atlas-3.10.2/src/threads/ATL_ResetAtomicCount_amd64.S atlas-3.10.3/src/threads/ATL_ResetAtomicCount_amd64.S --- atlas-3.10.2/src/threads/ATL_ResetAtomicCount_amd64.S 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/threads/ATL_ResetAtomicCount_amd64.S 2016-07-28 19:43:04.000000000 +0000 @@ -1,4 +1,13 @@ #include "atlas_asm.h" +#ifdef ATL_GAS_WOW64 + #define vp %rcx + #define cnt %rdx + #define ecnt %edx +#else + #define vp %rdi + #define cnt %rsi + #define ecnt %esi +#endif /* * rax rdi rsi * int ATL_ResetAtomicCount(void *vp, int cnt) @@ -8,11 +17,11 @@ .text .global ATL_asmdecor(ATL_ResetAtomicCount) ATL_asmdecor(ATL_ResetAtomicCount): - sub $-128, %rdi /* skip false sharing guard zone */ + sub $-128, vp /* skip false sharing guard zone */ ATOMIC_LOOP: - movl (%rdi), %eax /* read acnt from memory */ + movl (vp), %eax /* read acnt from memory */ lock /* make cmpxchg atomic */ - cmpxchg %esi, (%rdi) /* put cnt in mem if mem still == acnt in eax */ + cmpxchg ecnt, (vp) /* put cnt in mem if mem still == acnt in eax */ je DONE /* ZF set if cmpxchg wrote to mem */ jmp ATOMIC_LOOP /* ZF=0 means cmpxch failed, try again */ DONE: diff -Nru atlas-3.10.2/src/threads/ATL_ResetAtomicCount_ia32.S atlas-3.10.3/src/threads/ATL_ResetAtomicCount_ia32.S --- atlas-3.10.2/src/threads/ATL_ResetAtomicCount_ia32.S 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/threads/ATL_ResetAtomicCount_ia32.S 2016-07-28 19:43:04.000000000 +0000 @@ -1,6 +1,5 @@ #include "atlas_asm.h" /* - * rax rdi rsi * int ATL_ResetAtomicCount(void *vp, int cnt) * Sets vp's acnt=cnt. 
* RETURNS: acnt before the reset diff -Nru atlas-3.10.2/src/threads/ATL_thread_start.c atlas-3.10.3/src/threads/ATL_thread_start.c --- atlas-3.10.2/src/threads/ATL_thread_start.c 2014-07-10 16:22:06.000000000 +0000 +++ atlas-3.10.3/src/threads/ATL_thread_start.c 2016-07-28 19:43:04.000000000 +0000 @@ -17,7 +17,7 @@ #ifdef ATL_WIN32THREADS DWORD thrID; #else - unsigned thrID; + unsigned int thrID; #endif #ifdef ATL_NOAFFINITY @@ -69,7 +69,9 @@ #else pthread_attr_t attr; #ifndef ATL_NOAFFINITY - #if defined(ATL_PAFF_SETAFFNP) || defined(ATL_PAFF_SCHED) + #if defined(ATL_PAFF_SETAFFNP) + cpu_set_t *cpuset; /* allow IBM random-realloc() */ + #elif defined(ATL_PAFF_SCHED) cpu_set_t cpuset; #elif defined(ATL_PAFF_PLPA) plpa_cpu_set_t cpuset; @@ -101,9 +103,15 @@ ATL_assert(!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)); pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); /* no chk, OK to fail */ #ifdef ATL_PAFF_SETAFFNP - CPU_ZERO(&cpuset); - CPU_SET(affID, &cpuset); - ATL_assert(!pthread_attr_setaffinity_np(&attr, sizeof(cpuset), &cpuset)); +/* + * On POWER8/Linux, pthread_attr_setaffinity_np sometimes reallocs() the + * cpuset variable, thus it must be malloced and not taken from stack! 
+ */ + cpuset = malloc(sizeof(cpu_set_t)); + CPU_ZERO(cpuset); + CPU_SET(affID, cpuset); + ATL_assert(!pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t),cpuset)); + free(cpuset); #elif defined(ATL_PAFF_SETPROCNP) ATL_assert(!pthread_attr_setprocessor_np(&attr, (pthread_spu_t)affID, PTHREAD_BIND_FORCED_NP)); @@ -112,12 +120,16 @@ #if defined(ATL_PAFF_PBIND) ATL_assert(!processor_bind(P_LWPID, thr->thrH, affID, NULL)); thr->paff_set = 0; /* affinity set by spawner */ - #elif defined(ATL_PAFF_BINDP) - ATL_assert(!bindprocessor(BINDTHREAD, thr->thrH, bindID)); +/* + * AIX can't take a pthread as BINDTHREAD, must be a kernelID, so let + * self-affinity handle it + */ + #elif defined(ATL_PAFF_BINDP) && !defined(ATL_OS_AIX) + ATL_assert(!bindprocessor(BINDTHREAD, thr->thrH, affID)); thr->paff_set = 0; /* affinity set by spawner */ #elif defined(ATL_PAFF_CPUSET) /* untried FreeBSD code */ CPU_ZERO(&mycpuset); /* no manpage, so guess works like linux */ - CPU_SET(bindID, &mycpuset); + CPU_SET(affID, &mycpuset); if (!cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, thr->thrH, sizeof(mycpuset), &mycpuset)); thr->paff_set = 0; /* affinity set by spawner */ diff -Nru atlas-3.10.2/src/threads/blas/level2/ATL_tgemv.c atlas-3.10.3/src/threads/blas/level2/ATL_tgemv.c --- atlas-3.10.2/src/threads/blas/level2/ATL_tgemv.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/src/threads/blas/level2/ATL_tgemv.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,6 @@ #include "atlas_misc.h" #include "atlas_level2.h" +#include "atlas_taffinity.h" #include "atlas_threads.h" #include "atlas_tcacheedge.h" /* @@ -132,7 +133,7 @@ const SCALAR beta, TYPE *Y, ATL_CINT incY) { static size_t ALb=0, ALe=0; - size_t at = (size_t) A; + size_t at = (size_t) A, ce; ATL_INT n, P, ldaP; ATL_TGEMV_t pd; /* @@ -168,8 +169,15 @@ pd.flg |= (2|4); } #endif - P = ATL_DivBySize(CacheEdge); - P = ((size_t)M*N+P-1) / P; /* add more procs only when cache is full */ +/* + * self-affinity is expensive, so require a 
32x bigger problem to thread! + */ + #if defined(ATL_PAFF_SELF) && ATL_PAFF_SELF != 0 + ce = ATL_DivBySize(CacheEdge)<<5; + #else + ce = ATL_DivBySize(CacheEdge); + #endif + P = ((size_t)M*N+ce-1) / ce; /* add more procs only when cache is full */ P = (P&1 && P > 1)?P+1 : P; /* don't use odd P; it hurts alignment */ P = Mmin(ATL_NTHREADS, P); /* diff -Nru atlas-3.10.2/src/threads/blas/level2/ATL_tger.c atlas-3.10.3/src/threads/blas/level2/ATL_tger.c --- atlas-3.10.2/src/threads/blas/level2/ATL_tger.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/src/threads/blas/level2/ATL_tger.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,6 @@ #include "atlas_misc.h" #include "atlas_level2.h" +#include "atlas_taffinity.h" #include "atlas_threads.h" #include "atlas_tcacheedge.h" /* @@ -79,6 +80,7 @@ ATL_TGER_t pd; int P; static TYPE *A0=NULL, *A0e=NULL; + size_t ce; if (M < 1 || N < 1 || SCALAR_IS_ZERO(alpha)) /* quick return if no-op */ return; @@ -88,9 +90,15 @@ pd.X = X; pd.Y = Y; pd.A = A; pd.flg = (A0 == A || A0e == A+(M SHIFT)) ? 1 : 2; A0 = A; A0e = A+(M SHIFT); - - P = ATL_DivBySize(CacheEdge); - P = ((size_t)M*N+P-1) / P; /* add more procs only when cache is full */ +/* + * self-affinity is expensive, so require a 32x bigger problem to thread! + */ + #if defined(ATL_PAFF_SELF) && ATL_PAFF_SELF != 0 + ce = ATL_DivBySize(CacheEdge)<<5; + #else + ce = ATL_DivBySize(CacheEdge); + #endif + P = ((size_t)M*N+ce-1) / ce; /* add more procs only when cache is full */ P = (P&1 && P > 1)?P+1 : P; /* don't use odd P, since it hurts alignment */ // printf("TGER, P=%d\n", P); P = Mmin(ATL_NTHREADS, P); diff -Nru atlas-3.10.2/src/threads/blas/level3/ATL_tgemm.c atlas-3.10.3/src/threads/blas/level3/ATL_tgemm.c --- atlas-3.10.2/src/threads/blas/level3/ATL_tgemm.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/src/threads/blas/level3/ATL_tgemm.c 2016-07-28 19:43:20.000000000 +0000 @@ -573,8 +573,10 @@ * of dynamic scheduling, which always seems to pay for itself. 
* On unloaded AMD machines, the asymptotic loss is roughly 1-2%. * Dynamic scheduling seems to always be a performance loss for MAC OSX + * These routines require strongly-ordered caches, so only enable on x86. */ - #ifndef ATL_OS_OSX + #if !defined(ATL_OS_OSX) && (defined(ATL_GAS_x8664) || \ + defined(ATL_GAS_WOW64) || defined(ATL_GAS_x8632)) #ifdef FindingCE ATL_assert(!Mjoin(PATL,tgemm_bigMN_Kp)(TA, TB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc)); diff -Nru atlas-3.10.2/src/threads/blas/level3/ATL_tgemm_rkK.c atlas-3.10.3/src/threads/blas/level3/ATL_tgemm_rkK.c --- atlas-3.10.2/src/threads/blas/level3/ATL_tgemm_rkK.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/src/threads/blas/level3/ATL_tgemm_rkK.c 2016-07-28 19:43:20.000000000 +0000 @@ -70,7 +70,11 @@ return; } if (mb != NB && nb != NB) + { + if (SCALAR_IS_ZERO(beta)) + Mjoin(PATL,gezero)(mb, nb, C, ldc); mmk_bX = mmk = genmm; + } else if (mb != NB) { mmk = PMBmm; diff -Nru atlas-3.10.2/src/threads/lapack/ATL_tgeql2.c atlas-3.10.3/src/threads/lapack/ATL_tgeql2.c --- atlas-3.10.2/src/threads/lapack/ATL_tgeql2.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/src/threads/lapack/ATL_tgeql2.c 2016-07-28 19:43:19.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. Clint Whaley diff -Nru atlas-3.10.2/src/threads/lapack/ATL_tgeqr2.c atlas-3.10.3/src/threads/lapack/ATL_tgeqr2.c --- atlas-3.10.2/src/threads/lapack/ATL_tgeqr2.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/src/threads/lapack/ATL_tgeqr2.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Siju Samuel * * Code contributers : Siju Samuel, Anthony M. Castaldo, R. 
Clint Whaley diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm4x2xVL_simd.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm4x2xVL_simd.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm4x2xVL_simd.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm4x2xVL_simd.c 2016-07-28 19:43:21.000000000 +0000 @@ -0,0 +1,353 @@ +/* + * Automatically Tuned Linear Algebra Software v3.10.3 + * Copyright (C) 2016 R. Clint Whaley + */ +#if !defined(SREAL) && !defined(DREAL) && !defined(SCPLX) && !defined(DCPLX) + #define DREAL 1 +#endif +#include +#include "atlas_simd.h" +#include "atlas_prefetch.h" +#if defined(SCPLX) || defined(DCPLX) + #include "atlas_cplxsimd.h" + #ifndef TCPLX + #define TCPLX 1 + #endif + #define SHIFT <<1 +#else + #define SHIFT + #ifndef TCPLX + #define TREAL 1 + #endif +#endif +#ifndef TYPE + #if defined(SREAL) || defined(SCPLX) + #define TYPE float + #else + #define TYPE double + #endif +#endif +#ifndef ATL_MM_KB + #ifdef KB + #if KB > 0 + #define ATL_KBCONST 1 + #define ATL_MM_KB KB + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif +#else + #if ATL_MM_KB > 0 + #define ATL_KBCONST 1 + #else + #undef ATL_MM_KB + #define ATL_MM_KB K + #define ATL_KBCONST 0 + #endif +#endif +#ifdef BETA1 + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vadd(d_, d_, rA0); \ + ATL_vust(p_, d_); \ + } +#elif defined(BETA0) + #define ATL_vbeta(p_, d_) ATL_vust(p_, d_) +#else + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vmac(d_, rA0, vBE); \ + ATL_vust(p_, d_); \ + } +#endif + +#if ATL_VLEN == 8 + #ifdef BETA0 + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + ((p_)+ldc2)[ 0] = r1_[4]; \ + ((p_)+ldc2)[ 2] = r1_[5]; \ + ((p_)+ldc2)[ 4] = r1_[6]; \ + 
((p_)+ldc2)[ 6] = r1_[7]; \ + } + #elif defined(BETA1) + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + ((p_)+ldc2)[ 0] += r1_[4]; \ + ((p_)+ldc2)[ 2] += r1_[5]; \ + ((p_)+ldc2)[ 4] += r1_[6]; \ + ((p_)+ldc2)[ 6] += r1_[7]; \ + } + #else + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + ((p_)+ldc2)[ 0] = beta*((p_)+ldc2)[ 0] + r1_[4]; \ + ((p_)+ldc2)[ 2] = beta*((p_)+ldc2)[ 2] + r1_[5]; \ + ((p_)+ldc2)[ 4] = beta*((p_)+ldc2)[ 4] + r1_[6]; \ + ((p_)+ldc2)[ 6] = beta*((p_)+ldc2)[ 6] + r1_[7]; \ + } + #endif +#elif ATL_VLEN == 4 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #endif + #ifndef BETA0 + #define vwrtC(p_, rc_, rt_, rz_) \ + { \ + ATL_vunpckHI(rt_, rc_, rz_); /* rt_={0, r3, 0, r2} */ \ + ATL_vunpckLO(rc_, rc_, rz_); /* rc_={0, r1, 0, r0} */ \ + ATL_vbeta(p_, rc_); \ + ATL_vbeta((p_)+4, rt_); \ + } + #endif +#elif ATL_VLEN == 2 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ 
+ (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + } + #endif +#endif +#ifndef ATL_RESTRICT + #if defined(__STDC_VERSION__) && (__STDC_VERSION__/100 >= 1999) + #define ATL_RESTRICT restrict + #else + #define ATL_RESTRICT + #endif +#endif + +void ATL_USERMM + (const int M, const int N, const int K, const TYPE alpha, + const TYPE * ATL_RESTRICT A, const int lda, + const TYPE * ATL_RESTRICT B, const int ldb, const TYPE beta, + TYPE * ATL_RESTRICT C, const int ldc) +/* + * Performs a GEMM with M,N,K unrolling (& jam) of (4,2,VLEN). + * Vectorization of VLEN=[4,8] (d,s) along K dim, vec unroll=(4,2,1). + * You may set compile-time constant K dim by defining ATL_MM_KB. + */ +{ + const TYPE *pB0=B, *aa=A, *pA0=A, *pA2=pA0+(lda<<1), *pfA=A+lda*M, + *pfB=B+ldb*N; + const size_t ldc2 = ldc SHIFT; + TYPE *pC0=C; + int i, j, k; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_VTYPE vBE; + #elif ATL_VLEN == 8 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0, beta, 1.0, beta, 1.0}; + #elif ATL_VLEN == 4 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0}; + #endif + const size_t incAm = (lda<<2), incBn = ldb*2; + const size_t incC=ldc2*2; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_vbcast(vBE, &beta); + #endif + + for (j=0; j < N; j += 2) + { + for (i=0; i < M; i += 4) + { + register ATL_VTYPE rA0, rA1, rA2, rA3, rB0, rC00, rC10, rC20, rC30, + rB1, rC01, rC11, rC21, rC31; + /* Peel K=0 iteration to avoid zero of rCxx and extra add */ + ATL_vld(rB0, pB0); + ATL_vld(rA0, pA0); + ATL_vmul(rC00, rA0, rB0); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC10, rA1, rB0); + ATL_vld(rA2, pA2); + ATL_vmul(rC20, rA2, rB0); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC30, rA3, rB0); + + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + ATL_vld(rB0, pB0); +
ATL_vmul(rC01, rA0, rB1); + ATL_vld(rA0, pA0); + ATL_vmul(rC11, rA1, rB1); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC21, rA2, rB1); + ATL_vld(rA2, pA2); + ATL_vmul(rC31, rA3, rB1); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_pfl1R(pfA); + ATL_pfl1R(pfA+8); + + pfA += 8; pfB += 8; +/* + * Stop one iteration early to drain preload pipeline of A/B + */ + for (k=(ATL_VLEN<<1); k < ATL_MM_KB; k += ATL_VLEN) + { + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vld(rB0, pB0); + + + ATL_vmac(rC01, rA0, rB1); + ATL_vld(rA0, pA0); + ATL_vmac(rC11, rA1, rB1); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmac(rC21, rA2, rB1); + ATL_vld(rA2, pA2); + ATL_vmac(rC31, rA3, rB1); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + } /* end K-loop */ +/* + * Last iteration peeled out to drain preload pipeline + */ + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB1, pB0+ldb); + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + + #if ATL_VLEN == 2 + ATL_vvrsum2(rC00, rC10); + ATL_vvrsum2(rC20, rC30); + ATL_vvrsum2(rC01, rC11); + ATL_vvrsum2(rC21, rC31); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+4, rC20); + wrtC(pC0+ldc2, rC01); + wrtC(pC0+ldc2+4, rC21); + #else + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+2, rC20); + ATL_vbeta(pC0+ldc2, rC01); + ATL_vbeta(pC0+ldc2+2, rC21); + #endif + #elif ATL_VLEN == 4 + ATL_vvrsum4(rC00, rC10, rC20, rC30); + ATL_vvrsum4(rC01, rC11, rC21, rC31); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+ldc2, rC01); + #else /* real */ + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+ldc2, rC01); + #endif + #elif ATL_VLEN == 8 && defined(ATL_AVX) && defined(ATL_SSE1) \ + && defined(SCPLX) + { + ATL_vvrsum8(rC00,rC10,rC20,rC30, rC01,rC11,rC21,rC31); + wrtC(pC0, rC00); + } + #elif ATL_VLEN == 8 && defined(ATL_AVX) &&
defined(ATL_SSE1) \ + && defined(SREAL) + { + ATL_vvrsum8(rC00, rC10, rC20, rC30, rC01, rC11, rC21, rC31); + #ifndef BETA0 + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC0), 0); + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC0+ldc2), 1); + #ifdef BETAX + ATL_vmul(rC10, rC10, vBE); + #endif + ATL_vadd(rC00, rC00, rC10); + #endif + _mm_storeu_ps(pC0, _mm256_extractf128_ps(rC00, 0)); + _mm_storeu_ps(pC0+ldc2, _mm256_extractf128_ps(rC00, 1)); + } + #else + #error "VLEN NOT SUPPORTED!" + #endif + pB0 = B; + pC0 += 4 SHIFT; + A += incAm; + pA0 = A; + pA2 = pA0 + (lda<<1); + } /* end of loop over M */ + A = aa; + pA0 = A; + pA2 = A + (lda<<1); + C += incC; + pC0 = C; + B += incBn; + pB0 = B; + } /* end of loop over N */ +} diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm4x3xVL_simd.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm4x3xVL_simd.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm4x3xVL_simd.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm4x3xVL_simd.c 2016-07-28 19:43:21.000000000 +0000 @@ -0,0 +1,426 @@ +/* + * Automatically Tuned Linear Algebra Software v3.10.3 + * Copyright (C) 2016 R. 
Clint Whaley + */ +#if !defined(SREAL) && !defined(DREAL) && !defined(SCPLX) && !defined(DCPLX) + #define DREAL 1 +#endif +#include +#include "atlas_simd.h" +#include "atlas_prefetch.h" +#if defined(SCPLX) || defined(DCPLX) + #include "atlas_cplxsimd.h" + #ifndef TCPLX + #define TCPLX 1 + #endif + #define SHIFT <<1 +#else + #define SHIFT + #ifndef TCPLX + #define TREAL 1 + #endif +#endif +#ifndef TYPE + #if defined(SREAL) || defined(SCPLX) + #define TYPE float + #else + #define TYPE double + #endif +#endif +#ifndef ATL_MM_KB + #ifdef KB + #if KB > 0 + #define ATL_KBCONST 1 + #define ATL_MM_KB KB + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif +#else + #if ATL_MM_KB > 0 + #define ATL_KBCONST 1 + #else + #undef ATL_MM_KB + #define ATL_MM_KB K + #define ATL_KBCONST 0 + #endif +#endif +#ifdef BETA1 + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vadd(d_, d_, rA0); \ + ATL_vust(p_, d_); \ + } +#elif defined(BETA0) + #define ATL_vbeta(p_, d_) ATL_vust(p_, d_) +#else + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vmac(d_, rA0, vBE); \ + ATL_vust(p_, d_); \ + } +#endif + +#if ATL_VLEN == 8 + #ifdef BETA0 + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + ((p_)+ldc2)[ 0] = r1_[4]; \ + ((p_)+ldc2)[ 2] = r1_[5]; \ + ((p_)+ldc2)[ 4] = r1_[6]; \ + ((p_)+ldc2)[ 6] = r1_[7]; \ + } + #elif defined(BETA1) + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + ((p_)+ldc2)[ 0] += r1_[4]; \ + ((p_)+ldc2)[ 2] += r1_[5]; \ + ((p_)+ldc2)[ 4] += r1_[6]; \ + ((p_)+ldc2)[ 6] 
+= r1_[7]; \ + } + #else + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + ((p_)+ldc2)[ 0] = beta*((p_)+ldc2)[ 0] + r1_[4]; \ + ((p_)+ldc2)[ 2] = beta*((p_)+ldc2)[ 2] + r1_[5]; \ + ((p_)+ldc2)[ 4] = beta*((p_)+ldc2)[ 4] + r1_[6]; \ + ((p_)+ldc2)[ 6] = beta*((p_)+ldc2)[ 6] + r1_[7]; \ + } + #endif +#elif ATL_VLEN == 4 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #endif + #ifndef BETA0 + #define vwrtC(p_, rc_, rt_, rz_) \ + { \ + ATL_vunpckHI(rt_, rc_, rz_); /* rt_={0, r3, 0, r2} */ \ + ATL_vunpckLO(rc_, rc_, rz_); /* rc_={0, r1, 0, r0} */ \ + ATL_vbeta(p_, rc_); \ + ATL_vbeta((p_)+4, rt_); \ + } + #endif +#elif ATL_VLEN == 2 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + } + #endif +#endif +#ifndef ATL_RESTRICT + #if defined(__STDC_VERSION__) && (__STDC_VERSION__/100 >= 1999) + #define ATL_RESTRICT restrict + #else + #define ATL_RESTRICT + #endif +#endif + +void ATL_USERMM + (const int M, const int N, const int K, const 
TYPE alpha, + const TYPE * ATL_RESTRICT A, const int lda, + const TYPE * ATL_RESTRICT B, const int ldb, const TYPE beta, + TYPE * ATL_RESTRICT C, const int ldc) +/* + * Performs a GEMM with M,N,K unrolling (& jam) of (4,3,VLEN). + * Vectorization of VLEN=[4,8] (d,s) along K dim, vec unroll=(4,3,1). + * You may set compile-time constant K dim by defining ATL_MM_KB. + */ +{ + const TYPE *pB0=B, *pB2=pB0+(ldb<<1), *aa=A, *pA0=A, *pA2=pA0+(lda<<1), + *pfA=A+lda*M, *pfB=B+ldb*N; + const size_t ldc2 = ldc SHIFT; + TYPE *pC0=C, *pC2=pC0+(ldc2<<1); + int i, j, k; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_VTYPE vBE; + #elif ATL_VLEN == 8 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0, beta, 1.0, beta, 1.0}; + #elif ATL_VLEN == 4 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0}; + #endif + const size_t incAm = (lda<<2), incBn = ldb*3; + const size_t incC=ldc2*3; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_vbcast(vBE, &beta); + #endif + + for (j=0; j < N; j += 3) + { + for (i=0; i < M; i += 4) + { + register ATL_VTYPE rA0, rA1, rA2, rA3, rB0, rC00, rC10, rC20, rC30, + rB1, rC01, rC11, rC21, rC31, rB2, rC02, rC12, rC22, + rC32; + /* Peel K=0 iteration to avoid zero of rCxx and extra add */ + ATL_vld(rB0, pB0); + ATL_vld(rA0, pA0); + ATL_vmul(rC00, rA0, rB0); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC10, rA1, rB0); + ATL_vld(rA2, pA2); + ATL_vmul(rC20, rA2, rB0); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC30, rA3, rB0); + + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + ATL_vld(rB0, pB0); + ATL_vmul(rC01, rA0, rB1); + ATL_vld(rB2, pB2); + pB2 += ATL_VLEN; + ATL_vmul(rC11, rA1, rB1); + ATL_vmul(rC21, rA2, rB1); + ATL_vmul(rC31, rA3, rB1); + ATL_pfl1R(pfA); + ATL_pfl1R(pfA+8); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmul(rC02, rA0, rB2); + ATL_vld(rA0, pA0); + ATL_vmul(rC12, rA1, rB2); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; +
ATL_vmul(rC22, rA2, rB2); + ATL_vld(rA2, pA2); + ATL_vmul(rC32, rA3, rB2); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + pfA += 12; pfB += 12; +/* + * Stop one iteration early to drain preload pipeline of A/B + */ + for (k=(ATL_VLEN<<1); k < ATL_MM_KB; k += ATL_VLEN) + { + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB2, pB2); pB2 += ATL_VLEN; + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vld(rB0, pB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + + ATL_vmac(rC02, rA0, rB2); + ATL_vld(rA0, pA0); + ATL_vmac(rC12, rA1, rB2); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmac(rC22, rA2, rB2); + ATL_vld(rA2, pA2); + ATL_vmac(rC32, rA3, rB2); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + } /* end K-loop */ +/* + * Last iteration peeled out to drain preload pipeline + */ + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB2, pB2); + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + + #if ATL_VLEN == 2 + ATL_vvrsum2(rC00, rC10); + ATL_vvrsum2(rC20, rC30); + ATL_vvrsum2(rC01, rC11); + ATL_vvrsum2(rC21, rC31); + ATL_vvrsum2(rC02, rC12); + ATL_vvrsum2(rC22, rC32); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+4, rC20); + wrtC(pC0+ldc2, rC01); + wrtC(pC0+ldc2+4, rC21); + wrtC(pC2, rC02); + wrtC(pC2+4, rC22); + #else + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+2, rC20); + ATL_vbeta(pC0+ldc2, rC01); + ATL_vbeta(pC0+ldc2+2, rC21); + ATL_vbeta(pC2, rC02); + ATL_vbeta(pC2+2, rC22); + #endif + #elif ATL_VLEN == 4 + ATL_vvrsum4(rC00, rC10, rC20, rC30); + ATL_vvrsum4(rC01, rC11, rC21, rC31); + ATL_vvrsum4(rC02, rC12, rC22, rC32); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+ldc2, rC01); + wrtC(pC2,
rC02); + #else /* real */ + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+ldc2, rC01); + ATL_vbeta(pC2, rC02); + #endif + #elif ATL_VLEN == 8 && defined(ATL_AVX) && defined(ATL_SSE1) \ + && defined(SCPLX) + { + #ifndef BETA0 + ATL_vzero(rB0); + #endif + #ifdef BETA0 + ATL_vvrsum8(rC00,rC10,rC20,rC30, rC01,rC11,rC21,rC31); + wrtC(pC0, rC00); + #else + ATL_vvrsum8(rC00,rC10, rC01, rC11, rC20,rC30, rC21,rC31); + // 7 6 5 4 3 2 1 0 + // {r31, r21, r30, r20, r11, r01, r10, r00} + ATL_vuld(rC30, pC0+ldc2); + // {i31, r31, i21, r21, i11, r11, i01, r01} + ATL_vuld(rC20, pC0); + // {i30, r30, i20, r20, i10, r10, i00, r00} + ATL_vunpckHI(rC10, rC00, rB0); + // { 0, r31, 0, r21, 0, r11, 0, r01} + ATL_vunpckLO(rC00, rC00, rB0); + // { 0, r30, 0, r20, 0, r10, 0, r00} + #ifdef BETAX + ATL_vmul(rC30, rC30, vBE); + ATL_vmul(rC20, rC20, vBE); + #endif + ATL_vadd(rC10, rC10, rC30); + ATL_vadd(rC00, rC00, rC20); + ATL_vust(pC0+ldc2, rC10); + ATL_vust(pC0, rC00); + #endif + ATL_vvrsum4(rC02,rC12,rC22,rC32); + wrtC0(pC2, rC02); + } + #elif ATL_VLEN == 8 && defined(ATL_AVX) && defined(ATL_SSE1) \ + && defined(SREAL) + { + ATL_vvrsum8(rC00, rC10, rC20, rC30, rC01, rC11, rC21, rC31); + #ifndef BETA0 + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC0), 0); + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC0+ldc2), 1); + #ifdef BETAX + ATL_vmul(rC10, rC10, vBE); + #endif + ATL_vadd(rC00, rC00, rC10); + #endif + _mm_storeu_ps(pC0, _mm256_extractf128_ps(rC00, 0)); + _mm_storeu_ps(pC0+ldc2, _mm256_extractf128_ps(rC00, 1)); + { + __m128 c0, c1, cBE; + ATL_vvrsum4(rC02, rC12, rC22, rC32) + c0 = _mm256_extractf128_ps(rC02, 0); + #ifndef BETA0 + c1 = _mm_loadu_ps(pC2); + #ifdef BETAX + cBE =_mm256_extractf128_ps(vBE, 0); + c1 = _mm_mul_ps(c1, cBE); + #endif + c0 = _mm_add_ps(c0, c1); + #endif + _mm_storeu_ps(pC2, c0); + } + } + #else + #error "VLEN NOT SUPPORTED!" 
+ #endif + pB0 = B; + pB2 = B + (ldb<<1); + pC0 += 4 SHIFT; + pC2 += 4 SHIFT; + A += incAm; + pA0 = A; + pA2 = pA0 + (lda<<1); + } /* end of loop over M */ + A = aa; + pA0 = A; + pA2 = A + (lda<<1); + C += incC; + pC0 = C; + B += incBn; + pB0 = B; + pC2 = C + (ldc2<<1); + pB2 = B + (ldb<<1); + } /* end of loop over N */ +} diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm4x4xVL_simd.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm4x4xVL_simd.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm4x4xVL_simd.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm4x4xVL_simd.c 2016-07-28 19:43:21.000000000 +0000 @@ -0,0 +1,451 @@ +/* + * Automatically Tuned Linear Algebra Software v3.10.3 + * Copyright (C) 2016 R. Clint Whaley + */ +#if !defined(SREAL) && !defined(DREAL) && !defined(SCPLX) && !defined(DCPLX) + #define DREAL 1 +#endif +#include +#include "atlas_simd.h" +#include "atlas_prefetch.h" +#if defined(SCPLX) || defined(DCPLX) + #include "atlas_cplxsimd.h" + #ifndef TCPLX + #define TCPLX 1 + #endif + #define SHIFT <<1 +#else + #define SHIFT + #ifndef TCPLX + #define TREAL 1 + #endif +#endif +#ifndef TYPE + #if defined(SREAL) || defined(SCPLX) + #define TYPE float + #else + #define TYPE double + #endif +#endif +#ifndef ATL_MM_KB + #ifdef KB + #if KB > 0 + #define ATL_KBCONST 1 + #define ATL_MM_KB KB + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif +#else + #if ATL_MM_KB > 0 + #define ATL_KBCONST 1 + #else + #undef ATL_MM_KB + #define ATL_MM_KB K + #define ATL_KBCONST 0 + #endif +#endif +#ifdef BETA1 + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vadd(d_, d_, rA0); \ + ATL_vust(p_, d_); \ + } +#elif defined(BETA0) + #define ATL_vbeta(p_, d_) ATL_vust(p_, d_) +#else + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vmac(d_, rA0, vBE); \ + ATL_vust(p_, d_); \ + } +#endif + +#if ATL_VLEN == 8 + #ifdef BETA0 + #define wrtC0(p_, r1_) \ + { \ 
+ (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + ((p_)+ldc2)[ 0] = r1_[4]; \ + ((p_)+ldc2)[ 2] = r1_[5]; \ + ((p_)+ldc2)[ 4] = r1_[6]; \ + ((p_)+ldc2)[ 6] = r1_[7]; \ + } + #elif defined(BETA1) + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + ((p_)+ldc2)[ 0] += r1_[4]; \ + ((p_)+ldc2)[ 2] += r1_[5]; \ + ((p_)+ldc2)[ 4] += r1_[6]; \ + ((p_)+ldc2)[ 6] += r1_[7]; \ + } + #else + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + ((p_)+ldc2)[ 0] = beta*((p_)+ldc2)[ 0] + r1_[4]; \ + ((p_)+ldc2)[ 2] = beta*((p_)+ldc2)[ 2] + r1_[5]; \ + ((p_)+ldc2)[ 4] = beta*((p_)+ldc2)[ 4] + r1_[6]; \ + ((p_)+ldc2)[ 6] = beta*((p_)+ldc2)[ 6] + r1_[7]; \ + } + #endif +#elif ATL_VLEN == 4 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #endif + #ifndef BETA0 + #define vwrtC(p_, rc_, rt_, rz_) \ + { \ + ATL_vunpckHI(rt_, rc_, rz_); /* rt_={0, 
r3, 0, r2} */ \ + ATL_vunpckLO(rc_, rc_, rz_); /* rc_={0, r1, 0, r0} */ \ + ATL_vbeta(p_, rc_); \ + ATL_vbeta((p_)+4, rt_); \ + } + #endif +#elif ATL_VLEN == 2 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + } + #endif +#endif +#ifndef ATL_RESTRICT + #if defined(__STDC_VERSION__) && (__STDC_VERSION__/100 >= 1999) + #define ATL_RESTRICT restrict + #else + #define ATL_RESTRICT + #endif +#endif + +void ATL_USERMM + (const int M, const int N, const int K, const TYPE alpha, + const TYPE * ATL_RESTRICT A, const int lda, + const TYPE * ATL_RESTRICT B, const int ldb, const TYPE beta, + TYPE * ATL_RESTRICT C, const int ldc) +/* + * Performs a GEMM with M,N,K unrolling (& jam) of (4,4,VLEN). + * Vectorization of VLEN=[4,8] (d,s) along K dim, vec unroll=(4,4,1). + * You may set compile-time constant K dim by defining ATL_MM_KB.
+ */ +{ + const TYPE *pB0=B, *pB2=pB0+(ldb<<1), *aa=A, *pA0=A, *pA2=pA0+(lda<<1), + *pfA=A+lda*M, *pfB=B+ldb*N; + const size_t ldc2 = ldc SHIFT; + TYPE *pC0=C, *pC2=pC0+(ldc2<<1); + int i, j, k; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_VTYPE vBE; + #elif ATL_VLEN == 8 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0, beta, 1.0, beta, 1.0}; + #elif ATL_VLEN == 4 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0}; + #endif + const size_t incAm = (lda<<2), incBn = ldb*4; + const size_t incC=ldc2*4; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_vbcast(vBE, &beta); + #endif + + for (j=0; j < N; j += 4) + { + for (i=0; i < M; i += 4) + { + register ATL_VTYPE rA0, rA1, rA2, rA3, rB0, rC00, rC10, rC20, rC30, + rB1, rC01, rC11, rC21, rC31, rB2, rC02, rC12, rC22, + rC32, rB3, rC03, rC13, rC23, rC33; + /* Peel K=0 iteration to avoid zero of rCxx and extra add */ + ATL_vld(rB0, pB0); + ATL_vld(rA0, pA0); + ATL_vmul(rC00, rA0, rB0); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC10, rA1, rB0); + ATL_vld(rA2, pA2); + ATL_vmul(rC20, rA2, rB0); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC30, rA3, rB0); + + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + ATL_vld(rB0, pB0); + ATL_vmul(rC01, rA0, rB1); + ATL_vld(rB2, pB2); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + ATL_vmul(rC11, rA1, rB1); + ATL_vmul(rC21, rA2, rB1); + ATL_vmul(rC31, rA3, rB1); + ATL_pfl1R(pfA); + ATL_pfl1R(pfA+8); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmul(rC02, rA0, rB2); + ATL_pfl1R(pfB); + ATL_pfl1R(pfB+8); + ATL_vmul(rC12, rA1, rB2); + ATL_vmul(rC22, rA2, rB2); + ATL_vmul(rC32, rA3, rB2); + ATL_vld(rB2, pB2); + + ATL_vmul(rC03, rA0, rB3); + ATL_vld(rA0, pA0); + ATL_vmul(rC13, rA1, rB3); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC23, rA2, rB3); + ATL_vld(rA2, pA2); + ATL_vmul(rC33, rA3, rB3); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + pfA += 16; pfB += 16; +/* + * Stop 
one iteration early to drain preload pipeline of A/B + */ + for (k=(ATL_VLEN<<1); k < ATL_MM_KB; k += ATL_VLEN) + { + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vld(rB0, pB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + ATL_vld(rB2, pB2); + + + ATL_vmac(rC03, rA0, rB3); + ATL_vld(rA0, pA0); + ATL_vmac(rC13, rA1, rB3); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmac(rC23, rA2, rB3); + ATL_vld(rA2, pA2); + ATL_vmac(rC33, rA3, rB3); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + } /* end K-loop */ +/* + * Last iteration peeled out to drain preload pipeline + */ + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB3, pB2+ldb); + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + + ATL_vmac(rC03, rA0, rB3); + ATL_vmac(rC13, rA1, rB3); + ATL_vmac(rC23, rA2, rB3); + ATL_vmac(rC33, rA3, rB3); + + #if ATL_VLEN == 2 + ATL_vvrsum2(rC00, rC10); + ATL_vvrsum2(rC20, rC30); + ATL_vvrsum2(rC01, rC11); + ATL_vvrsum2(rC21, rC31); + ATL_vvrsum2(rC02, rC12); + ATL_vvrsum2(rC22, rC32); + ATL_vvrsum2(rC03, rC13); + ATL_vvrsum2(rC23, rC33); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+4, rC20); + wrtC(pC0+ldc2, rC01); + wrtC(pC0+ldc2+4, rC21); + wrtC(pC2, rC02); + wrtC(pC2+4, rC22); + wrtC(pC2+ldc2, rC03); + wrtC(pC2+ldc2+4, rC23); + #else + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+2, rC20); + ATL_vbeta(pC0+ldc2, rC01); + ATL_vbeta(pC0+ldc2+2, rC21); + ATL_vbeta(pC2, rC02); + ATL_vbeta(pC2+2, rC22); +
ATL_vbeta(pC2+ldc2, rC03); + ATL_vbeta(pC2+ldc2+2, rC23); + #endif + #elif ATL_VLEN == 4 + ATL_vvrsum4(rC00, rC10, rC20, rC30); + ATL_vvrsum4(rC01, rC11, rC21, rC31); + ATL_vvrsum4(rC02, rC12, rC22, rC32); + ATL_vvrsum4(rC03, rC13, rC23, rC33); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+ldc2, rC01); + wrtC(pC2, rC02); + wrtC(pC2+ldc2, rC03); + #else /* real */ + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+ldc2, rC01); + ATL_vbeta(pC2, rC02); + ATL_vbeta(pC2+ldc2, rC03); + #endif + #elif ATL_VLEN == 8 && defined(ATL_AVX) && defined(ATL_SSE1) \ + && defined(SCPLX) + { + #ifndef BETA0 + ATL_vzero(rB0); + #endif + #ifdef BETA0 + ATL_vvrsum8(rC00,rC10,rC20,rC30, rC01,rC11,rC21,rC31); + wrtC(pC0, rC00); + #else + ATL_vvrsum8(rC00,rC10, rC01, rC11, rC20,rC30, rC21,rC31); + // 7 6 5 4 3 2 1 0 + // {r31, r21, r30, r20, r11, r01, r10, r00} + ATL_vuld(rC30, pC0+ldc2); + // {i31, r31, i21, r21, i11, r11, i01, r01} + ATL_vuld(rC20, pC0); + // {i30, r30, i20, r20, i10, r10, i00, r00} + ATL_vunpckHI(rC10, rC00, rB0); + // { 0, r31, 0, r21, 0, r11, 0, r01} + ATL_vunpckLO(rC00, rC00, rB0); + // { 0, r30, 0, r20, 0, r10, 0, r00} + #ifdef BETAX + ATL_vmul(rC30, rC30, vBE); + ATL_vmul(rC20, rC20, vBE); + #endif + ATL_vadd(rC10, rC10, rC30); + ATL_vadd(rC00, rC00, rC20); + ATL_vust(pC0+ldc2, rC10); + ATL_vust(pC0, rC00); + #endif + ATL_vvrsum8(rC02,rC12,rC22,rC32, rC03,rC13,rC23,rC33); + wrtC(pC2, rC02); + } + #elif ATL_VLEN == 8 && defined(ATL_AVX) && defined(ATL_SSE1) \ + && defined(SREAL) + { + ATL_vvrsum8(rC00, rC10, rC20, rC30, rC01, rC11, rC21, rC31); + #ifndef BETA0 + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC0), 0); + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC0+ldc2), 1); + #ifdef BETAX + ATL_vmul(rC10, rC10, vBE); + #endif + ATL_vadd(rC00, rC00, rC10); + #endif + _mm_storeu_ps(pC0, _mm256_extractf128_ps(rC00, 0)); + _mm_storeu_ps(pC0+ldc2, _mm256_extractf128_ps(rC00, 1)); + ATL_vvrsum8(rC02, rC12, rC22, rC32, rC03, rC13, rC23, rC33); + #ifndef BETA0 + rC10 = 
_mm256_insertf128_ps(rC10, _mm_loadu_ps(pC2), 0); + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC2+ldc2), 1); + #ifdef BETAX + ATL_vmul(rC10, rC10, vBE); + #endif + ATL_vadd(rC02, rC02, rC10); + #endif + _mm_storeu_ps(pC2, _mm256_extractf128_ps(rC02, 0)); + _mm_storeu_ps(pC2+ldc2, _mm256_extractf128_ps(rC02, 1)); + } + #else + #error "VLEN NOT SUPPORTED!" + #endif + pB0 = B; + pB2 = B + (ldb<<1); + pC0 += 4 SHIFT; + pC2 += 4 SHIFT; + A += incAm; + pA0 = A; + pA2 = pA0 + (lda<<1); + } /* end of loop over M */ + A = aa; + pA0 = A; + pA2 = A + (lda<<1); + C += incC; + pC0 = C; + B += incBn; + pB0 = B; + pC2 = C + (ldc2<<1); + pB2 = B + (ldb<<1); + } /* end of loop over N */ +} diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm4x5xVL_simd.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm4x5xVL_simd.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm4x5xVL_simd.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm4x5xVL_simd.c 2016-07-28 19:43:21.000000000 +0000 @@ -0,0 +1,526 @@ +/* + * Automatically Tuned Linear Algebra Software v3.10.3 + * Copyright (C) 2016 R. 
Clint Whaley + */ +#if !defined(SREAL) && !defined(DREAL) && !defined(SCPLX) && !defined(DCPLX) + #define DREAL 1 +#endif +#include +#include "atlas_simd.h" +#include "atlas_prefetch.h" +#if defined(SCPLX) || defined(DCPLX) + #include "atlas_cplxsimd.h" + #ifndef TCPLX + #define TCPLX 1 + #endif + #define SHIFT <<1 +#else + #define SHIFT + #ifndef TCPLX + #define TREAL 1 + #endif +#endif +#ifndef TYPE + #if defined(SREAL) || defined(SCPLX) + #define TYPE float + #else + #define TYPE double + #endif +#endif +#ifndef ATL_MM_KB + #ifdef KB + #if KB > 0 + #define ATL_KBCONST 1 + #define ATL_MM_KB KB + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif +#else + #if ATL_MM_KB > 0 + #define ATL_KBCONST 1 + #else + #undef ATL_MM_KB + #define ATL_MM_KB K + #define ATL_KBCONST 0 + #endif +#endif +#ifdef BETA1 + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vadd(d_, d_, rA0); \ + ATL_vust(p_, d_); \ + } +#elif defined(BETA0) + #define ATL_vbeta(p_, d_) ATL_vust(p_, d_) +#else + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vmac(d_, rA0, vBE); \ + ATL_vust(p_, d_); \ + } +#endif + +#if ATL_VLEN == 8 + #ifdef BETA0 + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + ((p_)+ldc2)[ 0] = r1_[4]; \ + ((p_)+ldc2)[ 2] = r1_[5]; \ + ((p_)+ldc2)[ 4] = r1_[6]; \ + ((p_)+ldc2)[ 6] = r1_[7]; \ + } + #elif defined(BETA1) + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + ((p_)+ldc2)[ 0] += r1_[4]; \ + ((p_)+ldc2)[ 2] += r1_[5]; \ + ((p_)+ldc2)[ 4] += r1_[6]; \ + ((p_)+ldc2)[ 6] 
+= r1_[7]; \ + } + #else + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + ((p_)+ldc2)[ 0] = beta*((p_)+ldc2)[ 0] + r1_[4]; \ + ((p_)+ldc2)[ 2] = beta*((p_)+ldc2)[ 2] + r1_[5]; \ + ((p_)+ldc2)[ 4] = beta*((p_)+ldc2)[ 4] + r1_[6]; \ + ((p_)+ldc2)[ 6] = beta*((p_)+ldc2)[ 6] + r1_[7]; \ + } + #endif +#elif ATL_VLEN == 4 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #endif + #ifndef BETA0 + #define vwrtC(p_, rc_, rt_, rz_) \ + { \ + ATL_vunpckHI(rt_, rc_, rz_); /* rt_={0, r3, 0, r2} */ \ + ATL_vunpckLO(rc_, rc_, rz_); /* rc_={0, r1, 0, r0} */ \ + ATL_vbeta(p_, rc_); \ + ATL_vbeta((p_)+4, rt_); \ + } + #endif +#elif ATL_VLEN == 2 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + } + #endif +#endif +#ifndef ATL_RESTRICT + #if defined(__STDC_VERSION__) && (__STDC_VERSION__/100 >= 1999) + #define ATL_RESTRICT restrict + #else + #define ATL_RESTRICT + #endif +#endif + +void ATL_USERMM + (const int M, const int N, const int K, const 
TYPE alpha, + const TYPE * ATL_RESTRICT A, const int lda, + const TYPE * ATL_RESTRICT B, const int ldb, const TYPE beta, + TYPE * ATL_RESTRICT C, const int ldc) +/* + * Performs a GEMM with M,N,K unrolling (& jam) of (4,5,VLEN). + * Vectorization of VLEN=[4,8] (d,s) along K dim, vec unroll=(4,5,1). + * You may set compile-time constant K dim by defining ATL_MM_KB. + */ +{ + const TYPE *pB0=B, *pB2=pB0+(ldb<<1), *pB4=pB2+(ldb<<1), *aa=A, *pA0=A, + *pA2=pA0+(lda<<1), *pfA=A+lda*M, *pfB=B+ldb*N; + const size_t ldc2 = ldc SHIFT; + TYPE *pC0=C, *pC2=pC0+(ldc2<<1), *pC4=pC2+(ldc2<<1); + int i, j, k; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_VTYPE vBE; + #elif ATL_VLEN == 8 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0, beta, 1.0, beta, 1.0}; + #elif ATL_VLEN == 4 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0}; + #endif + const size_t incAm = (lda<<2), incBn = ldb*5; + const size_t incC=ldc2*5; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_vbcast(vBE, &beta); + #endif + + for (j=0; j < N; j += 5) + { + for (i=0; i < M; i += 4) + { + register ATL_VTYPE rA0, rA1, rA2, rA3, rB0, rC00, rC10, rC20, rC30, + rB1, rC01, rC11, rC21, rC31, rB2, rC02, rC12, rC22, + rC32, rB3, rC03, rC13, rC23, rC33, rB4, rC04, rC14, + rC24, rC34; + /* Peel K=0 iteration to avoid zero of rCxx and extra add */ + ATL_vld(rB0, pB0); + ATL_vld(rA0, pA0); + ATL_vmul(rC00, rA0, rB0); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC10, rA1, rB0); + ATL_vld(rA2, pA2); + ATL_vmul(rC20, rA2, rB0); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC30, rA3, rB0); + + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + ATL_vld(rB0, pB0); + ATL_vmul(rC01, rA0, rB1); + ATL_vld(rB2, pB2); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + ATL_vld(rB4, pB4); + pB4 += ATL_VLEN; + ATL_vmul(rC11, rA1, rB1); + ATL_vmul(rC21, rA2, rB1); + ATL_vmul(rC31, rA3, rB1); + ATL_pfl1R(pfA); + ATL_pfl1R(pfA+8); +
ATL_pfl1R(pfA+16); + ATL_pfl1R(pfA+24); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmul(rC02, rA0, rB2); + ATL_pfl1R(pfB); + ATL_pfl1R(pfB+8); + ATL_vmul(rC12, rA1, rB2); + ATL_pfl1R(pfB+16); + ATL_pfl1R(pfB+24); + ATL_vmul(rC22, rA2, rB2); + ATL_vmul(rC32, rA3, rB2); + ATL_vld(rB2, pB2); + + ATL_vmul(rC03, rA0, rB3); + ATL_vmul(rC13, rA1, rB3); + ATL_vmul(rC23, rA2, rB3); + ATL_vmul(rC33, rA3, rB3); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + + ATL_vmul(rC04, rA0, rB4); + ATL_vld(rA0, pA0); + ATL_vmul(rC14, rA1, rB4); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC24, rA2, rB4); + ATL_vld(rA2, pA2); + ATL_vmul(rC34, rA3, rB4); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + pfA += 20; pfB += 20; +/* + * Stop one iteration early to drain preload pipeline of A/B + */ + for (k=(ATL_VLEN<<1); k < ATL_MM_KB; k += ATL_VLEN) + { + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB4, pB4); pB4 += ATL_VLEN; + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vld(rB0, pB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + ATL_vld(rB2, pB2); + + ATL_vmac(rC03, rA0, rB3); + ATL_vmac(rC13, rA1, rB3); + ATL_vmac(rC23, rA2, rB3); + ATL_vmac(rC33, rA3, rB3); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + + + ATL_vmac(rC04, rA0, rB4); + ATL_vld(rA0, pA0); + ATL_vmac(rC14, rA1, rB4); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmac(rC24, rA2, rB4); + ATL_vld(rA2, pA2); + ATL_vmac(rC34, rA3, rB4); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + } /* end K-loop */ +/* + * Last iteration peeled out to drain preload pipeline + */ + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB4, pB4); + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21,
rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + + ATL_vmac(rC03, rA0, rB3); + ATL_vmac(rC13, rA1, rB3); + ATL_vmac(rC23, rA2, rB3); + ATL_vmac(rC33, rA3, rB3); + + ATL_vmac(rC04, rA0, rB4); + ATL_vmac(rC14, rA1, rB4); + ATL_vmac(rC24, rA2, rB4); + ATL_vmac(rC34, rA3, rB4); + + #if ATL_VLEN == 2 + ATL_vvrsum2(rC00, rC10); + ATL_vvrsum2(rC20, rC30); + ATL_vvrsum2(rC01, rC11); + ATL_vvrsum2(rC21, rC31); + ATL_vvrsum2(rC02, rC12); + ATL_vvrsum2(rC22, rC32); + ATL_vvrsum2(rC03, rC13); + ATL_vvrsum2(rC23, rC33); + ATL_vvrsum2(rC04, rC14); + ATL_vvrsum2(rC24, rC34); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+4, rC20); + wrtC(pC0+ldc2, rC01); + wrtC(pC0+ldc2+4, rC21); + wrtC(pC2, rC02); + wrtC(pC2+4, rC22); + wrtC(pC2+ldc2, rC03); + wrtC(pC2+ldc2+4, rC23); + wrtC(pC4, rC04); + wrtC(pC4+4, rC24); + #else + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+2, rC20); + ATL_vbeta(pC0+ldc2, rC01); + ATL_vbeta(pC0+ldc2+2, rC21); + ATL_vbeta(pC2, rC02); + ATL_vbeta(pC2+2, rC22); + ATL_vbeta(pC2+ldc2, rC03); + ATL_vbeta(pC2+ldc2+2, rC23); + ATL_vbeta(pC4, rC04); + ATL_vbeta(pC4+2, rC24); + #endif + #elif ATL_VLEN == 4 + ATL_vvrsum4(rC00, rC10, rC20, rC30); + ATL_vvrsum4(rC01, rC11, rC21, rC31); + ATL_vvrsum4(rC02, rC12, rC22, rC32); + ATL_vvrsum4(rC03, rC13, rC23, rC33); + ATL_vvrsum4(rC04, rC14, rC24, rC34); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+ldc2, rC01); + wrtC(pC2, rC02); + wrtC(pC2+ldc2, rC03); + wrtC(pC4, rC04); + #else /* real */ + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+ldc2, rC01); + ATL_vbeta(pC2, rC02); + ATL_vbeta(pC2+ldc2, rC03); + ATL_vbeta(pC4, rC04); + #endif + #elif ATL_VLEN == 8 && defined(ATL_AVX) && defined(ATL_SSE1) \ + && defined(SCPLX) + { + #ifndef BETA0 + ATL_vzero(rB0); + #endif + #ifdef BETA0 + ATL_vvrsum8(rC00,rC10,rC20,rC30, rC01,rC11,rC21,rC31); + wrtC(pC0, rC00); + #else + ATL_vvrsum8(rC00,rC10, rC01, rC11, rC20,rC30, rC21,rC31); + // 7 6 5 4 3 2 1 
0 + // {r31, r21, r30, r20, r11, r01, r10, r00} + ATL_vuld(rC30, pC0+ldc2); + // {i31, r31, i21, r21, i11, r11, i01, r01} + ATL_vuld(rC20, pC0); + // {i30, r30, i20, r20, i10, r10, i00, r00} + ATL_vunpckHI(rC10, rC00, rB0); + // { 0, r31, 0, r21, 0, r11, 0, r01} + ATL_vunpckLO(rC00, rC00, rB0); + // { 0, r30, 0, r20, 0, r10, 0, r00} + #ifdef BETAX + ATL_vmul(rC30, rC30, vBE); + ATL_vmul(rC20, rC20, vBE); + #endif + ATL_vadd(rC10, rC10, rC30); + ATL_vadd(rC00, rC00, rC20); + ATL_vust(pC0+ldc2, rC10); + ATL_vust(pC0, rC00); + #endif + #ifdef BETA0 + ATL_vvrsum8(rC02,rC12,rC22,rC32, rC03,rC13,rC23,rC33); + wrtC(pC2, rC02); + #else + ATL_vvrsum8(rC02,rC12, rC03, rC13, rC22,rC32, rC23,rC33); + // 7 6 5 4 3 2 1 0 + // {r31, r21, r30, r20, r11, r01, r10, r00} + ATL_vuld(rC30, pC2+ldc2); + // {i31, r31, i21, r21, i11, r11, i01, r01} + ATL_vuld(rC20, pC2); + // {i30, r30, i20, r20, i10, r10, i00, r00} + ATL_vunpckHI(rC10, rC02, rB0); + // { 0, r31, 0, r21, 0, r11, 0, r01} + ATL_vunpckLO(rC00, rC02, rB0); + // { 0, r30, 0, r20, 0, r10, 0, r00} + #ifdef BETAX + ATL_vmul(rC30, rC30, vBE); + ATL_vmul(rC20, rC20, vBE); + #endif + ATL_vadd(rC10, rC10, rC30); + ATL_vadd(rC00, rC00, rC20); + ATL_vust(pC2+ldc2, rC10); + ATL_vust(pC2, rC00); + #endif + ATL_vvrsum4(rC04,rC14,rC24,rC34); + wrtC0(pC4, rC04); + } + #elif ATL_VLEN == 8 && defined(ATL_AVX) && defined(ATL_SSE1) \ + && defined(SREAL) + { + ATL_vvrsum8(rC00, rC10, rC20, rC30, rC01, rC11, rC21, rC31); + #ifndef BETA0 + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC0), 0); + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC0+ldc2), 1); + #ifdef BETAX + ATL_vmul(rC10, rC10, vBE); + #endif + ATL_vadd(rC00, rC00, rC10); + #endif + _mm_storeu_ps(pC0, _mm256_extractf128_ps(rC00, 0)); + _mm_storeu_ps(pC0+ldc2, _mm256_extractf128_ps(rC00, 1)); + ATL_vvrsum8(rC02, rC12, rC22, rC32, rC03, rC13, rC23, rC33); + #ifndef BETA0 + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC2), 0); + rC10 = _mm256_insertf128_ps(rC10, 
_mm_loadu_ps(pC2+ldc2), 1); + #ifdef BETAX + ATL_vmul(rC10, rC10, vBE); + #endif + ATL_vadd(rC02, rC02, rC10); + #endif + _mm_storeu_ps(pC2, _mm256_extractf128_ps(rC02, 0)); + _mm_storeu_ps(pC2+ldc2, _mm256_extractf128_ps(rC02, 1)); + { + __m128 c0, c1, cBE; + ATL_vvrsum4(rC04, rC14, rC24, rC34) + c0 = _mm256_extractf128_ps(rC04, 0); + #ifndef BETA0 + c1 = _mm_loadu_ps(pC4); + #ifdef BETAX + cBE =_mm256_extractf128_ps(vBE, 0); + c1 = _mm_mul_ps(c1, cBE); + #endif + c0 = _mm_add_ps(c0, c1); + #endif + _mm_storeu_ps(pC4, c0); + } + } + #else + #error "VLEN NOT SUPPORTED!" + #endif + pB0 = B; + pB2 = B + (ldb<<1); + pC0 += 4 SHIFT; + pC2 += 4 SHIFT; + pC4 += 4 SHIFT; + pB4 = pB2 + (ldb<<1); + A += incAm; + pA0 = A; + pA2 = pA0 + (lda<<1); + } /* end of loop over M */ + A = aa; + pA0 = A; + pA2 = A + (lda<<1); + C += incC; + pC0 = C; + B += incBn; + pB0 = B; + pC2 = C + (ldc2<<1); + pB2 = B + (ldb<<1); + pB4 = pB2 + (ldb<<1); + pC4 = pC2 + (ldc2<<1); + } /* end of loop over N */ +} diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm4x6xVL_simd.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm4x6xVL_simd.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm4x6xVL_simd.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm4x6xVL_simd.c 2016-07-28 19:43:21.000000000 +0000 @@ -0,0 +1,549 @@ +/* + * Automatically Tuned Linear Algebra Software v3.10.3 + * Copyright (C) 2016 R. 
Clint Whaley + */ +#if !defined(SREAL) && !defined(DREAL) && !defined(SCPLX) && !defined(DCPLX) + #define DREAL 1 +#endif +#include +#include "atlas_simd.h" +#include "atlas_prefetch.h" +#if defined(SCPLX) || defined(DCPLX) + #include "atlas_cplxsimd.h" + #ifndef TCPLX + #define TCPLX 1 + #endif + #define SHIFT <<1 +#else + #define SHIFT + #ifndef TCPLX + #define TREAL 1 + #endif +#endif +#ifndef TYPE + #if defined(SREAL) || defined(SCPLX) + #define TYPE float + #else + #define TYPE double + #endif +#endif +#ifndef ATL_MM_KB + #ifdef KB + #if KB > 0 + #define ATL_KBCONST 1 + #define ATL_MM_KB KB + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif +#else + #if ATL_MM_KB > 0 + #define ATL_KBCONST 1 + #else + #undef ATL_MM_KB + #define ATL_MM_KB K + #define ATL_KBCONST 0 + #endif +#endif +#ifdef BETA1 + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vadd(d_, d_, rA0); \ + ATL_vust(p_, d_); \ + } +#elif defined(BETA0) + #define ATL_vbeta(p_, d_) ATL_vust(p_, d_) +#else + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vmac(d_, rA0, vBE); \ + ATL_vust(p_, d_); \ + } +#endif + +#if ATL_VLEN == 8 + #ifdef BETA0 + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + ((p_)+ldc2)[ 0] = r1_[4]; \ + ((p_)+ldc2)[ 2] = r1_[5]; \ + ((p_)+ldc2)[ 4] = r1_[6]; \ + ((p_)+ldc2)[ 6] = r1_[7]; \ + } + #elif defined(BETA1) + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + ((p_)+ldc2)[ 0] += r1_[4]; \ + ((p_)+ldc2)[ 2] += r1_[5]; \ + ((p_)+ldc2)[ 4] += r1_[6]; \ + ((p_)+ldc2)[ 6] 
+= r1_[7]; \ + } + #else + #define wrtC0(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + ((p_)+ldc2)[ 0] = beta*((p_)+ldc2)[ 0] + r1_[4]; \ + ((p_)+ldc2)[ 2] = beta*((p_)+ldc2)[ 2] + r1_[5]; \ + ((p_)+ldc2)[ 4] = beta*((p_)+ldc2)[ 4] + r1_[6]; \ + ((p_)+ldc2)[ 6] = beta*((p_)+ldc2)[ 6] + r1_[7]; \ + } + #endif +#elif ATL_VLEN == 4 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #endif + #ifndef BETA0 + #define vwrtC(p_, rc_, rt_, rz_) \ + { \ + ATL_vunpckHI(rt_, rc_, rz_); /* rt_={0, r3, 0, r2} */ \ + ATL_vunpckLO(rc_, rc_, rz_); /* rc_={0, r1, 0, r0} */ \ + ATL_vbeta(p_, rc_); \ + ATL_vbeta((p_)+4, rt_); \ + } + #endif +#elif ATL_VLEN == 2 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + } + #endif +#endif +#ifndef ATL_RESTRICT + #if defined(__STDC_VERSION__) && (__STDC_VERSION__/100 >= 1999) + #define ATL_RESTRICT restrict + #else + #define ATL_RESTRICT + #endif +#endif + +void ATL_USERMM + (const int M, const int N, const int K, const 
TYPE alpha, + const TYPE * ATL_RESTRICT A, const int lda, + const TYPE * ATL_RESTRICT B, const int ldb, const TYPE beta, + TYPE * ATL_RESTRICT C, const int ldc) +/* + * Performs a GEMM with M,N,K unrolling (& jam) of (4,6,VLEN). + * Vectorization of VLEN=[4,8] (d,s) along K dim, vec unroll=(4,6,1). + * You may set compile-time constant K dim by defining ATL_MM_KB. + */ +{ + const TYPE *pB0=B, *pB2=pB0+(ldb<<1), *pB4=pB2+(ldb<<1), *aa=A, *pA0=A, + *pA2=pA0+(lda<<1), *pfA=A+lda*M, *pfB=B+ldb*N; + const size_t ldc2 = ldc SHIFT; + TYPE *pC0=C, *pC2=pC0+(ldc2<<1), *pC4=pC2+(ldc2<<1); + int i, j, k; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_VTYPE vBE; + #elif ATL_VLEN == 8 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0, beta, 1.0, beta, 1.0}; + #elif ATL_VLEN == 4 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0}; + #endif + const size_t incAm = (lda<<2), incBn = ldb*6; + const size_t incC=ldc2*6; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_vbcast(vBE, &beta); + #endif + + for (j=0; j < N; j += 6) + { + for (i=0; i < M; i += 4) + { + register ATL_VTYPE rA0, rA1, rA2, rA3, rB0, rC00, rC10, rC20, rC30, + rB1, rC01, rC11, rC21, rC31, rB2, rC02, rC12, rC22, + rC32, rB3, rC03, rC13, rC23, rC33, rB4, rC04, rC14, + rC24, rC34, rB5, rC05, rC15, rC25, rC35; + /* Peel K=0 iteration to avoid zero of rCxx and extra add */ + ATL_vld(rB0, pB0); + ATL_vld(rA0, pA0); + ATL_vmul(rC00, rA0, rB0); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC10, rA1, rB0); + ATL_vld(rA2, pA2); + ATL_vmul(rC20, rA2, rB0); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC30, rA3, rB0); + + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + ATL_vld(rB0, pB0); + ATL_vmul(rC01, rA0, rB1); + ATL_vld(rB2, pB2); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + ATL_vld(rB4, pB4); + ATL_vmul(rC11, rA1, rB1); + ATL_vmul(rC21, rA2, rB1); + ATL_vld(rB5, pB4+ldb); pB4 += ATL_VLEN; + ATL_vmul(rC31, rA3, rB1); +
ATL_pfl1R(pfA); + ATL_pfl1R(pfA+8); + ATL_pfl1R(pfA+16); + ATL_pfl1R(pfA+24); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmul(rC02, rA0, rB2); + ATL_pfl1R(pfB); + ATL_pfl1R(pfB+8); + ATL_vmul(rC12, rA1, rB2); + ATL_pfl1R(pfB+16); + ATL_pfl1R(pfB+24); + ATL_vmul(rC22, rA2, rB2); + ATL_vmul(rC32, rA3, rB2); + ATL_vld(rB2, pB2); + + ATL_vmul(rC03, rA0, rB3); + ATL_vmul(rC13, rA1, rB3); + ATL_vmul(rC23, rA2, rB3); + ATL_vmul(rC33, rA3, rB3); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + + ATL_vmul(rC04, rA0, rB4); + ATL_vmul(rC14, rA1, rB4); + ATL_vmul(rC24, rA2, rB4); + ATL_vmul(rC34, rA3, rB4); + ATL_vld(rB4, pB4); + + ATL_vmul(rC05, rA0, rB5); + ATL_vld(rA0, pA0); + ATL_vmul(rC15, rA1, rB5); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC25, rA2, rB5); + ATL_vld(rA2, pA2); + ATL_vmul(rC35, rA3, rB5); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + pfA += 24; pfB += 24; +/* + * Stop one iteration early to drain preload pipeline of A/B + */ + for (k=(ATL_VLEN<<1); k < ATL_MM_KB; k += ATL_VLEN) + { + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB5, pB4+ldb); pB4 += ATL_VLEN; + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vld(rB0, pB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + ATL_vld(rB2, pB2); + + ATL_vmac(rC03, rA0, rB3); + ATL_vmac(rC13, rA1, rB3); + ATL_vmac(rC23, rA2, rB3); + ATL_vmac(rC33, rA3, rB3); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + + ATL_vmac(rC04, rA0, rB4); + ATL_vmac(rC14, rA1, rB4); + ATL_vmac(rC24, rA2, rB4); + ATL_vmac(rC34, rA3, rB4); + ATL_vld(rB4, pB4); + + + ATL_vmac(rC05, rA0, rB5); + ATL_vld(rA0, pA0); + ATL_vmac(rC15, rA1, rB5); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmac(rC25, rA2, rB5); + ATL_vld(rA2, pA2); + ATL_vmac(rC35, rA3, rB5); + ATL_vld(rA3, pA2+lda);
pA2 += ATL_VLEN; + } /* end K-loop */ +/* + * Last iteration peeled out to drain preload pipeline + */ + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB5, pB4+ldb); + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + + ATL_vmac(rC03, rA0, rB3); + ATL_vmac(rC13, rA1, rB3); + ATL_vmac(rC23, rA2, rB3); + ATL_vmac(rC33, rA3, rB3); + + ATL_vmac(rC04, rA0, rB4); + ATL_vmac(rC14, rA1, rB4); + ATL_vmac(rC24, rA2, rB4); + ATL_vmac(rC34, rA3, rB4); + + ATL_vmac(rC05, rA0, rB5); + ATL_vmac(rC15, rA1, rB5); + ATL_vmac(rC25, rA2, rB5); + ATL_vmac(rC35, rA3, rB5); + + #if ATL_VLEN == 2 + ATL_vvrsum2(rC00, rC10); + ATL_vvrsum2(rC20, rC30); + ATL_vvrsum2(rC01, rC11); + ATL_vvrsum2(rC21, rC31); + ATL_vvrsum2(rC02, rC12); + ATL_vvrsum2(rC22, rC32); + ATL_vvrsum2(rC03, rC13); + ATL_vvrsum2(rC23, rC33); + ATL_vvrsum2(rC04, rC14); + ATL_vvrsum2(rC24, rC34); + ATL_vvrsum2(rC05, rC15); + ATL_vvrsum2(rC25, rC35); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+4, rC20); + wrtC(pC0+ldc2, rC01); + wrtC(pC0+ldc2+4, rC21); + wrtC(pC2, rC02); + wrtC(pC2+4, rC22); + wrtC(pC2+ldc2, rC03); + wrtC(pC2+ldc2+4, rC23); + wrtC(pC4, rC04); + wrtC(pC4+4, rC24); + wrtC(pC4+ldc2, rC05); + wrtC(pC4+ldc2+4, rC25); + #else + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+2, rC20); + ATL_vbeta(pC0+ldc2, rC01); + ATL_vbeta(pC0+ldc2+2, rC21); + ATL_vbeta(pC2, rC02); + ATL_vbeta(pC2+2, rC22); + ATL_vbeta(pC2+ldc2, rC03); + ATL_vbeta(pC2+ldc2+2, rC23); + ATL_vbeta(pC4, rC04); + ATL_vbeta(pC4+2, rC24); + ATL_vbeta(pC4+ldc2, rC05); + ATL_vbeta(pC4+ldc2+2, rC25); + #endif + #elif ATL_VLEN == 4 + ATL_vvrsum4(rC00, rC10, rC20, rC30); + ATL_vvrsum4(rC01, rC11, rC21, rC31); + ATL_vvrsum4(rC02, rC12, rC22, rC32); + ATL_vvrsum4(rC03, rC13, rC23, rC33); + ATL_vvrsum4(rC04, rC14, rC24, 
rC34); + ATL_vvrsum4(rC05, rC15, rC25, rC35); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+ldc2, rC01); + wrtC(pC2, rC02); + wrtC(pC2+ldc2, rC03); + wrtC(pC4, rC04); + wrtC(pC4+ldc2, rC05); + #else /* real */ + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+ldc2, rC01); + ATL_vbeta(pC2, rC02); + ATL_vbeta(pC2+ldc2, rC03); + ATL_vbeta(pC4, rC04); + ATL_vbeta(pC4+ldc2, rC05); + #endif + #elif ATL_VLEN == 8 && defined(ATL_AVX) && defined(ATL_SSE1) \ + && defined(SCPLX) + { + #ifndef BETA0 + ATL_vzero(rB0); + #endif + #ifdef BETA0 + ATL_vvrsum8(rC00,rC10,rC20,rC30, rC01,rC11,rC21,rC31); + wrtC(pC0, rC00); + #else + ATL_vvrsum8(rC00,rC10, rC01, rC11, rC20,rC30, rC21,rC31); + // 7 6 5 4 3 2 1 0 + // {r31, r21, r30, r20, r11, r01, r10, r00} + ATL_vuld(rC30, pC0+ldc2); + // {i31, r31, i21, r21, i11, r11, i01, r01} + ATL_vuld(rC20, pC0); + // {i30, r30, i20, r20, i10, r10, i00, r00} + ATL_vunpckHI(rC10, rC00, rB0); + // { 0, r31, 0, r21, 0, r11, 0, r01} + ATL_vunpckLO(rC00, rC00, rB0); + // { 0, r30, 0, r20, 0, r10, 0, r00} + #ifdef BETAX + ATL_vmul(rC30, rC30, vBE); + ATL_vmul(rC20, rC20, vBE); + #endif + ATL_vadd(rC10, rC10, rC30); + ATL_vadd(rC00, rC00, rC20); + ATL_vust(pC0+ldc2, rC10); + ATL_vust(pC0, rC00); + #endif + #ifdef BETA0 + ATL_vvrsum8(rC02,rC12,rC22,rC32, rC03,rC13,rC23,rC33); + wrtC(pC2, rC02); + #else + ATL_vvrsum8(rC02,rC12, rC03, rC13, rC22,rC32, rC23,rC33); + // 7 6 5 4 3 2 1 0 + // {r31, r21, r30, r20, r11, r01, r10, r00} + ATL_vuld(rC30, pC2+ldc2); + // {i31, r31, i21, r21, i11, r11, i01, r01} + ATL_vuld(rC20, pC2); + // {i30, r30, i20, r20, i10, r10, i00, r00} + ATL_vunpckHI(rC10, rC02, rB0); + // { 0, r31, 0, r21, 0, r11, 0, r01} + ATL_vunpckLO(rC00, rC02, rB0); + // { 0, r30, 0, r20, 0, r10, 0, r00} + #ifdef BETAX + ATL_vmul(rC30, rC30, vBE); + ATL_vmul(rC20, rC20, vBE); + #endif + ATL_vadd(rC10, rC10, rC30); + ATL_vadd(rC00, rC00, rC20); + ATL_vust(pC2+ldc2, rC10); + ATL_vust(pC2, rC00); + #endif + ATL_vvrsum8(rC04,rC14,rC24,rC34, rC05,rC15,rC25,rC35); + 
wrtC(pC4, rC04); + } + #elif ATL_VLEN == 8 && defined(ATL_AVX) && defined(ATL_SSE1) \ + && defined(SREAL) + { + ATL_vvrsum8(rC00, rC10, rC20, rC30, rC01, rC11, rC21, rC31); + #ifndef BETA0 + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC0), 0); + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC0+ldc2), 1); + #ifdef BETAX + ATL_vmul(rC10, rC10, vBE); + #endif + ATL_vadd(rC00, rC00, rC10); + #endif + _mm_storeu_ps(pC0, _mm256_extractf128_ps(rC00, 0)); + _mm_storeu_ps(pC0+ldc2, _mm256_extractf128_ps(rC00, 1)); + ATL_vvrsum8(rC02, rC12, rC22, rC32, rC03, rC13, rC23, rC33); + #ifndef BETA0 + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC2), 0); + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC2+ldc2), 1); + #ifdef BETAX + ATL_vmul(rC10, rC10, vBE); + #endif + ATL_vadd(rC02, rC02, rC10); + #endif + _mm_storeu_ps(pC2, _mm256_extractf128_ps(rC02, 0)); + _mm_storeu_ps(pC2+ldc2, _mm256_extractf128_ps(rC02, 1)); + ATL_vvrsum8(rC04, rC14, rC24, rC34, rC05, rC15, rC25, rC35); + #ifndef BETA0 + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC4), 0); + rC10 = _mm256_insertf128_ps(rC10, _mm_loadu_ps(pC4+ldc2), 1); + #ifdef BETAX + ATL_vmul(rC10, rC10, vBE); + #endif + ATL_vadd(rC04, rC04, rC10); + #endif + _mm_storeu_ps(pC4, _mm256_extractf128_ps(rC04, 0)); + _mm_storeu_ps(pC4+ldc2, _mm256_extractf128_ps(rC04, 1)); + } + #else + #error "VLEN NOT SUPPORTED!" 
+ #endif + pB0 = B; + pB2 = B + (ldb<<1); + pC0 += 4 SHIFT; + pC2 += 4 SHIFT; + pC4 += 4 SHIFT; + pB4 = pB2 + (ldb<<1); + A += incAm; + pA0 = A; + pA2 = pA0 + (lda<<1); + } /* end of loop over M */ + A = aa; + pA0 = A; + pA2 = A + (lda<<1); + C += incC; + pC0 = C; + B += incBn; + pB0 = B; + pC2 = C + (ldc2<<1); + pB2 = B + (ldb<<1); + pB4 = pB2 + (ldb<<1); + pC4 = pC2 + (ldc2<<1); + } /* end of loop over N */ +} diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm8x2xVL_simd.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm8x2xVL_simd.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm8x2xVL_simd.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm8x2xVL_simd.c 2016-07-28 19:43:21.000000000 +0000 @@ -0,0 +1,354 @@ +/* + * Automatically Tuned Linear Algebra Software v3.10.3 + * Copyright (C) 2016 R. Clint Whaley + */ +#if !defined(SREAL) && !defined(DREAL) && !defined(SCPLX) && !defined(DCPLX) + #define DREAL 1 +#endif +#include +#include "atlas_simd.h" +#include "atlas_prefetch.h" +#if defined(SCPLX) || defined(DCPLX) + #include "atlas_cplxsimd.h" + #ifndef TCPLX + #define TCPLX 1 + #endif + #define SHIFT <<1 +#else + #define SHIFT + #ifndef TCPLX + #define TREAL 1 + #endif +#endif +#ifndef TYPE + #if defined(SREAL) || defined(SCPLX) + #define TYPE float + #else + #define TYPE double + #endif +#endif +#ifndef ATL_MM_KB + #ifdef KB + #if KB > 0 + #define ATL_KBCONST 1 + #define ATL_MM_KB KB + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif +#else + #if ATL_MM_KB > 0 + #define ATL_KBCONST 1 + #else + #undef ATL_MM_KB + #define ATL_MM_KB K + #define ATL_KBCONST 0 + #endif +#endif +#ifdef BETA1 + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vadd(d_, d_, rA0); \ + ATL_vust(p_, d_); \ + } +#elif defined(BETA0) + #define ATL_vbeta(p_, d_) ATL_vust(p_, d_) +#else + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vmac(d_, rA0, vBE); \ + 
ATL_vust(p_, d_); \ + } +#endif + +#if defined (TCPLX) && ATL_VLEN == 4 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #endif + #ifndef BETA0 + #define vwrtC(p_, rc_, rt_, rz_) \ + { \ + ATL_vunpckHI(rt_, rc_, rz_); /* rt_={0, r3, 0, r2} */ \ + ATL_vunpckLO(rc_, rc_, rz_); /* rc_={0, r1, 0, r0} */ \ + ATL_vbeta(p_, rc_); \ + ATL_vbeta((p_)+4, rt_); \ + } + #endif +#elif defined (TCPLX) && ATL_VLEN == 2 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + } + #endif +#endif +#ifndef ATL_RESTRICT + #if defined(__STDC_VERSION__) && (__STDC_VERSION__/100 >= 1999) + #define ATL_RESTRICT restrict + #else + #define ATL_RESTRICT + #endif +#endif +void ATL_USERMM + (const int M, const int N, const int K, const TYPE alpha, + const TYPE * ATL_RESTRICT A, const int lda, + const TYPE * ATL_RESTRICT B, const int ldb, const TYPE beta, + TYPE * ATL_RESTRICT C, const int ldc) +/* + * Performs a GEMM with M,N,K unrolling (& jam) of (8,6,VLEN). + * Vectorization of VLEN=[4,8] (d,s) along K dim, vec unroll=(8,6,1). + * You may set compile-time constant K dim by defining ATL_MM_KB. 
+ */ +{ + const TYPE *pB0=B, *aa=A, *pA0=A, *pA2=pA0+(lda<<1), *pA4=pA2+(lda<<1), + *pA6=pA4+(lda<<1), *pfA=A+lda*M, *pfB=B+ldb*N; + const size_t ldc2 = ldc SHIFT; + TYPE *pC0=C; + int i, j, k; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_VTYPE vBE; + #elif ATL_VLEN == 4 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0}; + #endif + #if ATL_KBCONST == 0 + const size_t incAm = (lda<<3), incBn = ldb*2; + #else + #define incAm (ATL_MM_KB<<3) + #define incBn (2*ATL_MM_KB) + #endif + const size_t incC=ldc2*2; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_vbcast(vBE, &beta); + #endif + + for (j=0; j < N; j += 2) + { + for (i=0; i < M; i += 8) + { + register ATL_VTYPE rA0, rA1, rA2, rA3, rA4, rA5, rA6, rA7, rB0, rC00, + rC10, rC20, rC30, rC40, rC50, rC60, rC70, rB1, rC01, + rC11, rC21, rC31, rC41, rC51, rC61, rC71; + /* Peel K=0 iteration to avoid zero of rCxx and extra add */ + ATL_vld(rB0, pB0); + ATL_vld(rA0, pA0); + ATL_vmul(rC00, rA0, rB0); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC10, rA1, rB0); + ATL_vld(rA2, pA2); + ATL_vmul(rC20, rA2, rB0); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC30, rA3, rB0); + ATL_vld(rA4, pA4); + ATL_vmul(rC40, rA4, rB0); + ATL_vld(rA5, pA4+lda); pA4 += ATL_VLEN; + ATL_vmul(rC50, rA5, rB0); + ATL_vld(rA6, pA6); + ATL_vmul(rC60, rA6, rB0); + ATL_vld(rA7, pA6+lda); pA6 += ATL_VLEN; + ATL_vmul(rC70, rA7, rB0); + + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + ATL_vld(rB0, pB0); + ATL_vmul(rC01, rA0, rB1); + ATL_vld(rA0, pA0); + ATL_vmul(rC11, rA1, rB1); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC21, rA2, rB1); + ATL_vld(rA2, pA2); + ATL_vmul(rC31, rA3, rB1); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC41, rA4, rB1); + ATL_vld(rA4, pA4); + ATL_vmul(rC51, rA5, rB1); + ATL_vld(rA5, pA4+lda); pA4 += ATL_VLEN; + ATL_pfl1R(pfA); + ATL_pfl1R(pfA+8); + ATL_vmul(rC61, rA6, rB1); + ATL_vld(rA6, pA6); + ATL_vmul(rC71, rA7, rB1); + 
ATL_vld(rA7, pA6+lda); pA6 += ATL_VLEN; + + pfA += 16; pfB += 16; +/* + * Stop one iteration early to drain preload pipline of A/B + */ + for (k=(ATL_VLEN<<1); k < ATL_MM_KB; k += ATL_VLEN) + { + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vmac(rC40, rA4, rB0); + ATL_vmac(rC50, rA5, rB0); + ATL_vmac(rC60, rA6, rB0); + ATL_vmac(rC70, rA7, rB0); + ATL_vld(rB0, pB0); + + + ATL_vmac(rC01, rA0, rB1); + ATL_vld(rA0, pA0); + ATL_vmac(rC11, rA1, rB1); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmac(rC21, rA2, rB1); + ATL_vld(rA2, pA2); + ATL_vmac(rC31, rA3, rB1); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmac(rC41, rA4, rB1); + ATL_vld(rA4, pA4); + ATL_vmac(rC51, rA5, rB1); + ATL_vld(rA5, pA4+lda); pA4 += ATL_VLEN; + ATL_vmac(rC61, rA6, rB1); + ATL_vld(rA6, pA6); + ATL_vmac(rC71, rA7, rB1); + ATL_vld(rA7, pA6+lda); pA6 += ATL_VLEN; + } /* end K-loop */ +/* + * Last iteration peeled out to drain preload pipeline + */ + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB1, pB0+ldb); + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vmac(rC40, rA4, rB0); + ATL_vmac(rC50, rA5, rB0); + ATL_vmac(rC60, rA6, rB0); + ATL_vmac(rC70, rA7, rB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + ATL_vmac(rC41, rA4, rB1); + ATL_vmac(rC51, rA5, rB1); + ATL_vmac(rC61, rA6, rB1); + ATL_vmac(rC71, rA7, rB1); + + #if ATL_VLEN == 2 + ATL_vvrsum2(rC00, rC10); + ATL_vvrsum2(rC20, rC30); + ATL_vvrsum2(rC40, rC50); + ATL_vvrsum2(rC60, rC70); + ATL_vvrsum2(rC01, rC11); + ATL_vvrsum2(rC21, rC31); + ATL_vvrsum2(rC41, rC51); + ATL_vvrsum2(rC61, rC71); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+4, rC20); + wrtC(pC0+8, rC40); + wrtC(pC0+12, rC60); + pC0 += ldc2; + wrtC(pC0, rC01); + wrtC(pC0+4, rC21); + wrtC(pC0+8, rC41); + wrtC(pC0+12, rC61); + #else + ATL_vbeta(pC0, rC00); + 
ATL_vbeta(pC0+2, rC20); + ATL_vbeta(pC0+4, rC40); + ATL_vbeta(pC0+6, rC60); + pC0 += ldc2; + ATL_vbeta(pC0, rC01); + ATL_vbeta(pC0+2, rC21); + ATL_vbeta(pC0+4, rC41); + ATL_vbeta(pC0+6, rC61); + #endif + #elif ATL_VLEN == 4 + #if defined(TCPLX) && !defined(BETA0) // want permuted order + ATL_vvrsum4(rC00, rC20, rC10, rC30); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC40, rC50, rC60, rC70); + ATL_vvrsum4(rC01, rC21, rC11, rC31); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC41, rC51, rC61, rC71); + #else // want natural order + ATL_vvrsum4(rC00, rC10, rC20, rC30); + ATL_vvrsum4(rC40, rC50, rC60, rC70); + ATL_vvrsum4(rC01, rC11, rC21, rC31); + ATL_vvrsum4(rC41, rC51, rC61, rC71); + #endif + #ifdef TCPLX + #ifndef BETA0 + ATL_vzero(rB0); // rB0 ={ 0, 0, 0, 0} + vwrtC(pC0, rC00, rC20, rB0); + wrtC(pC0+8, rC40); + pC0 += ldc2; + vwrtC(pC0, rC01, rC21, rB0); + wrtC(pC0+8, rC41); + #else + wrtC(pC0, rC00); + wrtC(pC0+8, rC40); + pC0 += ldc2; + wrtC(pC0, rC01); + wrtC(pC0+8, rC41); + #endif + #else /* real */ + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+4, rC40); + pC0 += ldc; + ATL_vbeta(pC0, rC01); + ATL_vbeta(pC0+4, rC41); + #endif + #else + #error "VLEN NOT SUPPORTED!" + #endif + pC0 -= ldc2*1 - (8 SHIFT); + pB0 = B; + A += incAm; + pA0 = A; + pA2 = pA0 + (lda<<1); + pA4 = pA2 + (lda<<1); + pA6 = pA4 + (lda<<1); + } /* end of loop over M */ + A = aa; + pA0 = A; + pA2 = A + (lda<<1); + pA4 = pA2 + (lda<<1); + pA6 = pA4 + (lda<<1); + C += incC; + pC0 = C; + B += incBn; + pB0 = B; + } /* end of loop over N */ +} diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm8x4xVL_simd.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm8x4xVL_simd.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm8x4xVL_simd.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm8x4xVL_simd.c 2016-07-28 19:43:21.000000000 +0000 @@ -0,0 +1,477 @@ +/* + * Automatically Tuned Linear Algebra Software v3.10.3 + * Copyright (C) 2016 R. 
Clint Whaley + */ +#if !defined(SREAL) && !defined(DREAL) && !defined(SCPLX) && !defined(DCPLX) + #define DREAL 1 +#endif +#include +#include "atlas_simd.h" +#include "atlas_prefetch.h" +#if defined(SCPLX) || defined(DCPLX) + #include "atlas_cplxsimd.h" + #ifndef TCPLX + #define TCPLX 1 + #endif + #define SHIFT <<1 +#else + #define SHIFT + #ifndef TCPLX + #define TREAL 1 + #endif +#endif +#ifndef TYPE + #if defined(SREAL) || defined(SCPLX) + #define TYPE float + #else + #define TYPE double + #endif +#endif +#ifndef ATL_MM_KB + #ifdef KB + #if KB > 0 + #define ATL_KBCONST 1 + #define ATL_MM_KB KB + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif +#else + #if ATL_MM_KB > 0 + #define ATL_KBCONST 1 + #else + #undef ATL_MM_KB + #define ATL_MM_KB K + #define ATL_KBCONST 0 + #endif +#endif +#ifdef BETA1 + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vadd(d_, d_, rA0); \ + ATL_vust(p_, d_); \ + } +#elif defined(BETA0) + #define ATL_vbeta(p_, d_) ATL_vust(p_, d_) +#else + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vmac(d_, rA0, vBE); \ + ATL_vust(p_, d_); \ + } +#endif + +#if defined (TCPLX) && ATL_VLEN == 4 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #endif + #ifndef BETA0 + #define vwrtC(p_, rc_, rt_, rz_) \ + { \ + ATL_vunpckHI(rt_, rc_, rz_); /* rt_={0, r3, 0, r2} */ \ + ATL_vunpckLO(rc_, rc_, rz_); /* rc_={0, r1, 0, r0} */ \ + ATL_vbeta(p_, rc_); \ + ATL_vbeta((p_)+4, rt_); \ + } + #endif +#elif defined 
(TCPLX) && ATL_VLEN == 2 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + } + #endif +#endif +#ifndef ATL_RESTRICT + #if defined(__STDC_VERSION__) && (__STDC_VERSION__/100 >= 1999) + #define ATL_RESTRICT restrict + #else + #define ATL_RESTRICT + #endif +#endif +void ATL_USERMM + (const int M, const int N, const int K, const TYPE alpha, + const TYPE * ATL_RESTRICT A, const int lda, + const TYPE * ATL_RESTRICT B, const int ldb, const TYPE beta, + TYPE * ATL_RESTRICT C, const int ldc) +/* + * Performs a GEMM with M,N,K unrolling (& jam) of (8,4,VLEN). + * Vectorization along K dim for ATL_VLEN of 2 or 4, vec unroll=(8,4,1). + * You may set compile-time constant K dim by defining ATL_MM_KB. + */ +{ + const TYPE *pB0=B, *pB2=pB0+(ldb<<1), *aa=A, *pA0=A, *pA2=pA0+(lda<<1), + *pA4=pA2+(lda<<1), *pA6=pA4+(lda<<1), *pfA=A+lda*M, *pfB=B+ldb*N; + const size_t ldc2 = ldc SHIFT; + TYPE *pC0=C; + int i, j, k; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_VTYPE vBE; + #elif ATL_VLEN == 4 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0}; + #endif + #if ATL_KBCONST == 0 + const size_t incAm = (lda<<3), incBn = ldb*4; + #else + #define incAm (ATL_MM_KB<<3) + #define incBn (4*ATL_MM_KB) + #endif + const size_t incC=ldc2*4; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_vbcast(vBE, &beta); + #endif + + for (j=0; j < N; j += 4) + { + for (i=0; i < M; i += 8) + { + register ATL_VTYPE rA0, rA1, rA2, rA3, rA4, rA5, rA6, rA7, rB0, rC00, + rC10, rC20, rC30, rC40, rC50, rC60, rC70, rB1, rC01, + rC11, rC21, rC31, rC41, rC51, rC61, rC71, rB2, rC02, + rC12, rC22, rC32, rC42, rC52, rC62, rC72, rB3, rC03, + rC13, rC23, rC33, rC43, rC53, rC63, rC73; + /* Peel 
K=0 iteration to avoid zero of rCxx and extra add */ + ATL_vld(rB0, pB0); + ATL_vld(rA0, pA0); + ATL_vmul(rC00, rA0, rB0); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC10, rA1, rB0); + ATL_vld(rA2, pA2); + ATL_vmul(rC20, rA2, rB0); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC30, rA3, rB0); + ATL_vld(rA4, pA4); + ATL_vmul(rC40, rA4, rB0); + ATL_vld(rA5, pA4+lda); pA4 += ATL_VLEN; + ATL_vmul(rC50, rA5, rB0); + ATL_vld(rA6, pA6); + ATL_vmul(rC60, rA6, rB0); + ATL_vld(rA7, pA6+lda); pA6 += ATL_VLEN; + ATL_vmul(rC70, rA7, rB0); + + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + ATL_vld(rB0, pB0); + ATL_vmul(rC01, rA0, rB1); + ATL_vld(rB2, pB2); + ATL_vmul(rC11, rA1, rB1); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + ATL_vmul(rC21, rA2, rB1); + ATL_vmul(rC31, rA3, rB1); + ATL_vmul(rC41, rA4, rB1); + ATL_vmul(rC51, rA5, rB1); + ATL_pfl1R(pfA); + ATL_pfl1R(pfA+8); + ATL_vmul(rC61, rA6, rB1); + ATL_pfl1R(pfA+16); + ATL_pfl1R(pfA+24); + ATL_vmul(rC71, rA7, rB1); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmul(rC02, rA0, rB2); + ATL_pfl1R(pfB); + ATL_pfl1R(pfB+8); + ATL_vmul(rC12, rA1, rB2); + ATL_pfl1R(pfB+16); + ATL_pfl1R(pfB+24); + ATL_vmul(rC22, rA2, rB2); + ATL_vmul(rC32, rA3, rB2); + ATL_vmul(rC42, rA4, rB2); + ATL_vmul(rC52, rA5, rB2); + ATL_vmul(rC62, rA6, rB2); + ATL_vmul(rC72, rA7, rB2); + ATL_vld(rB2, pB2); + + ATL_vmul(rC03, rA0, rB3); + ATL_vld(rA0, pA0); + ATL_vmul(rC13, rA1, rB3); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC23, rA2, rB3); + ATL_vld(rA2, pA2); + ATL_vmul(rC33, rA3, rB3); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC43, rA4, rB3); + ATL_vld(rA4, pA4); + ATL_vmul(rC53, rA5, rB3); + ATL_vld(rA5, pA4+lda); pA4 += ATL_VLEN; + ATL_vmul(rC63, rA6, rB3); + ATL_vld(rA6, pA6); + ATL_vmul(rC73, rA7, rB3); + ATL_vld(rA7, pA6+lda); pA6 += ATL_VLEN; + pfA += 32; pfB += 32; +/* + * Stop one iteration early to drain preload pipline of A/B + */ + for (k=(ATL_VLEN<<1); k < ATL_MM_KB; k += ATL_VLEN) + { + ATL_vmac(rC00, 
rA0, rB0); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vmac(rC40, rA4, rB0); + ATL_vmac(rC50, rA5, rB0); + ATL_vmac(rC60, rA6, rB0); + ATL_vmac(rC70, rA7, rB0); + ATL_vld(rB0, pB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + ATL_vmac(rC41, rA4, rB1); + ATL_vmac(rC51, rA5, rB1); + ATL_vmac(rC61, rA6, rB1); + ATL_vmac(rC71, rA7, rB1); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + ATL_vmac(rC42, rA4, rB2); + ATL_vmac(rC52, rA5, rB2); + ATL_vmac(rC62, rA6, rB2); + ATL_vmac(rC72, rA7, rB2); + ATL_vld(rB2, pB2); + + + ATL_vmac(rC03, rA0, rB3); + ATL_vld(rA0, pA0); + ATL_vmac(rC13, rA1, rB3); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmac(rC23, rA2, rB3); + ATL_vld(rA2, pA2); + ATL_vmac(rC33, rA3, rB3); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmac(rC43, rA4, rB3); + ATL_vld(rA4, pA4); + ATL_vmac(rC53, rA5, rB3); + ATL_vld(rA5, pA4+lda); pA4 += ATL_VLEN; + ATL_vmac(rC63, rA6, rB3); + ATL_vld(rA6, pA6); + ATL_vmac(rC73, rA7, rB3); + ATL_vld(rA7, pA6+lda); pA6 += ATL_VLEN; + } /* end K-loop */ +/* + * Last iteration peeled out to drain preload pipeline + */ + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB3, pB2+ldb); + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vmac(rC40, rA4, rB0); + ATL_vmac(rC50, rA5, rB0); + ATL_vmac(rC60, rA6, rB0); + ATL_vmac(rC70, rA7, rB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + ATL_vmac(rC41, rA4, rB1); + ATL_vmac(rC51, rA5, rB1); + ATL_vmac(rC61, rA6, rB1); + ATL_vmac(rC71, rA7, rB1); + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + ATL_vmac(rC42, rA4, rB2); + ATL_vmac(rC52, rA5, rB2); + 
ATL_vmac(rC62, rA6, rB2); + ATL_vmac(rC72, rA7, rB2); + + ATL_vmac(rC03, rA0, rB3); + ATL_vmac(rC13, rA1, rB3); + ATL_vmac(rC23, rA2, rB3); + ATL_vmac(rC33, rA3, rB3); + ATL_vmac(rC43, rA4, rB3); + ATL_vmac(rC53, rA5, rB3); + ATL_vmac(rC63, rA6, rB3); + ATL_vmac(rC73, rA7, rB3); + + #if ATL_VLEN == 2 + ATL_vvrsum2(rC00, rC10); + ATL_vvrsum2(rC20, rC30); + ATL_vvrsum2(rC40, rC50); + ATL_vvrsum2(rC60, rC70); + ATL_vvrsum2(rC01, rC11); + ATL_vvrsum2(rC21, rC31); + ATL_vvrsum2(rC41, rC51); + ATL_vvrsum2(rC61, rC71); + ATL_vvrsum2(rC02, rC12); + ATL_vvrsum2(rC22, rC32); + ATL_vvrsum2(rC42, rC52); + ATL_vvrsum2(rC62, rC72); + ATL_vvrsum2(rC03, rC13); + ATL_vvrsum2(rC23, rC33); + ATL_vvrsum2(rC43, rC53); + ATL_vvrsum2(rC63, rC73); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+4, rC20); + wrtC(pC0+8, rC40); + wrtC(pC0+12, rC60); + pC0 += ldc2; + wrtC(pC0, rC01); + wrtC(pC0+4, rC21); + wrtC(pC0+8, rC41); + wrtC(pC0+12, rC61); + pC0 += ldc2; + wrtC(pC0, rC02); + wrtC(pC0+4, rC22); + wrtC(pC0+8, rC42); + wrtC(pC0+12, rC62); + pC0 += ldc2; + wrtC(pC0, rC03); + wrtC(pC0+4, rC23); + wrtC(pC0+8, rC43); + wrtC(pC0+12, rC63); + #else + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+2, rC20); + ATL_vbeta(pC0+4, rC40); + ATL_vbeta(pC0+6, rC60); + pC0 += ldc2; + ATL_vbeta(pC0, rC01); + ATL_vbeta(pC0+2, rC21); + ATL_vbeta(pC0+4, rC41); + ATL_vbeta(pC0+6, rC61); + pC0 += ldc2; + ATL_vbeta(pC0, rC02); + ATL_vbeta(pC0+2, rC22); + ATL_vbeta(pC0+4, rC42); + ATL_vbeta(pC0+6, rC62); + pC0 += ldc2; + ATL_vbeta(pC0, rC03); + ATL_vbeta(pC0+2, rC23); + ATL_vbeta(pC0+4, rC43); + ATL_vbeta(pC0+6, rC63); + #endif + #elif ATL_VLEN == 4 + #if defined(TCPLX) && !defined(BETA0) // want permuted order + ATL_vvrsum4(rC00, rC20, rC10, rC30); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC40, rC50, rC60, rC70); + ATL_vvrsum4(rC01, rC21, rC11, rC31); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC41, rC51, rC61, rC71); + ATL_vvrsum4(rC02, rC22, rC12, rC32); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC42, rC52, rC62, rC72); + 
ATL_vvrsum4(rC03, rC23, rC13, rC33); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC43, rC53, rC63, rC73); + #else // want natural order + ATL_vvrsum4(rC00, rC10, rC20, rC30); + ATL_vvrsum4(rC40, rC50, rC60, rC70); + ATL_vvrsum4(rC01, rC11, rC21, rC31); + ATL_vvrsum4(rC41, rC51, rC61, rC71); + ATL_vvrsum4(rC02, rC12, rC22, rC32); + ATL_vvrsum4(rC42, rC52, rC62, rC72); + ATL_vvrsum4(rC03, rC13, rC23, rC33); + ATL_vvrsum4(rC43, rC53, rC63, rC73); + #endif + #ifdef TCPLX + #ifndef BETA0 + ATL_vzero(rB0); // rB0 ={ 0, 0, 0, 0} + vwrtC(pC0, rC00, rC20, rB0); + wrtC(pC0+8, rC40); + pC0 += ldc2; + vwrtC(pC0, rC01, rC21, rB0); + wrtC(pC0+8, rC41); + pC0 += ldc2; + vwrtC(pC0, rC02, rC22, rB0); + wrtC(pC0+8, rC42); + pC0 += ldc2; + vwrtC(pC0, rC03, rC23, rB0); + wrtC(pC0+8, rC43); + #else + wrtC(pC0, rC00); + wrtC(pC0+8, rC40); + pC0 += ldc2; + wrtC(pC0, rC01); + wrtC(pC0+8, rC41); + pC0 += ldc2; + wrtC(pC0, rC02); + wrtC(pC0+8, rC42); + pC0 += ldc2; + wrtC(pC0, rC03); + wrtC(pC0+8, rC43); + #endif + #else /* real */ + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+4, rC40); + pC0 += ldc; + ATL_vbeta(pC0, rC01); + ATL_vbeta(pC0+4, rC41); + pC0 += ldc; + ATL_vbeta(pC0, rC02); + ATL_vbeta(pC0+4, rC42); + pC0 += ldc; + ATL_vbeta(pC0, rC03); + ATL_vbeta(pC0+4, rC43); + #endif + #else + #error "VLEN NOT SUPPORTED!" 
+ #endif + pC0 -= ldc2*3 - (8 SHIFT); + pB0 = B; + pB2 = B + (ldb<<1); + A += incAm; + pA0 = A; + pA2 = pA0 + (lda<<1); + pA4 = pA2 + (lda<<1); + pA6 = pA4 + (lda<<1); + } /* end of loop over M */ + A = aa; + pA0 = A; + pA2 = A + (lda<<1); + pA4 = pA2 + (lda<<1); + pA6 = pA4 + (lda<<1); + C += incC; + pC0 = C; + B += incBn; + pB0 = B; + pB2 = B + (ldb<<1); + } /* end of loop over N */ +} diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm8x5xVL_simd.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm8x5xVL_simd.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm8x5xVL_simd.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm8x5xVL_simd.c 2016-07-28 19:43:21.000000000 +0000 @@ -0,0 +1,543 @@ +/* + * Automatically Tuned Linear Algebra Software v3.10.3 + * Copyright (C) 2016 R. Clint Whaley + */ +#if !defined(SREAL) && !defined(DREAL) && !defined(SCPLX) && !defined(DCPLX) + #define DREAL 1 +#endif +#include +#include "atlas_simd.h" +#include "atlas_prefetch.h" +#if defined(SCPLX) || defined(DCPLX) + #include "atlas_cplxsimd.h" + #ifndef TCPLX + #define TCPLX 1 + #endif + #define SHIFT <<1 +#else + #define SHIFT + #ifndef TCPLX + #define TREAL 1 + #endif +#endif +#ifndef TYPE + #if defined(SREAL) || defined(SCPLX) + #define TYPE float + #else + #define TYPE double + #endif +#endif +#ifndef ATL_MM_KB + #ifdef KB + #if KB > 0 + #define ATL_KBCONST 1 + #define ATL_MM_KB KB + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif +#else + #if ATL_MM_KB > 0 + #define ATL_KBCONST 1 + #else + #undef ATL_MM_KB + #define ATL_MM_KB K + #define ATL_KBCONST 0 + #endif +#endif +#ifdef BETA1 + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vadd(d_, d_, rA0); \ + ATL_vust(p_, d_); \ + } +#elif defined(BETA0) + #define ATL_vbeta(p_, d_) ATL_vust(p_, d_) +#else + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vmac(d_, rA0, vBE); \ + ATL_vust(p_, d_); \ + } +#endif + 
+#if defined (TCPLX) && ATL_VLEN == 4 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #endif + #ifndef BETA0 + #define vwrtC(p_, rc_, rt_, rz_) \ + { \ + ATL_vunpckHI(rt_, rc_, rz_); /* rt_={0, r3, 0, r2} */ \ + ATL_vunpckLO(rc_, rc_, rz_); /* rc_={0, r1, 0, r0} */ \ + ATL_vbeta(p_, rc_); \ + ATL_vbeta((p_)+4, rt_); \ + } + #endif +#elif defined (TCPLX) && ATL_VLEN == 2 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + } + #endif +#endif +#ifndef ATL_RESTRICT + #if defined(__STDC_VERSION__) && (__STDC_VERSION__/100 >= 1999) + #define ATL_RESTRICT restrict + #else + #define ATL_RESTRICT + #endif +#endif +void ATL_USERMM + (const int M, const int N, const int K, const TYPE alpha, + const TYPE * ATL_RESTRICT A, const int lda, + const TYPE * ATL_RESTRICT B, const int ldb, const TYPE beta, + TYPE * ATL_RESTRICT C, const int ldc) +/* + * Performs a GEMM with M,N,K unrolling (& jam) of (8,5,VLEN). + * Vectorization along K dim for ATL_VLEN of 2 or 4, vec unroll=(8,5,1). + * You may set compile-time constant K dim by defining ATL_MM_KB. 
+ */ +{ + const TYPE *pB0=B, *pB2=pB0+(ldb<<1), *pB4=pB2+(ldb<<1), *aa=A, *pA0=A, + *pA2=pA0+(lda<<1), *pA4=pA2+(lda<<1), *pA6=pA4+(lda<<1), + *pfA=A+lda*M, *pfB=B+ldb*N; + const size_t ldc2 = ldc SHIFT; + TYPE *pC0=C; + int i, j, k; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_VTYPE vBE; + #elif ATL_VLEN == 4 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0}; + #endif + #if ATL_KBCONST == 0 + const size_t incAm = (lda<<3), incBn = ldb*5; + #else + #define incAm (ATL_MM_KB<<3) + #define incBn (5*ATL_MM_KB) + #endif + const size_t incC=ldc2*5; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_vbcast(vBE, &beta); + #endif + + for (j=0; j < N; j += 5) + { + for (i=0; i < M; i += 8) + { + register ATL_VTYPE rA0, rA1, rA2, rA3, rA4, rA5, rA6, rA7, rB0, rC00, + rC10, rC20, rC30, rC40, rC50, rC60, rC70, rB1, rC01, + rC11, rC21, rC31, rC41, rC51, rC61, rC71, rB2, rC02, + rC12, rC22, rC32, rC42, rC52, rC62, rC72, rB3, rC03, + rC13, rC23, rC33, rC43, rC53, rC63, rC73, rB4, rC04, + rC14, rC24, rC34, rC44, rC54, rC64, rC74; + /* Peel K=0 iteration to avoid zero of rCxx and extra add */ + ATL_vld(rB0, pB0); + ATL_vld(rA0, pA0); + ATL_vmul(rC00, rA0, rB0); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC10, rA1, rB0); + ATL_vld(rA2, pA2); + ATL_vmul(rC20, rA2, rB0); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC30, rA3, rB0); + ATL_vld(rA4, pA4); + ATL_vmul(rC40, rA4, rB0); + ATL_vld(rA5, pA4+lda); pA4 += ATL_VLEN; + ATL_vmul(rC50, rA5, rB0); + ATL_vld(rA6, pA6); + ATL_vmul(rC60, rA6, rB0); + ATL_vld(rA7, pA6+lda); pA6 += ATL_VLEN; + ATL_vmul(rC70, rA7, rB0); + + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + ATL_vld(rB0, pB0); + ATL_vmul(rC01, rA0, rB1); + ATL_vld(rB2, pB2); + ATL_vmul(rC11, rA1, rB1); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + ATL_vld(rB4, pB4); + pB4 += ATL_VLEN; + ATL_vmul(rC21, rA2, rB1); + ATL_vmul(rC31, rA3, rB1); + ATL_vmul(rC41, rA4, rB1); + ATL_vmul(rC51, rA5, rB1); + 
ATL_pfl1R(pfA); + ATL_pfl1R(pfA+8); + ATL_vmul(rC61, rA6, rB1); + ATL_pfl1R(pfA+16); + ATL_pfl1R(pfA+24); + ATL_vmul(rC71, rA7, rB1); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmul(rC02, rA0, rB2); + ATL_pfl1R(pfB); + ATL_pfl1R(pfB+8); + ATL_vmul(rC12, rA1, rB2); + ATL_pfl1R(pfB+16); + ATL_pfl1R(pfB+24); + ATL_vmul(rC22, rA2, rB2); + ATL_pfl1R(pfB+32); + ATL_pfl1R(pfB+40); + ATL_vmul(rC32, rA3, rB2); + ATL_vmul(rC42, rA4, rB2); + ATL_pfl1R(pfA+32); + ATL_pfl1R(pfA+40); + ATL_vmul(rC52, rA5, rB2); + ATL_vmul(rC62, rA6, rB2); + ATL_vmul(rC72, rA7, rB2); + ATL_vld(rB2, pB2); + + ATL_vmul(rC03, rA0, rB3); + ATL_vmul(rC13, rA1, rB3); + ATL_vmul(rC23, rA2, rB3); + ATL_vmul(rC33, rA3, rB3); + ATL_vmul(rC43, rA4, rB3); + ATL_vmul(rC53, rA5, rB3); + ATL_vmul(rC63, rA6, rB3); + ATL_vmul(rC73, rA7, rB3); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; /* pB2 walks B: stride is ldb */ + + ATL_vmul(rC04, rA0, rB4); + ATL_vld(rA0, pA0); + ATL_vmul(rC14, rA1, rB4); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC24, rA2, rB4); + ATL_vld(rA2, pA2); + ATL_vmul(rC34, rA3, rB4); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC44, rA4, rB4); + ATL_vld(rA4, pA4); + ATL_vmul(rC54, rA5, rB4); + ATL_vld(rA5, pA4+lda); pA4 += ATL_VLEN; + ATL_vmul(rC64, rA6, rB4); + ATL_vld(rA6, pA6); + ATL_vmul(rC74, rA7, rB4); + ATL_vld(rA7, pA6+lda); pA6 += ATL_VLEN; + pfA += 40; pfB += 40; +/* + * Stop one iteration early to drain preload pipeline of A/B + */ + for (k=(ATL_VLEN<<1); k < ATL_MM_KB; k += ATL_VLEN) + { + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB4, pB4); pB4 += ATL_VLEN; + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vmac(rC40, rA4, rB0); + ATL_vmac(rC50, rA5, rB0); + ATL_vmac(rC60, rA6, rB0); + ATL_vmac(rC70, rA7, rB0); + ATL_vld(rB0, pB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + ATL_vmac(rC41, rA4, rB1); + ATL_vmac(rC51, rA5, rB1); + ATL_vmac(rC61, rA6, rB1); + ATL_vmac(rC71, rA7, rB1); + 
ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + ATL_vmac(rC42, rA4, rB2); + ATL_vmac(rC52, rA5, rB2); + ATL_vmac(rC62, rA6, rB2); + ATL_vmac(rC72, rA7, rB2); + ATL_vld(rB2, pB2); + + ATL_vmac(rC03, rA0, rB3); + ATL_vmac(rC13, rA1, rB3); + ATL_vmac(rC23, rA2, rB3); + ATL_vmac(rC33, rA3, rB3); + ATL_vmac(rC43, rA4, rB3); + ATL_vmac(rC53, rA5, rB3); + ATL_vmac(rC63, rA6, rB3); + ATL_vmac(rC73, rA7, rB3); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + + + ATL_vmac(rC04, rA0, rB4); + ATL_vld(rA0, pA0); + ATL_vmac(rC14, rA1, rB4); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmac(rC24, rA2, rB4); + ATL_vld(rA2, pA2); + ATL_vmac(rC34, rA3, rB4); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmac(rC44, rA4, rB4); + ATL_vld(rA4, pA4); + ATL_vmac(rC54, rA5, rB4); + ATL_vld(rA5, pA4+lda); pA4 += ATL_VLEN; + ATL_vmac(rC64, rA6, rB4); + ATL_vld(rA6, pA6); + ATL_vmac(rC74, rA7, rB4); + ATL_vld(rA7, pA6+lda); pA6 += ATL_VLEN; + } /* end K-loop */ +/* + * Last iteration peeled out to drain preload pipeline + */ + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB4, pB4); + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vmac(rC40, rA4, rB0); + ATL_vmac(rC50, rA5, rB0); + ATL_vmac(rC60, rA6, rB0); + ATL_vmac(rC70, rA7, rB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + ATL_vmac(rC41, rA4, rB1); + ATL_vmac(rC51, rA5, rB1); + ATL_vmac(rC61, rA6, rB1); + ATL_vmac(rC71, rA7, rB1); + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + ATL_vmac(rC42, rA4, rB2); + ATL_vmac(rC52, rA5, rB2); + ATL_vmac(rC62, rA6, rB2); + ATL_vmac(rC72, rA7, rB2); + + ATL_vmac(rC03, rA0, rB3); + ATL_vmac(rC13, rA1, rB3); + ATL_vmac(rC23, rA2, rB3); + ATL_vmac(rC33, rA3, rB3); + ATL_vmac(rC43, rA4, rB3); + ATL_vmac(rC53, rA5, rB3); + 
ATL_vmac(rC63, rA6, rB3); + ATL_vmac(rC73, rA7, rB3); + + ATL_vmac(rC04, rA0, rB4); + ATL_vmac(rC14, rA1, rB4); + ATL_vmac(rC24, rA2, rB4); + ATL_vmac(rC34, rA3, rB4); + ATL_vmac(rC44, rA4, rB4); + ATL_vmac(rC54, rA5, rB4); + ATL_vmac(rC64, rA6, rB4); + ATL_vmac(rC74, rA7, rB4); + + #if ATL_VLEN == 2 + ATL_vvrsum2(rC00, rC10); + ATL_vvrsum2(rC20, rC30); + ATL_vvrsum2(rC40, rC50); + ATL_vvrsum2(rC60, rC70); + ATL_vvrsum2(rC01, rC11); + ATL_vvrsum2(rC21, rC31); + ATL_vvrsum2(rC41, rC51); + ATL_vvrsum2(rC61, rC71); + ATL_vvrsum2(rC02, rC12); + ATL_vvrsum2(rC22, rC32); + ATL_vvrsum2(rC42, rC52); + ATL_vvrsum2(rC62, rC72); + ATL_vvrsum2(rC03, rC13); + ATL_vvrsum2(rC23, rC33); + ATL_vvrsum2(rC43, rC53); + ATL_vvrsum2(rC63, rC73); + ATL_vvrsum2(rC04, rC14); + ATL_vvrsum2(rC24, rC34); + ATL_vvrsum2(rC44, rC54); + ATL_vvrsum2(rC64, rC74); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+4, rC20); + wrtC(pC0+8, rC40); + wrtC(pC0+12, rC60); + pC0 += ldc2; + wrtC(pC0, rC01); + wrtC(pC0+4, rC21); + wrtC(pC0+8, rC41); + wrtC(pC0+12, rC61); + pC0 += ldc2; + wrtC(pC0, rC02); + wrtC(pC0+4, rC22); + wrtC(pC0+8, rC42); + wrtC(pC0+12, rC62); + pC0 += ldc2; + wrtC(pC0, rC03); + wrtC(pC0+4, rC23); + wrtC(pC0+8, rC43); + wrtC(pC0+12, rC63); + pC0 += ldc2; + wrtC(pC0, rC04); + wrtC(pC0+4, rC24); + wrtC(pC0+8, rC44); + wrtC(pC0+12, rC64); + #else + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+2, rC20); + ATL_vbeta(pC0+4, rC40); + ATL_vbeta(pC0+6, rC60); + pC0 += ldc2; + ATL_vbeta(pC0, rC01); + ATL_vbeta(pC0+2, rC21); + ATL_vbeta(pC0+4, rC41); + ATL_vbeta(pC0+6, rC61); + pC0 += ldc2; + ATL_vbeta(pC0, rC02); + ATL_vbeta(pC0+2, rC22); + ATL_vbeta(pC0+4, rC42); + ATL_vbeta(pC0+6, rC62); + pC0 += ldc2; + ATL_vbeta(pC0, rC03); + ATL_vbeta(pC0+2, rC23); + ATL_vbeta(pC0+4, rC43); + ATL_vbeta(pC0+6, rC63); + pC0 += ldc2; + ATL_vbeta(pC0, rC04); + ATL_vbeta(pC0+2, rC24); + ATL_vbeta(pC0+4, rC44); + ATL_vbeta(pC0+6, rC64); + #endif + #elif ATL_VLEN == 4 + #if defined(TCPLX) && !defined(BETA0) // want permuted 
order + ATL_vvrsum4(rC00, rC20, rC10, rC30); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC40, rC50, rC60, rC70); + ATL_vvrsum4(rC01, rC21, rC11, rC31); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC41, rC51, rC61, rC71); + ATL_vvrsum4(rC02, rC22, rC12, rC32); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC42, rC52, rC62, rC72); + ATL_vvrsum4(rC03, rC23, rC13, rC33); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC43, rC53, rC63, rC73); + ATL_vvrsum4(rC04, rC24, rC14, rC34); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC44, rC54, rC64, rC74); + #else // want natural order + ATL_vvrsum4(rC00, rC10, rC20, rC30); + ATL_vvrsum4(rC40, rC50, rC60, rC70); + ATL_vvrsum4(rC01, rC11, rC21, rC31); + ATL_vvrsum4(rC41, rC51, rC61, rC71); + ATL_vvrsum4(rC02, rC12, rC22, rC32); + ATL_vvrsum4(rC42, rC52, rC62, rC72); + ATL_vvrsum4(rC03, rC13, rC23, rC33); + ATL_vvrsum4(rC43, rC53, rC63, rC73); + ATL_vvrsum4(rC04, rC14, rC24, rC34); + ATL_vvrsum4(rC44, rC54, rC64, rC74); + #endif + #ifdef TCPLX + #ifndef BETA0 + ATL_vzero(rB0); // rB0 ={ 0, 0, 0, 0} + vwrtC(pC0, rC00, rC20, rB0); + wrtC(pC0+8, rC40); + pC0 += ldc2; + vwrtC(pC0, rC01, rC21, rB0); + wrtC(pC0+8, rC41); + pC0 += ldc2; + vwrtC(pC0, rC02, rC22, rB0); + wrtC(pC0+8, rC42); + pC0 += ldc2; + vwrtC(pC0, rC03, rC23, rB0); + wrtC(pC0+8, rC43); + pC0 += ldc2; + vwrtC(pC0, rC04, rC24, rB0); + wrtC(pC0+8, rC44); + #else + wrtC(pC0, rC00); + wrtC(pC0+8, rC40); + pC0 += ldc2; + wrtC(pC0, rC01); + wrtC(pC0+8, rC41); + pC0 += ldc2; + wrtC(pC0, rC02); + wrtC(pC0+8, rC42); + pC0 += ldc2; + wrtC(pC0, rC03); + wrtC(pC0+8, rC43); + pC0 += ldc2; + wrtC(pC0, rC04); + wrtC(pC0+8, rC44); + #endif + #else /* real */ + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+4, rC40); + pC0 += ldc; + ATL_vbeta(pC0, rC01); + ATL_vbeta(pC0+4, rC41); + pC0 += ldc; + ATL_vbeta(pC0, rC02); + ATL_vbeta(pC0+4, rC42); + pC0 += ldc; + ATL_vbeta(pC0, rC03); + ATL_vbeta(pC0+4, rC43); + pC0 += ldc; + ATL_vbeta(pC0, rC04); + ATL_vbeta(pC0+4, rC44); + #endif + #else + #error "VLEN NOT SUPPORTED!" 
+ #endif + pC0 -= ldc2*4 - (8 SHIFT); + pB0 = B; + pB2 = B + (ldb<<1); + pB4 = pB2 + (ldb<<1); + A += incAm; + pA0 = A; + pA2 = pA0 + (lda<<1); + pA4 = pA2 + (lda<<1); + pA6 = pA4 + (lda<<1); + } /* end of loop over M */ + A = aa; + pA0 = A; + pA2 = A + (lda<<1); + pA4 = pA2 + (lda<<1); + pA6 = pA4 + (lda<<1); + C += incC; + pC0 = C; + B += incBn; + pB0 = B; + pB2 = B + (ldb<<1); + pB4 = pB2 + (ldb<<1); + } /* end of loop over N */ +} diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm8x6xVL_simd.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm8x6xVL_simd.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_amm8x6xVL_simd.c 1970-01-01 00:00:00.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_amm8x6xVL_simd.c 2016-07-28 19:43:21.000000000 +0000 @@ -0,0 +1,600 @@ +/* + * Automatically Tuned Linear Algebra Software v3.10.3 + * Copyright (C) 2016 R. Clint Whaley + */ +#if !defined(SREAL) && !defined(DREAL) && !defined(SCPLX) && !defined(DCPLX) + #define DREAL 1 +#endif +#include +#include "atlas_simd.h" +#include "atlas_prefetch.h" +#if defined(SCPLX) || defined(DCPLX) + #include "atlas_cplxsimd.h" + #ifndef TCPLX + #define TCPLX 1 + #endif + #define SHIFT <<1 +#else + #define SHIFT + #ifndef TCPLX + #define TREAL 1 + #endif +#endif +#ifndef TYPE + #if defined(SREAL) || defined(SCPLX) + #define TYPE float + #else + #define TYPE double + #endif +#endif +#ifndef ATL_MM_KB + #ifdef KB + #if KB > 0 + #define ATL_KBCONST 1 + #define ATL_MM_KB KB + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif + #else + #define ATL_KBCONST 0 + #define ATL_MM_KB K + #endif +#else + #if ATL_MM_KB > 0 + #define ATL_KBCONST 1 + #else + #undef ATL_MM_KB + #define ATL_MM_KB K + #define ATL_KBCONST 0 + #endif +#endif +#ifdef BETA1 + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vadd(d_, d_, rA0); \ + ATL_vust(p_, d_); \ + } +#elif defined(BETA0) + #define ATL_vbeta(p_, d_) ATL_vust(p_, d_) +#else + #define ATL_vbeta(p_, d_) \ + { \ + ATL_vuld(rA0, p_); \ + ATL_vmac(d_, 
rA0, vBE); \ + ATL_vust(p_, d_); \ + } +#endif + +#if defined (TCPLX) && ATL_VLEN == 4 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + (p_)[ 4] = r1_[2]; \ + (p_)[ 6] = r1_[3]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + (p_)[ 4] += r1_[2]; \ + (p_)[ 6] += r1_[3]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + (p_)[ 4] = beta*(p_)[ 4] + r1_[2]; \ + (p_)[ 6] = beta*(p_)[ 6] + r1_[3]; \ + } + #endif + #ifndef BETA0 + #define vwrtC(p_, rc_, rt_, rz_) \ + { \ + ATL_vunpckHI(rt_, rc_, rz_); /* rt_={0, r3, 0, r2} */ \ + ATL_vunpckLO(rc_, rc_, rz_); /* rc_={0, r1, 0, r0} */ \ + ATL_vbeta(p_, rc_); \ + ATL_vbeta((p_)+4, rt_); \ + } + #endif +#elif defined (TCPLX) && ATL_VLEN == 2 + #ifdef BETA0 + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = r1_[0]; \ + (p_)[ 2] = r1_[1]; \ + } + #elif defined(BETA1) + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] += r1_[0]; \ + (p_)[ 2] += r1_[1]; \ + } + #else + #define wrtC(p_, r1_) \ + { \ + (p_)[ 0] = beta*(p_)[ 0] + r1_[0]; \ + (p_)[ 2] = beta*(p_)[ 2] + r1_[1]; \ + } + #endif +#endif +#ifndef ATL_RESTRICT + #if defined(__STDC_VERSION__) && (__STDC_VERSION__/100 >= 1999) + #define ATL_RESTRICT restrict + #else + #define ATL_RESTRICT + #endif +#endif +void ATL_USERMM + (const int M, const int N, const int K, const TYPE alpha, + const TYPE * ATL_RESTRICT A, const int lda, + const TYPE * ATL_RESTRICT B, const int ldb, const TYPE beta, + TYPE * ATL_RESTRICT C, const int ldc) +/* + * Performs a GEMM with M,N,K unrolling (& jam) of (8,6,VLEN). + * Vectorization of VLEN=[4,8] (d,s) along K dim, vec unroll=(8,6,1). + * You may set compile-time constant K dim by defining ATL_MM_KB. 
+ */ +{ + const TYPE *pB0=B, *pB2=pB0+(ldb<<1), *pB4=pB2+(ldb<<1), *aa=A, *pA0=A, + *pA2=pA0+(lda<<1), *pA4=pA2+(lda<<1), *pA6=pA4+(lda<<1), + *pfA=A+lda*M, *pfB=B+ldb*N; + const size_t ldc2 = ldc SHIFT; + TYPE *pC0=C; + int i, j, k; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_VTYPE vBE; + #elif ATL_VLEN == 4 && defined(TCPLX) && defined(BETAX) + const ATL_VTYPE vBE={beta, 1.0, beta, 1.0}; + #endif + #if ATL_KBCONST == 0 + const size_t incAm = (lda<<3), incBn = ldb*6; + #else + #define incAm (ATL_MM_KB<<3) + #define incBn (6*ATL_MM_KB) + #endif + const size_t incC=ldc2*6; + #if !defined(BETA0) && !defined(BETA1) && !defined(TCPLX) + ATL_vbcast(vBE, &beta); + #endif + + for (j=0; j < N; j += 6) + { + for (i=0; i < M; i += 8) + { + register ATL_VTYPE rA0, rA1, rA2, rA3, rA4, rA5, rA6, rA7, rB0, rC00, + rC10, rC20, rC30, rC40, rC50, rC60, rC70, rB1, rC01, + rC11, rC21, rC31, rC41, rC51, rC61, rC71, rB2, rC02, + rC12, rC22, rC32, rC42, rC52, rC62, rC72, rB3, rC03, + rC13, rC23, rC33, rC43, rC53, rC63, rC73, rB4, rC04, + rC14, rC24, rC34, rC44, rC54, rC64, rC74, rB5, rC05, + rC15, rC25, rC35, rC45, rC55, rC65, rC75; + /* Peel K=0 iteration to avoid zero of rCxx and extra add */ + ATL_vld(rB0, pB0); + ATL_vld(rA0, pA0); + ATL_vmul(rC00, rA0, rB0); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC10, rA1, rB0); + ATL_vld(rA2, pA2); + ATL_vmul(rC20, rA2, rB0); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC30, rA3, rB0); + ATL_vld(rA4, pA4); + ATL_vmul(rC40, rA4, rB0); + ATL_vld(rA5, pA4+lda); pA4 += ATL_VLEN; + ATL_vmul(rC50, rA5, rB0); + ATL_vld(rA6, pA6); + ATL_vmul(rC60, rA6, rB0); + ATL_vld(rA7, pA6+lda); pA6 += ATL_VLEN; + ATL_vmul(rC70, rA7, rB0); + + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + ATL_vld(rB0, pB0); + ATL_vmul(rC01, rA0, rB1); + ATL_vld(rB2, pB2); + ATL_vmul(rC11, rA1, rB1); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + ATL_vld(rB4, pB4); + ATL_vmul(rC21, rA2, rB1); + ATL_vld(rB5, pB4+ldb); pB4 += ATL_VLEN; + 
ATL_vmul(rC31, rA3, rB1); + ATL_vmul(rC41, rA4, rB1); + ATL_vmul(rC51, rA5, rB1); + ATL_pfl1R(pfA); + ATL_pfl1R(pfA+8); + ATL_vmul(rC61, rA6, rB1); + ATL_pfl1R(pfA+16); + ATL_pfl1R(pfA+24); + ATL_vmul(rC71, rA7, rB1); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmul(rC02, rA0, rB2); + ATL_pfl1R(pfB); + ATL_pfl1R(pfB+8); + ATL_vmul(rC12, rA1, rB2); + ATL_pfl1R(pfB+16); + ATL_pfl1R(pfB+24); + ATL_vmul(rC22, rA2, rB2); + ATL_pfl1R(pfB+32); + ATL_pfl1R(pfB+40); + ATL_vmul(rC32, rA3, rB2); + ATL_vmul(rC42, rA4, rB2); + ATL_pfl1R(pfA+32); + ATL_pfl1R(pfA+40); + ATL_vmul(rC52, rA5, rB2); + ATL_vmul(rC62, rA6, rB2); + ATL_vmul(rC72, rA7, rB2); + ATL_vld(rB2, pB2); + + ATL_vmul(rC03, rA0, rB3); + ATL_vmul(rC13, rA1, rB3); + ATL_vmul(rC23, rA2, rB3); + ATL_vmul(rC33, rA3, rB3); + ATL_vmul(rC43, rA4, rB3); + ATL_vmul(rC53, rA5, rB3); + ATL_vmul(rC63, rA6, rB3); + ATL_vmul(rC73, rA7, rB3); + ATL_vld(rB3, pB2+lda); pB2 += ATL_VLEN; + + ATL_vmul(rC04, rA0, rB4); + ATL_vmul(rC14, rA1, rB4); + ATL_vmul(rC24, rA2, rB4); + ATL_vmul(rC34, rA3, rB4); + ATL_vmul(rC44, rA4, rB4); + ATL_vmul(rC54, rA5, rB4); + ATL_vmul(rC64, rA6, rB4); + ATL_vmul(rC74, rA7, rB4); + ATL_vld(rB4, pB4); + + ATL_vmul(rC05, rA0, rB5); + ATL_vld(rA0, pA0); + ATL_vmul(rC15, rA1, rB5); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmul(rC25, rA2, rB5); + ATL_vld(rA2, pA2); + ATL_vmul(rC35, rA3, rB5); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmul(rC45, rA4, rB5); + ATL_vld(rA4, pA4); + ATL_vmul(rC55, rA5, rB5); + ATL_vld(rA5, pA4+lda); pA4 += ATL_VLEN; + ATL_vmul(rC65, rA6, rB5); + ATL_vld(rA6, pA6); + ATL_vmul(rC75, rA7, rB5); + ATL_vld(rA7, pA6+lda); pA6 += ATL_VLEN; + pfA += 48; pfB += 48; +/* + * Stop one iteration early to drain preload pipline of A/B + */ + for (k=(ATL_VLEN<<1); k < ATL_MM_KB; k += ATL_VLEN) + { + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB5, pB4+ldb); pB4 += ATL_VLEN; + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vmac(rC40, rA4, rB0); + 
ATL_vmac(rC50, rA5, rB0); + ATL_vmac(rC60, rA6, rB0); + ATL_vmac(rC70, rA7, rB0); + ATL_vld(rB0, pB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + ATL_vmac(rC41, rA4, rB1); + ATL_vmac(rC51, rA5, rB1); + ATL_vmac(rC61, rA6, rB1); + ATL_vmac(rC71, rA7, rB1); + ATL_vld(rB1, pB0+ldb); pB0 += ATL_VLEN; + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + ATL_vmac(rC42, rA4, rB2); + ATL_vmac(rC52, rA5, rB2); + ATL_vmac(rC62, rA6, rB2); + ATL_vmac(rC72, rA7, rB2); + ATL_vld(rB2, pB2); + + ATL_vmac(rC03, rA0, rB3); + ATL_vmac(rC13, rA1, rB3); + ATL_vmac(rC23, rA2, rB3); + ATL_vmac(rC33, rA3, rB3); + ATL_vmac(rC43, rA4, rB3); + ATL_vmac(rC53, rA5, rB3); + ATL_vmac(rC63, rA6, rB3); + ATL_vmac(rC73, rA7, rB3); + ATL_vld(rB3, pB2+ldb); pB2 += ATL_VLEN; + + ATL_vmac(rC04, rA0, rB4); + ATL_vmac(rC14, rA1, rB4); + ATL_vmac(rC24, rA2, rB4); + ATL_vmac(rC34, rA3, rB4); + ATL_vmac(rC44, rA4, rB4); + ATL_vmac(rC54, rA5, rB4); + ATL_vmac(rC64, rA6, rB4); + ATL_vmac(rC74, rA7, rB4); + ATL_vld(rB4, pB4); + + + ATL_vmac(rC05, rA0, rB5); + ATL_vld(rA0, pA0); + ATL_vmac(rC15, rA1, rB5); + ATL_vld(rA1, pA0+lda); pA0 += ATL_VLEN; + ATL_vmac(rC25, rA2, rB5); + ATL_vld(rA2, pA2); + ATL_vmac(rC35, rA3, rB5); + ATL_vld(rA3, pA2+lda); pA2 += ATL_VLEN; + ATL_vmac(rC45, rA4, rB5); + ATL_vld(rA4, pA4); + ATL_vmac(rC55, rA5, rB5); + ATL_vld(rA5, pA4+lda); pA4 += ATL_VLEN; + ATL_vmac(rC65, rA6, rB5); + ATL_vld(rA6, pA6); + ATL_vmac(rC75, rA7, rB5); + ATL_vld(rA7, pA6+lda); pA6 += ATL_VLEN; + } /* end K-loop */ +/* + * Last iteration peeled out to drain preload pipeline + */ + ATL_vmac(rC00, rA0, rB0); + ATL_vld(rB5, pB4+ldb); + ATL_vmac(rC10, rA1, rB0); + ATL_vmac(rC20, rA2, rB0); + ATL_vmac(rC30, rA3, rB0); + ATL_vmac(rC40, rA4, rB0); + ATL_vmac(rC50, rA5, rB0); + ATL_vmac(rC60, rA6, rB0); + ATL_vmac(rC70, rA7, rB0); + + ATL_vmac(rC01, rA0, rB1); + ATL_vmac(rC11, rA1, 
rB1); + ATL_vmac(rC21, rA2, rB1); + ATL_vmac(rC31, rA3, rB1); + ATL_vmac(rC41, rA4, rB1); + ATL_vmac(rC51, rA5, rB1); + ATL_vmac(rC61, rA6, rB1); + ATL_vmac(rC71, rA7, rB1); + + ATL_vmac(rC02, rA0, rB2); + ATL_vmac(rC12, rA1, rB2); + ATL_vmac(rC22, rA2, rB2); + ATL_vmac(rC32, rA3, rB2); + ATL_vmac(rC42, rA4, rB2); + ATL_vmac(rC52, rA5, rB2); + ATL_vmac(rC62, rA6, rB2); + ATL_vmac(rC72, rA7, rB2); + + ATL_vmac(rC03, rA0, rB3); + ATL_vmac(rC13, rA1, rB3); + ATL_vmac(rC23, rA2, rB3); + ATL_vmac(rC33, rA3, rB3); + ATL_vmac(rC43, rA4, rB3); + ATL_vmac(rC53, rA5, rB3); + ATL_vmac(rC63, rA6, rB3); + ATL_vmac(rC73, rA7, rB3); + + ATL_vmac(rC04, rA0, rB4); + ATL_vmac(rC14, rA1, rB4); + ATL_vmac(rC24, rA2, rB4); + ATL_vmac(rC34, rA3, rB4); + ATL_vmac(rC44, rA4, rB4); + ATL_vmac(rC54, rA5, rB4); + ATL_vmac(rC64, rA6, rB4); + ATL_vmac(rC74, rA7, rB4); + + ATL_vmac(rC05, rA0, rB5); + ATL_vmac(rC15, rA1, rB5); + ATL_vmac(rC25, rA2, rB5); + ATL_vmac(rC35, rA3, rB5); + ATL_vmac(rC45, rA4, rB5); + ATL_vmac(rC55, rA5, rB5); + ATL_vmac(rC65, rA6, rB5); + ATL_vmac(rC75, rA7, rB5); + + #if ATL_VLEN == 2 + ATL_vvrsum2(rC00, rC10); + ATL_vvrsum2(rC20, rC30); + ATL_vvrsum2(rC40, rC50); + ATL_vvrsum2(rC60, rC70); + ATL_vvrsum2(rC01, rC11); + ATL_vvrsum2(rC21, rC31); + ATL_vvrsum2(rC41, rC51); + ATL_vvrsum2(rC61, rC71); + ATL_vvrsum2(rC02, rC12); + ATL_vvrsum2(rC22, rC32); + ATL_vvrsum2(rC42, rC52); + ATL_vvrsum2(rC62, rC72); + ATL_vvrsum2(rC03, rC13); + ATL_vvrsum2(rC23, rC33); + ATL_vvrsum2(rC43, rC53); + ATL_vvrsum2(rC63, rC73); + ATL_vvrsum2(rC04, rC14); + ATL_vvrsum2(rC24, rC34); + ATL_vvrsum2(rC44, rC54); + ATL_vvrsum2(rC64, rC74); + ATL_vvrsum2(rC05, rC15); + ATL_vvrsum2(rC25, rC35); + ATL_vvrsum2(rC45, rC55); + ATL_vvrsum2(rC65, rC75); + #ifdef TCPLX + wrtC(pC0, rC00); + wrtC(pC0+4, rC20); + wrtC(pC0+8, rC40); + wrtC(pC0+12, rC60); + pC0 += ldc2; + wrtC(pC0, rC01); + wrtC(pC0+4, rC21); + wrtC(pC0+8, rC41); + wrtC(pC0+12, rC61); + pC0 += ldc2; + wrtC(pC0, rC02); + wrtC(pC0+4, rC22); 
+ wrtC(pC0+8, rC42); + wrtC(pC0+12, rC62); + pC0 += ldc2; + wrtC(pC0, rC03); + wrtC(pC0+4, rC23); + wrtC(pC0+8, rC43); + wrtC(pC0+12, rC63); + pC0 += ldc2; + wrtC(pC0, rC04); + wrtC(pC0+4, rC24); + wrtC(pC0+8, rC44); + wrtC(pC0+12, rC64); + pC0 += ldc2; + wrtC(pC0, rC05); + wrtC(pC0+4, rC25); + wrtC(pC0+8, rC45); + wrtC(pC0+12, rC65); + #else + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+2, rC20); + ATL_vbeta(pC0+4, rC40); + ATL_vbeta(pC0+6, rC60); + pC0 += ldc2; + ATL_vbeta(pC0, rC01); + ATL_vbeta(pC0+2, rC21); + ATL_vbeta(pC0+4, rC41); + ATL_vbeta(pC0+6, rC61); + pC0 += ldc2; + ATL_vbeta(pC0, rC02); + ATL_vbeta(pC0+2, rC22); + ATL_vbeta(pC0+4, rC42); + ATL_vbeta(pC0+6, rC62); + pC0 += ldc2; + ATL_vbeta(pC0, rC03); + ATL_vbeta(pC0+2, rC23); + ATL_vbeta(pC0+4, rC43); + ATL_vbeta(pC0+6, rC63); + pC0 += ldc2; + ATL_vbeta(pC0, rC04); + ATL_vbeta(pC0+2, rC24); + ATL_vbeta(pC0+4, rC44); + ATL_vbeta(pC0+6, rC64); + pC0 += ldc2; + ATL_vbeta(pC0, rC05); + ATL_vbeta(pC0+2, rC25); + ATL_vbeta(pC0+4, rC45); + ATL_vbeta(pC0+6, rC65); + #endif + #elif ATL_VLEN == 4 + #if defined(TCPLX) && !defined(BETA0) // want permuted order + ATL_vvrsum4(rC00, rC20, rC10, rC30); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC40, rC50, rC60, rC70); + ATL_vvrsum4(rC01, rC21, rC11, rC31); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC41, rC51, rC61, rC71); + ATL_vvrsum4(rC02, rC22, rC12, rC32); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC42, rC52, rC62, rC72); + ATL_vvrsum4(rC03, rC23, rC13, rC33); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC43, rC53, rC63, rC73); + ATL_vvrsum4(rC04, rC24, rC14, rC34); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC44, rC54, rC64, rC74); + ATL_vvrsum4(rC05, rC25, rC15, rC35); // rC00={r3, r1, r2, r0} + ATL_vvrsum4(rC45, rC55, rC65, rC75); + #else // want natural order + ATL_vvrsum4(rC00, rC10, rC20, rC30); + ATL_vvrsum4(rC40, rC50, rC60, rC70); + ATL_vvrsum4(rC01, rC11, rC21, rC31); + ATL_vvrsum4(rC41, rC51, rC61, rC71); + ATL_vvrsum4(rC02, rC12, rC22, rC32); + ATL_vvrsum4(rC42, rC52, 
rC62, rC72); + ATL_vvrsum4(rC03, rC13, rC23, rC33); + ATL_vvrsum4(rC43, rC53, rC63, rC73); + ATL_vvrsum4(rC04, rC14, rC24, rC34); + ATL_vvrsum4(rC44, rC54, rC64, rC74); + ATL_vvrsum4(rC05, rC15, rC25, rC35); + ATL_vvrsum4(rC45, rC55, rC65, rC75); + #endif + #ifdef TCPLX + #ifndef BETA0 + ATL_vzero(rB0); // rB0 ={ 0, 0, 0, 0} + vwrtC(pC0, rC00, rC20, rB0); + wrtC(pC0+8, rC40); + pC0 += ldc2; + vwrtC(pC0, rC01, rC21, rB0); + wrtC(pC0+8, rC41); + pC0 += ldc2; + vwrtC(pC0, rC02, rC22, rB0); + wrtC(pC0+8, rC42); + pC0 += ldc2; + vwrtC(pC0, rC03, rC23, rB0); + wrtC(pC0+8, rC43); + pC0 += ldc2; + vwrtC(pC0, rC04, rC24, rB0); + wrtC(pC0+8, rC44); + pC0 += ldc2; + vwrtC(pC0, rC05, rC25, rB0); + wrtC(pC0+8, rC45); + #else + wrtC(pC0, rC00); + wrtC(pC0+8, rC40); + pC0 += ldc2; + wrtC(pC0, rC01); + wrtC(pC0+8, rC41); + pC0 += ldc2; + wrtC(pC0, rC02); + wrtC(pC0+8, rC42); + pC0 += ldc2; + wrtC(pC0, rC03); + wrtC(pC0+8, rC43); + pC0 += ldc2; + wrtC(pC0, rC04); + wrtC(pC0+8, rC44); + pC0 += ldc2; + wrtC(pC0, rC05); + wrtC(pC0+8, rC45); + #endif + #else /* real */ + ATL_vbeta(pC0, rC00); + ATL_vbeta(pC0+4, rC40); + pC0 += ldc; + ATL_vbeta(pC0, rC01); + ATL_vbeta(pC0+4, rC41); + pC0 += ldc; + ATL_vbeta(pC0, rC02); + ATL_vbeta(pC0+4, rC42); + pC0 += ldc; + ATL_vbeta(pC0, rC03); + ATL_vbeta(pC0+4, rC43); + pC0 += ldc; + ATL_vbeta(pC0, rC04); + ATL_vbeta(pC0+4, rC44); + pC0 += ldc; + ATL_vbeta(pC0, rC05); + ATL_vbeta(pC0+4, rC45); + #endif + #else + #error "VLEN NOT SUPPORTED!" 
+ #endif + pC0 -= ldc2*5 - (8 SHIFT); + pB0 = B; + pB2 = B + (ldb<<1); + pB4 = pB2 + (ldb<<1); + A += incAm; + pA0 = A; + pA2 = pA0 + (lda<<1); + pA4 = pA2 + (lda<<1); + pA6 = pA4 + (lda<<1); + } /* end of loop over M */ + A = aa; + pA0 = A; + pA2 = A + (lda<<1); + pA4 = pA2 + (lda<<1); + pA6 = pA4 + (lda<<1); + C += incC; + pC0 = C; + B += incBn; + pB0 = B; + pB2 = B + (ldb<<1); + pB4 = pB2 + (ldb<<1); + } /* end of loop over N */ +} diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm14x1x56_sse2pABC.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm14x1x56_sse2pABC.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm14x1x56_sse2pABC.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm14x1x56_sse2pABC.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2002 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm14x1x56_sse2pABC_K.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm14x1x56_sse2pABC_K.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm14x1x56_sse2pABC_K.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm14x1x56_sse2pABC_K.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2002 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm14x1x56_sse2pABC_MN.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm14x1x56_sse2pABC_MN.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm14x1x56_sse2pABC_MN.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm14x1x56_sse2pABC_MN.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2002 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm1x14x56_sse2pABC.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm1x14x56_sse2pABC.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm1x14x56_sse2pABC.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm1x14x56_sse2pABC.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm1x6x72_sse2.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm1x6x72_sse2.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm1x6x72_sse2.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm1x6x72_sse2.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm2x1x24_5pABC.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm2x1x24_5pABC.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm2x1x24_5pABC.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm2x1x24_5pABC.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm2x1x40_5pABC.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm2x1x40_5pABC.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm2x1x40_5pABC.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm2x1x40_5pABC.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm2x2x128_sse2.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm2x2x128_sse2.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm2x2x128_sse2.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm2x2x128_sse2.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm2x2x2_sse2.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm2x2x2_sse2.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm2x2x2_sse2.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm2x2x2_sse2.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x1x44_4_sse2.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x1x44_4_sse2.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x1x44_4_sse2.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x1x44_4_sse2.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2004 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x1x90_x87.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x1x90_x87.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x1x90_x87.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x1x90_x87.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2006 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x2x128_sse2.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x2x128_sse2.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x2x128_sse2.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x2x128_sse2.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x2x256_avx.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x2x256_avx.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x2x256_avx.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x2x256_avx.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x2x4_avx.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x2x4_avx.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x2x4_avx.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x2x4_avx.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x16r8_US.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x16r8_US.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x16r8_US.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x16r8_US.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -! Automatically Tuned Linear Algebra Software v3.10.2 +! Automatically Tuned Linear Algebra Software v3.10.3 ! (C) Copyright 2002 R. Clint Whaley ! ! Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x2_mips.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x2_mips.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x2_mips.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x2_mips.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_arm.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_arm.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_arm.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_arm.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2012, 2011 R. Clint Whaley * * Code contributers : R. 
Clint Whaley, Tom Wallace @@ -113,7 +113,7 @@ #include "atlas_asm.h" .code 32 .fpu vfpv3 -#ifdef ATL_ARM_HARDFP +#ifndef ATL_ARM_SOFTFP .eabi_attribute 28, 1 #endif .text @@ -126,7 +126,7 @@ /* * Load needed vals to registers */ -#ifdef ATL_ARM_HARDFP +#ifndef ATL_ARM_SOFTFP add M0, SP, #FSIZE ldmIA M0, {lda,pB0,pA00,pC0,ldc} /* lda,B,ldb,C,ldc */ vmov M0, zero, d1 /* put beta in M0, zero */ @@ -177,7 +177,7 @@ #else add PTR, pC0, ldc #ifdef BETAX - #ifdef ATL_ARM_HARDFP + #ifndef ATL_ARM_SOFTFP fldd rb0, [SP, #0] #else fldd rb0, [SP, #(FSIZE+24)] @@ -367,7 +367,7 @@ mov M, M0 bne NLOOP - #ifdef ATL_ARM_HARDFP + #ifndef ATL_ARM_SOFTFP pop {r0,r1} /* clear beta off stack */ #endif ldmIA SP!, {r4-r11,r14} diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x2_US.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x2_US.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x2_US.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x2_US.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -! Automatically Tuned Linear Algebra Software v3.10.2 +! Automatically Tuned Linear Algebra Software v3.10.3 ! (C) Copyright 2002 R. Clint Whaley ! ! 
Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2005 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x8_US.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x8_US.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4x8_US.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4x8_US.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -! Automatically Tuned Linear Algebra Software v3.10.2 +! Automatically Tuned Linear Algebra Software v3.10.3 ! (C) Copyright 2002 R. Clint Whaley ! ! 
Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4xUR2_mips.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4xUR2_mips.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4xUR2_mips.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4xUR2_mips.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4xUR3_mips.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4xUR3_mips.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4xUR3_mips.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4xUR3_mips.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4xURx_mips.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4xURx_mips.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm4x4xURx_mips.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm4x4xURx_mips.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm6x1x30_x87.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm6x1x30_x87.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm6x1x30_x87.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm6x1x30_x87.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm6x1x60pABC.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm6x1x60pABC.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm6x1x60pABC.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm6x1x60pABC.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2002 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm6x1x60_sse2_32.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm6x1x60_sse2_32.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm6x1x60_sse2_32.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm6x1x60_sse2_32.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2004 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm6x1x60_sse2.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm6x1x60_sse2.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm6x1x60_sse2.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm6x1x60_sse2.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2002 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm6x1x72_sse2.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm6x1x72_sse2.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm6x1x72_sse2.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm6x1x72_sse2.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2002 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm6x1x72_sse2_K.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm6x1x72_sse2_K.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm6x1x72_sse2_K.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm6x1x72_sse2_K.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm8x1x120_L1pf.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm8x1x120_L1pf.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm8x1x120_L1pf.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm8x1x120_L1pf.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm8x1x120_sse2.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm8x1x120_sse2.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm8x1x120_sse2.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm8x1x120_sse2.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2006 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm8x4x2_vsx.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm8x4x2_vsx.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm8x4x2_vsx.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm8x4x2_vsx.c 2016-07-28 19:43:21.000000000 +0000 @@ -30,6 +30,14 @@ #include #include "atlas_misc.h" #include "atlas_prefetch.h" /* ATL_pfl1R, ATL_pfl1W */ +/* + * RCW: This test works with gcc at least; need to adapt permute code to le + * if we want to remove + */ +#if _LITTLE_ENDIAN + #error "This kernel works for big endian only!\n" +#endif + #define VEC_SIZE 2 diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm_julian_gas_30.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm_julian_gas_30.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm_julian_gas_30.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm_julian_gas_30.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 Julian Ruhe * * Code contributers : Julian Ruhe, Peter Soendergaard diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm_sse2_80.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm_sse2_80.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm_sse2_80.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm_sse2_80.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm_sse2_80M.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm_sse2_80M.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm_sse2_80M.c 2014-07-10 16:22:25.000000000 +0000 +++ 
atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm_sse2_80M.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm_sse2_80N.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm_sse2_80N.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm_sse2_80N.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm_sse2_80N.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm_sse2_K.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm_sse2_K.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_dmm_sse2_K.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_dmm_sse2_K.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x3x2p.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x3x2p.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x3x2p.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x3x2p.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x3x8p.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x3x8p.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x3x8p.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x3x8p.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x2_1_pref.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x2_1_pref.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x2_1_pref.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x2_1_pref.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x2_1_prefCU.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x2_1_prefCU.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x2_1_prefCU.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x2_1_prefCU.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x2rp.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x2rp.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x2rp.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x2rp.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x2US.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x2US.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x2US.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x2US.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1999 The Australian National University * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x2US_MN.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x2US_MN.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x2US_MN.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x2US_MN.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. Clint Whaley * * Code contributers : R. 
Clint Whaley, Viet Nguyen and Peter Strazdins diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x2US_NB.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x2US_NB.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x2US_NB.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x2US_NB.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. Clint Whaley * * Code contributers : R. Clint Whaley, Viet Nguyen and Peter Strazdins diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x4_av.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x4_av.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x4_av.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x4_av.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x56_av.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x56_av.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x56_av.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x56_av.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x8_av.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x8_av.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x8_av.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x8_av.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x8_bpfab.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x8_bpfab.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x8_bpfab.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x8_bpfab.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x8_bpfabc.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x8_bpfabc.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x8_bpfabc.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x8_bpfabc.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x8p.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x8p.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm4x4x8p.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm4x4x8p.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm6x8x8_1p.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm6x8x8_1p.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm6x8x8_1p.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm6x8x8_1p.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm8x8x2.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm8x8x2.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_mm8x8x2.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_mm8x8x2.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_objdummy.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_objdummy.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_objdummy.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_objdummy.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm10x1x120_sse.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm10x1x120_sse.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm10x1x120_sse.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm10x1x120_sse.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2006 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm14x1x84_sse.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm14x1x84_sse.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm14x1x84_sse.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm14x1x84_sse.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm14x1x84_sseCU.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm14x1x84_sseCU.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm14x1x84_sseCU.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm14x1x84_sseCU.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm2x2x256_sse.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm2x2x256_sse.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm2x2x256_sse.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm2x2x256_sse.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_3dnow_100.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_3dnow_100.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_3dnow_100.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_3dnow_100.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_3dnow_100M.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_3dnow_100M.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_3dnow_100M.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_3dnow_100M.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_3dnow_100N.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_3dnow_100N.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_3dnow_100N.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_3dnow_100N.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_3dnow_K.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_3dnow_K.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_3dnow_K.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_3dnow_K.c 
2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x1x256_sse.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x1x256_sse.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x1x256_sse.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x1x256_sse.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2008 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x1x60_4_sse2.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x1x60_4_sse2.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x1x60_4_sse2.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x1x60_4_sse2.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2004 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x2x256_avx.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x2x256_avx.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x2x256_avx.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x2x256_avx.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x2x8_avx.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x2x8_avx.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x2x8_avx.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x2x8_avx.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x16_av.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x16_av.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x16_av.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x16_av.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x16_US.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x16_US.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x16_US.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x16_US.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -! Automatically Tuned Linear Algebra Software v3.10.2 +! Automatically Tuned Linear Algebra Software v3.10.3 ! (C) Copyright 2002 R. Clint Whaley ! ! Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x2dld_arm.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x2dld_arm.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x2dld_arm.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x2dld_arm.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2012 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -137,7 +137,7 @@ #include "atlas_asm.h" .code 32 .fpu vfpv3 -#ifdef ATL_ARM_HARDFP +#ifndef ATL_ARM_SOFTFP .eabi_attribute 28, 1 #endif .text @@ -154,7 +154,7 @@ /* * Load needed vals to registers */ -#ifdef ATL_ARM_HARDFP +#ifndef ATL_ARM_SOFTFP add M0, SP, #FSIZE ldmIA M0, {lda,pB0,pA1,pC0,ldc} /* lda,B,ldb,C,ldc */ vmov.32 M0, d0[1] /* put beta in M0 */ @@ -214,7 +214,7 @@ #else add PTR, pC0, ldc #ifdef BETAX - #ifdef ATL_ARM_HARDFP + #ifndef ATL_ARM_SOFTFP flds rb0, [SP, #0] #else flds rb0, [SP, #(FSIZE+16)] @@ -387,7 +387,7 @@ mov M, M0 bne NLOOP - #ifdef ATL_ARM_HARDFP + #ifndef ATL_ARM_SOFTFP pop {r0} /* clear beta off stack */ #endif add SP, SP, #VSIZE diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x2pf_arm.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x2pf_arm.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x2pf_arm.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x2pf_arm.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2012, 2011 R. Clint Whaley * * Code contributers : R. 
Clint Whaley, Tom Wallace @@ -115,7 +115,7 @@ #include "atlas_asm.h" .code 32 .fpu vfpv3 -#ifdef ATL_ARM_HARDFP +#ifndef ATL_ARM_SOFTFP .eabi_attribute 28, 1 #endif .text @@ -128,7 +128,7 @@ /* * Load needed vals to registers */ -#ifdef ATL_ARM_HARDFP +#ifndef ATL_ARM_SOFTFP add M0, SP, #FSIZE ldmIA M0, {lda,pB0,pA00,pC0,ldc} /* lda,B,ldb,C,ldc */ vmov.32 M0, d0[1] /* put beta in M0 */ @@ -178,7 +178,7 @@ #else add PTR, pC0, ldc #ifdef BETAX - #ifdef ATL_ARM_HARDFP + #ifndef ATL_ARM_SOFTFP flds rb0, [SP, #0] #else flds rb0, [SP, #(FSIZE+16)] @@ -368,7 +368,7 @@ mov M, M0 bne NLOOP - #ifdef ATL_ARM_HARDFP + #ifndef ATL_ARM_SOFTFP pop {r0} /* clear beta off stack */ #endif ldmIA SP!, {r4-r11,r14} diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x2_US.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x2_US.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x2_US.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x2_US.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -! Automatically Tuned Linear Algebra Software v3.10.2 +! Automatically Tuned Linear Algebra Software v3.10.3 ! (C) Copyright 2002 R. Clint Whaley ! ! Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x4_av.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x4_av.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x4_av.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x4_av.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x4_neon.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x4_neon.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x4_neon.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x4_neon.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011, 2010 Vesperix Corporation * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x72_US.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x72_US.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4x72_US.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4x72_US.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -! Automatically Tuned Linear Algebra Software v3.10.2 +! Automatically Tuned Linear Algebra Software v3.10.3 ! (C) Copyright 2002 R. Clint Whaley ! ! Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4xURx_mips.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4xURx_mips.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm4x4xURx_mips.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm4x4xURx_mips.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2007 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm6x1x120_sse.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm6x1x120_sse.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm6x1x120_sse.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm6x1x120_sse.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2004 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm6x1x60_sse.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm6x1x60_sse.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm6x1x60_sse.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm6x1x60_sse.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2002 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm6x1x60_x87.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm6x1x60_x87.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm6x1x60_x87.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm6x1x60_x87.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2003 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm6x1x80_sse.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm6x1x80_sse.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm6x1x80_sse.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm6x1x80_sse.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2004 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smmMNCU_av.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smmMNCU_av.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smmMNCU_av.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smmMNCU_av.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_56.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_56.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_56.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_56.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_56M.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_56M.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_56M.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_56M.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_56N.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_56N.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_56N.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_56N.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_60.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_60.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_60.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_60.c 2016-07-28 
19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_60M.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_60M.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_60M.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_60M.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_60N.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_60N.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_60N.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_60N.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_64.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_64.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_64.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_64.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_64M.c 
atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_64M.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_64M.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_64M.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_64N.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_64N.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_64N.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_64N.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_K.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_K.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse1_K.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse1_K.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse2_112.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse2_112.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse2_112.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse2_112.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) 
Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse2_112M.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse2_112M.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse2_112M.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse2_112M.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse2_112N.c atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse2_112N.c --- atlas-3.10.2/tune/blas/gemm/CASES/ATL_smm_sse2_112N.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ATL_smm_sse2_112N.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 Peter Soendergaard * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/ccases.0 atlas-3.10.3/tune/blas/gemm/CASES/ccases.0 --- atlas-3.10.2/tune/blas/gemm/CASES/ccases.0 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/ccases.0 2016-07-28 19:43:21.000000000 +0000 @@ -1,8 +1,17 @@ "" -6 +15 1 480 4 4 1 1 1 4 4 2 ATL_mm4x4x2US.c "V. Nguyen & P. Strazdins" 2 8 4 4 2 1 1 4 4 2 ATL_mm4x4x2_1_pref.c "R. Clint Whaley" 3 208 4 4 1 1 1 4 4 2 ATL_mm4x4x2_1_prefCU.c "R. Clint Whaley" 5 192 4 4 8 0 4 4 4 8 ATL_mm4x4x8_bpfabc.c "R. Clint Whaley" 6 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" 7 8 4 3 2 0 4 4 3 2 ATL_mm4x3x2p.c "R. Clint Whaley" + 8 448 4 2 16 1 8 4 2 8 ATL_amm4x2xVL_simd.c "R. Clint Whaley" + 9 448 8 2 16 1 8 8 2 8 ATL_amm8x2xVL_simd.c "R. 
Clint Whaley" + 10 448 8 4 16 1 8 8 4 8 ATL_amm8x4xVL_simd.c "R. Clint Whaley" + 11 448 8 5 16 1 8 8 5 8 ATL_amm8x5xVL_simd.c "R. Clint Whaley" + 12 448 8 6 16 1 8 8 6 8 ATL_amm8x6xVL_simd.c "R. Clint Whaley" + 13 448 4 3 16 1 8 4 3 8 ATL_amm4x3xVL_simd.c "R. Clint Whaley" + 14 448 4 4 16 1 8 4 4 8 ATL_amm4x4xVL_simd.c "R. Clint Whaley" + 15 448 4 5 16 1 8 4 5 8 ATL_amm4x5xVL_simd.c "R. Clint Whaley" + 16 448 4 6 16 1 8 4 6 8 ATL_amm4x6xVL_simd.c "R. Clint Whaley" diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/dcases.0 atlas-3.10.3/tune/blas/gemm/CASES/dcases.0 --- atlas-3.10.2/tune/blas/gemm/CASES/dcases.0 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/dcases.0 2016-07-28 19:43:21.000000000 +0000 @@ -1,8 +1,17 @@ "" -6 +15 1 480 4 4 1 1 1 4 4 2 ATL_mm4x4x2US.c "V. Nguyen & P. Strazdins" 2 8 4 4 2 1 1 4 4 2 ATL_mm4x4x2_1_pref.c "R. Clint Whaley" 3 208 4 4 1 1 1 4 4 2 ATL_mm4x4x2_1_prefCU.c "R. Clint Whaley" 5 192 4 4 8 0 4 4 4 8 ATL_mm4x4x8_bpfabc.c "R. Clint Whaley" 6 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" 7 8 4 3 2 0 4 4 3 2 ATL_mm4x3x2p.c "R. Clint Whaley" + 8 448 4 2 8 1 8 4 2 4 ATL_amm4x2xVL_simd.c "R. Clint Whaley" + 9 448 8 2 8 1 8 8 2 4 ATL_amm8x2xVL_simd.c "R. Clint Whaley" + 10 448 8 4 8 1 8 8 4 4 ATL_amm8x4xVL_simd.c "R. Clint Whaley" + 11 448 8 5 8 1 8 8 5 4 ATL_amm8x5xVL_simd.c "R. Clint Whaley" + 12 448 8 6 8 1 8 8 6 4 ATL_amm8x6xVL_simd.c "R. Clint Whaley" + 13 448 4 3 8 1 8 4 3 4 ATL_amm4x3xVL_simd.c "R. Clint Whaley" + 14 448 4 4 8 1 8 4 4 4 ATL_amm4x4xVL_simd.c "R. Clint Whaley" + 15 448 4 5 8 1 8 4 5 4 ATL_amm4x5xVL_simd.c "R. Clint Whaley" + 16 448 4 6 8 1 8 4 6 4 ATL_amm4x6xVL_simd.c "R. 
Clint Whaley" diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1_b0.asm atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1_b0.asm --- atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1_b0.asm 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1_b0.asm 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -; Automatically Tuned Linear Algebra Software v3.10.2 +; Automatically Tuned Linear Algebra Software v3.10.3 ; (C) Copyright 2001 Julian Ruhe ; ; Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1_b1.asm atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1_b1.asm --- atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1_b1.asm 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1_b1.asm 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -; Automatically Tuned Linear Algebra Software v3.10.2 +; Automatically Tuned Linear Algebra Software v3.10.3 ; (C) Copyright 2001 Julian Ruhe ; ; Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1_bX.asm atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1_bX.asm --- atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1_bX.asm 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1_bX.asm 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -; Automatically Tuned Linear Algebra Software v3.10.2 +; Automatically Tuned Linear Algebra Software v3.10.3 ; (C) Copyright 2001 Julian Ruhe ; ; Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1.cfg 
atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1.cfg --- atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1.cfg 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1.cfg 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -; Automatically Tuned Linear Algebra Software v3.10.2 +; Automatically Tuned Linear Algebra Software v3.10.3 ; (C) Copyright 2001 Julian Ruhe ; ; Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1.mcr atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1.mcr --- atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1.mcr 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_dJIK30x30x30TN30x30x0_a1.mcr 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -; Automatically Tuned Linear Algebra Software v3.10.2 +; Automatically Tuned Linear Algebra Software v3.10.3 ; (C) Copyright 2001 Julian Ruhe ; ; Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1_b0.asm atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1_b0.asm --- atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1_b0.asm 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1_b0.asm 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -; Automatically Tuned Linear Algebra Software v3.10.2 +; Automatically Tuned Linear Algebra Software v3.10.3 ; (C) Copyright 2001 Julian Ruhe ; ; Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1_b1.asm atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1_b1.asm --- atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1_b1.asm 
2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1_b1.asm 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -; Automatically Tuned Linear Algebra Software v3.10.2 +; Automatically Tuned Linear Algebra Software v3.10.3 ; (C) Copyright 2001 Julian Ruhe ; ; Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1_bX.asm atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1_bX.asm --- atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1_bX.asm 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1_bX.asm 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -; Automatically Tuned Linear Algebra Software v3.10.2 +; Automatically Tuned Linear Algebra Software v3.10.3 ; (C) Copyright 2001 Julian Ruhe ; ; Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1.cfg atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1.cfg --- atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1.cfg 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1.cfg 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 @@ -; Automatically Tuned Linear Algebra Software v3.10.2 +; Automatically Tuned Linear Algebra Software v3.10.3 ; (C) Copyright 2001 Julian Ruhe ; ; Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1.mcr atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1.mcr --- atlas-3.10.2/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1.mcr 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/objs/ATL_sJIK48x48x48TN48x48x0_a1.mcr 2016-07-28 19:43:21.000000000 +0000 @@ -1,4 +1,4 
@@ -; Automatically Tuned Linear Algebra Software v3.10.2 +; Automatically Tuned Linear Algebra Software v3.10.3 ; (C) Copyright 2001 Julian Ruhe ; ; Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/scases.0 atlas-3.10.3/tune/blas/gemm/CASES/scases.0 --- atlas-3.10.2/tune/blas/gemm/CASES/scases.0 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/scases.0 2016-07-28 19:43:21.000000000 +0000 @@ -1,8 +1,17 @@ "" -6 +15 1 480 4 4 1 1 1 4 4 2 ATL_mm4x4x2US.c "V. Nguyen & P. Strazdins" 2 8 4 4 2 1 1 4 4 2 ATL_mm4x4x2_1_pref.c "R. Clint Whaley" 3 208 4 4 1 1 1 4 4 2 ATL_mm4x4x2_1_prefCU.c "R. Clint Whaley" 5 192 4 4 8 0 4 4 4 8 ATL_mm4x4x8_bpfabc.c "R. Clint Whaley" 6 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" 7 8 4 3 2 0 4 4 3 2 ATL_mm4x3x2p.c "R. Clint Whaley" + 8 448 4 2 16 1 8 4 2 8 ATL_amm4x2xVL_simd.c "R. Clint Whaley" + 9 448 8 2 16 1 8 8 2 8 ATL_amm8x2xVL_simd.c "R. Clint Whaley" + 10 448 8 4 16 1 8 8 4 8 ATL_amm8x4xVL_simd.c "R. Clint Whaley" + 11 448 8 5 16 1 8 8 5 8 ATL_amm8x5xVL_simd.c "R. Clint Whaley" + 12 448 8 6 16 1 8 8 6 8 ATL_amm8x6xVL_simd.c "R. Clint Whaley" + 13 448 4 3 16 1 8 4 3 8 ATL_amm4x3xVL_simd.c "R. Clint Whaley" + 14 448 4 4 16 1 8 4 4 8 ATL_amm4x4xVL_simd.c "R. Clint Whaley" + 15 448 4 5 16 1 8 4 5 8 ATL_amm4x5xVL_simd.c "R. Clint Whaley" + 16 448 4 6 16 1 8 4 6 8 ATL_amm4x6xVL_simd.c "R. Clint Whaley" diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/zcases.0 atlas-3.10.3/tune/blas/gemm/CASES/zcases.0 --- atlas-3.10.2/tune/blas/gemm/CASES/zcases.0 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/zcases.0 2016-07-28 19:43:21.000000000 +0000 @@ -1,8 +1,17 @@ "" -6 +15 1 480 4 4 1 1 1 4 4 2 ATL_mm4x4x2US.c "V. Nguyen & P. Strazdins" 2 8 4 4 2 1 1 4 4 2 ATL_mm4x4x2_1_pref.c "R. Clint Whaley" 3 208 4 4 1 1 1 4 4 2 ATL_mm4x4x2_1_prefCU.c "R. Clint Whaley" 5 192 4 4 8 0 4 4 4 8 ATL_mm4x4x8_bpfabc.c "R. 
Clint Whaley" 6 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" 7 8 4 3 2 0 4 4 3 2 ATL_mm4x3x2p.c "R. Clint Whaley" + 8 448 4 2 8 1 8 4 2 4 ATL_amm4x2xVL_simd.c "R. Clint Whaley" + 9 448 8 2 8 1 8 8 2 4 ATL_amm8x2xVL_simd.c "R. Clint Whaley" + 10 448 8 4 8 1 8 8 4 4 ATL_amm8x4xVL_simd.c "R. Clint Whaley" + 11 448 8 5 8 1 8 8 5 4 ATL_amm8x5xVL_simd.c "R. Clint Whaley" + 12 448 8 6 8 1 8 8 6 4 ATL_amm8x6xVL_simd.c "R. Clint Whaley" + 13 448 4 3 8 1 8 4 3 4 ATL_amm4x3xVL_simd.c "R. Clint Whaley" + 14 448 4 4 8 1 8 4 4 4 ATL_amm4x4xVL_simd.c "R. Clint Whaley" + 15 448 4 5 8 1 8 4 5 4 ATL_amm4x5xVL_simd.c "R. Clint Whaley" + 16 448 4 6 8 1 8 4 6 4 ATL_amm4x6xVL_simd.c "R. Clint Whaley" diff -Nru atlas-3.10.2/tune/blas/gemm/CASES/zcases.SSE atlas-3.10.3/tune/blas/gemm/CASES/zcases.SSE --- atlas-3.10.2/tune/blas/gemm/CASES/zcases.SSE 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/CASES/zcases.SSE 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ "" -44 +42 200 8 4 1 4 1 1 4 1 4 ATL_gemm_SSE.c "Camm Maguire" \ gcc -fomit-frame-pointer -O @@ -90,9 +90,6 @@ 231 8 -56 -56 -56 1 1 14 1 56 ATL_dmm14x1x56_sse2pABC.c "R. Clint Whaley" \ gcc -m64 -x assembler-with-cpp -232 128 6 1 -60 1 1 6 1 60 ATL_dmm6x1x60_sse2_32.c "R. Clint Whaley" \ -gcc --m32 -x assembler-with-cpp 233 80 1 6 -72 0 1 1 6 72 ATL_dmm1x6x72_sse2.c "R. Clint Whaley" \ gcc -x assembler-with-cpp @@ -105,9 +102,6 @@ 236 192 6 1 -72 0 1 6 1 72 ATL_dmm6x1x72_sse2.c "R. Clint Whaley" \ gcc -x assembler-with-cpp -237 128 6 1 -60 1 1 6 1 60 ATL_dmm6x1x60_sse2_32.c "R. Clint Whaley" \ -gcc --x assembler-with-cpp 238 192 4 1 -44 0 4 4 1 44 ATL_dmm4x1x44_4_sse2.c "R. 
Clint Whaley" \ gcc -x assembler-with-cpp diff -Nru atlas-3.10.2/tune/blas/gemm/emit_mm.c atlas-3.10.3/tune/blas/gemm/emit_mm.c --- atlas-3.10.2/tune/blas/gemm/emit_mm.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/emit_mm.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -1650,7 +1650,7 @@ cTA, cTB, M, N, K); fprintf(fpout, " * lda=%d, ldb=%d, ldc=%d, mu=%d, nu=%d, ku=%d, pf=%d\n", lda, ldb, ldc, mu, nu, ku, pfA); - fprintf(fpout, " * Generated by ATLAS/tune/blas/gemm/emit_mm.c (3.10.2)\n"); + fprintf(fpout, " * Generated by ATLAS/tune/blas/gemm/emit_mm.c (3.10.3)\n"); fprintf(fpout, " */\n"); } diff -Nru atlas-3.10.2/tune/blas/gemm/fc.c atlas-3.10.3/tune/blas/gemm/fc.c --- atlas-3.10.2/tune/blas/gemm/fc.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/fc.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/findCE.c atlas-3.10.3/tune/blas/gemm/findCE.c --- atlas-3.10.2/tune/blas/gemm/findCE.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/findCE.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/gmmsearch.c atlas-3.10.3/tune/blas/gemm/gmmsearch.c --- atlas-3.10.2/tune/blas/gemm/gmmsearch.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/gmmsearch.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/hcsearch.c atlas-3.10.3/tune/blas/gemm/hcsearch.c --- atlas-3.10.2/tune/blas/gemm/hcsearch.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/hcsearch.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/mmcuncpsearch.c atlas-3.10.3/tune/blas/gemm/mmcuncpsearch.c --- atlas-3.10.2/tune/blas/gemm/mmcuncpsearch.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/mmcuncpsearch.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, * 2007, 2008, 2009, 2010 R. 
Clint Whaley * @@ -2986,7 +2986,8 @@ "FOUND # OF REGISTERS TO BE %d; TRYING 8 FOR SAFETY.\n", nreg); nreg = 8; } -#if !defined (ATL_GAS_x8632) && !defined(ATL_GAS_x8664) +#if !defined (ATL_GAS_x8632) && !defined(ATL_GAS_x8664) && \ + !defined(ATL_GAS_WOW64) else if (nreg < 16) { fprintf(stderr, diff -Nru atlas-3.10.2/tune/blas/gemm/mmflagsearch.c atlas-3.10.3/tune/blas/gemm/mmflagsearch.c --- atlas-3.10.2/tune/blas/gemm/mmflagsearch.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/mmflagsearch.c 2016-07-28 19:43:20.000000000 +0000 @@ -56,6 +56,42 @@ return(i); } +ATL_flagnode_t *GetClangOptFlags(void) +{ + char *gccflags[] = +{"REPLACE THIS LINE WT ARCH-DEP FLAGS ALWAYS USED (eg, -mfpmath=sse -msse3)", + "4", "-O2", "-O1", "-O3", "-Os", + "0", + "# Flags to probe once optimization level is selected", + "-freroll-loops", + "-fno-reroll-loops", + "-funroll-loops", + "-fno-unroll-loops", + "-adce", "-dce", + "-die", + "-dse", + "-indvars", + "-ldcssa", + "-licm", + "-loop-reduce", + "-loop-rotate", + "-loop-simplify", + "-loop-unroll", + "-loop-unswitch", + NULL +}; + int i; + ATL_flagnode_t *bp, *pf; + + pf = bp = NewFlagNode(gccflags[0]); + for (i=1; gccflags[i]; i++) + { + pf->next = NewFlagNode(gccflags[i]); + pf = pf->next; + } + return(bp); +} + ATL_flagnode_t *GetGccOptFlags(void) { char *gccflags[] = @@ -226,7 +262,7 @@ ln = ReadFlagLine(fpin); n = strtol(ln, NULL, 0); if (!n || !lvls) - optflags = NULL; + oflags = NULL; else { ln = ReadFlagLine(fpin); @@ -362,13 +398,21 @@ case 'f': /* flag file */ if (++i >= nargs) PrintUsage(args[0], i, NULL); - if (!strcmp(args[i], "gcc")) + if (!strcmp(args[i], "gcc") || !strcmp(args[i], "clang")) { FILE *fpout; ATL_flagnode_t *bp, *fp; - fpout = fopen("gccflags.txt", "w"); + if (!strcmp(args[i], "clang")) + { + fpout = fopen("clangflags.txt", "w"); + bp = GetClangOptFlags(); + } + else + { + fpout = fopen("gccflags.txt", "w"); + bp = GetGccOptFlags(); + } assert(fpout); - bp = GetGccOptFlags(); for 
(fp=bp; fp; fp = fp->next) fprintf(fpout, "%s\n", fp->flags); fclose(fpout); @@ -693,8 +737,8 @@ PrintMMLine(stdout, mmp); } baseflags = ReadFlags(fpflag, &optlvls, &lvlflags, &flags); - GreedyLinearFlagSearch(pre, verb, NULL, 1.01, mmp, baseflags, optlvls, - lvlflags, flags); + baseflags = GreedyLinearFlagSearch(pre, verb, NULL, 1.01, mmp, baseflags, + optlvls, lvlflags, flags); KillAllFlagNodes(lvlflags); KillAllFlagNodes(optlvls); KillAllFlagNodes(flags); diff -Nru atlas-3.10.2/tune/blas/gemm/mmgen_sse.c atlas-3.10.3/tune/blas/gemm/mmgen_sse.c --- atlas-3.10.2/tune/blas/gemm/mmgen_sse.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/mmgen_sse.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 Chad Zalkin * * Code contributers : Chad Zalkin, R. Clint Whaley diff -Nru atlas-3.10.2/tune/blas/gemm/mmksearch_sse.c atlas-3.10.3/tune/blas/gemm/mmksearch_sse.c --- atlas-3.10.2/tune/blas/gemm/mmksearch_sse.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/mmksearch_sse.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009 R. Clint Whaley * * Code contributers : R. Clint Whaley, Chad Zalkin diff -Nru atlas-3.10.2/tune/blas/gemm/mmsearch.c atlas-3.10.3/tune/blas/gemm/mmsearch.c --- atlas-3.10.2/tune/blas/gemm/mmsearch.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/mmsearch.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/mmtst.c atlas-3.10.3/tune/blas/gemm/mmtst.c --- atlas-3.10.2/tune/blas/gemm/mmtst.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/mmtst.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/tfc.c atlas-3.10.3/tune/blas/gemm/tfc.c --- atlas-3.10.2/tune/blas/gemm/tfc.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/tfc.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -56,6 +56,12 @@ #define DENMAT 175 #define MAXALLOC (8*1024*1024*8) +#if MAXALLOC < 3*L2SIZE + #undef MAXALLOC + #define MAXALLOC (3*L2SIZE) +#endif + + #ifdef ATL_DeclareSlens F77_INTEGER ATL_Slen1, ATL_Slen2; diff -Nru atlas-3.10.2/tune/blas/gemm/ummsearch.c atlas-3.10.3/tune/blas/gemm/ummsearch.c --- atlas-3.10.2/tune/blas/gemm/ummsearch.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/ummsearch.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 * R. 
Clint Whaley * diff -Nru atlas-3.10.2/tune/blas/gemm/usercomb.c atlas-3.10.3/tune/blas/gemm/usercomb.c --- atlas-3.10.2/tune/blas/gemm/usercomb.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/usercomb.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/userflag.c atlas-3.10.3/tune/blas/gemm/userflag.c --- atlas-3.10.2/tune/blas/gemm/userflag.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/userflag.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemm/userindex.c atlas-3.10.3/tune/blas/gemm/userindex.c --- atlas-3.10.2/tune/blas/gemm/userindex.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemm/userindex.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2000 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/mvktime.c atlas-3.10.3/tune/blas/gemv/mvktime.c --- atlas-3.10.2/tune/blas/gemv/mvktime.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/mvktime.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2009 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/MVNCASES/ATL_cgemvN_8x4_sse3.c atlas-3.10.3/tune/blas/gemv/MVNCASES/ATL_cgemvN_8x4_sse3.c --- atlas-3.10.2/tune/blas/gemv/MVNCASES/ATL_cgemvN_8x4_sse3.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/MVNCASES/ATL_cgemvN_8x4_sse3.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2012, 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/MVNCASES/ATL_cgemvN_axpy.c atlas-3.10.3/tune/blas/gemv/MVNCASES/ATL_cgemvN_axpy.c --- atlas-3.10.2/tune/blas/gemv/MVNCASES/ATL_cgemvN_axpy.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/MVNCASES/ATL_cgemvN_axpy.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/MVNCASES/ATL_gemvN_axpy.c atlas-3.10.3/tune/blas/gemv/MVNCASES/ATL_gemvN_axpy.c --- atlas-3.10.2/tune/blas/gemv/MVNCASES/ATL_gemvN_axpy.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/MVNCASES/ATL_gemvN_axpy.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/MVNCASES/ATL_sgemvN_8x4_sse.c atlas-3.10.3/tune/blas/gemv/MVNCASES/ATL_sgemvN_8x4_sse.c --- atlas-3.10.2/tune/blas/gemv/MVNCASES/ATL_sgemvN_8x4_sse.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/MVNCASES/ATL_sgemvN_8x4_sse.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2012, 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/mvnhgen.c atlas-3.10.3/tune/blas/gemv/mvnhgen.c --- atlas-3.10.2/tune/blas/gemv/mvnhgen.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/mvnhgen.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/mvnktest.c atlas-3.10.3/tune/blas/gemv/mvnktest.c --- atlas-3.10.2/tune/blas/gemv/mvnktest.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/mvnktest.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/mvntest.c atlas-3.10.3/tune/blas/gemv/mvntest.c --- atlas-3.10.2/tune/blas/gemv/mvntest.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/mvntest.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/MVTCASES/ATL_cgemvT_8x4_avx.c atlas-3.10.3/tune/blas/gemv/MVTCASES/ATL_cgemvT_8x4_avx.c --- atlas-3.10.2/tune/blas/gemv/MVTCASES/ATL_cgemvT_8x4_avx.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/MVTCASES/ATL_cgemvT_8x4_avx.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2012, 2011 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/MVTCASES/ATL_cgemvT_8x4_sse3.c atlas-3.10.3/tune/blas/gemv/MVTCASES/ATL_cgemvT_8x4_sse3.c --- atlas-3.10.2/tune/blas/gemv/MVTCASES/ATL_cgemvT_8x4_sse3.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/MVTCASES/ATL_cgemvT_8x4_sse3.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2012, 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/MVTCASES/ATL_cgemvT_dot.c atlas-3.10.3/tune/blas/gemv/MVTCASES/ATL_cgemvT_dot.c --- atlas-3.10.2/tune/blas/gemv/MVTCASES/ATL_cgemvT_dot.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/MVTCASES/ATL_cgemvT_dot.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/MVTCASES/ATL_gemvT_dot.c atlas-3.10.3/tune/blas/gemv/MVTCASES/ATL_gemvT_dot.c --- atlas-3.10.2/tune/blas/gemv/MVTCASES/ATL_gemvT_dot.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/MVTCASES/ATL_gemvT_dot.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/MVTCASES/ATL_sgemvT_8x4_neon.S atlas-3.10.3/tune/blas/gemv/MVTCASES/ATL_sgemvT_8x4_neon.S --- atlas-3.10.2/tune/blas/gemv/MVTCASES/ATL_sgemvT_8x4_neon.S 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/MVTCASES/ATL_sgemvT_8x4_neon.S 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 Md. 
Majedul Haque Sujon * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/MVTCASES/ATL_sgemvT_8x4_sse.c atlas-3.10.3/tune/blas/gemv/MVTCASES/ATL_sgemvT_8x4_sse.c --- atlas-3.10.2/tune/blas/gemv/MVTCASES/ATL_sgemvT_8x4_sse.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/MVTCASES/ATL_sgemvT_8x4_sse.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2012, 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/mvthgen.c atlas-3.10.3/tune/blas/gemv/mvthgen.c --- atlas-3.10.2/tune/blas/gemv/mvthgen.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/mvthgen.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/mvtktest.c atlas-3.10.3/tune/blas/gemv/mvtktest.c --- atlas-3.10.2/tune/blas/gemv/mvtktest.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/mvtktest.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/gemv/mvttest.c atlas-3.10.3/tune/blas/gemv/mvttest.c --- atlas-3.10.2/tune/blas/gemv/mvttest.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/gemv/mvttest.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R1CASES/ATL_cgerk_2x1p.c atlas-3.10.3/tune/blas/ger/R1CASES/ATL_cgerk_2x1p.c --- atlas-3.10.2/tune/blas/ger/R1CASES/ATL_cgerk_2x1p.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R1CASES/ATL_cgerk_2x1p.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2009, 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R1CASES/ATL_cgerk_8x4_sse3.c atlas-3.10.3/tune/blas/ger/R1CASES/ATL_cgerk_8x4_sse3.c --- atlas-3.10.2/tune/blas/ger/R1CASES/ATL_cgerk_8x4_sse3.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R1CASES/ATL_cgerk_8x4_sse3.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R1CASES/ATL_cgerk_axpy.c atlas-3.10.3/tune/blas/ger/R1CASES/ATL_cgerk_axpy.c --- atlas-3.10.2/tune/blas/ger/R1CASES/ATL_cgerk_axpy.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R1CASES/ATL_cgerk_axpy.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2009, 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R1CASES/ATL_dgerk_4x8_sse.c atlas-3.10.3/tune/blas/ger/R1CASES/ATL_dgerk_4x8_sse.c --- atlas-3.10.2/tune/blas/ger/R1CASES/ATL_dgerk_4x8_sse.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R1CASES/ATL_dgerk_4x8_sse.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R1CASES/ATL_gerk_1x4_0.c atlas-3.10.3/tune/blas/ger/R1CASES/ATL_gerk_1x4_0.c --- atlas-3.10.2/tune/blas/ger/R1CASES/ATL_gerk_1x4_0.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R1CASES/ATL_gerk_1x4_0.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009, 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R1CASES/ATL_gerk_4x4_1.c atlas-3.10.3/tune/blas/ger/R1CASES/ATL_gerk_4x4_1.c --- atlas-3.10.2/tune/blas/ger/R1CASES/ATL_gerk_4x4_1.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R1CASES/ATL_gerk_4x4_1.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009, 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R1CASES/ATL_gerk_8x4_0.c atlas-3.10.3/tune/blas/ger/R1CASES/ATL_gerk_8x4_0.c --- atlas-3.10.2/tune/blas/ger/R1CASES/ATL_gerk_8x4_0.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R1CASES/ATL_gerk_8x4_0.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009, 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R1CASES/ATL_gerk_axpy.c atlas-3.10.3/tune/blas/ger/R1CASES/ATL_gerk_axpy.c --- atlas-3.10.2/tune/blas/ger/R1CASES/ATL_gerk_axpy.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R1CASES/ATL_gerk_axpy.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2009, 1999 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R1CASES/ATL_sgerk_8x4_sse.c atlas-3.10.3/tune/blas/ger/R1CASES/ATL_sgerk_8x4_sse.c --- atlas-3.10.2/tune/blas/ger/R1CASES/ATL_sgerk_8x4_sse.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R1CASES/ATL_sgerk_8x4_sse.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R1CASES/ATL_zgerk_1x4_sse3.c atlas-3.10.3/tune/blas/ger/R1CASES/ATL_zgerk_1x4_sse3.c --- atlas-3.10.2/tune/blas/ger/R1CASES/ATL_zgerk_1x4_sse3.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R1CASES/ATL_zgerk_1x4_sse3.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/r1hgen.c atlas-3.10.3/tune/blas/ger/r1hgen.c --- atlas-3.10.2/tune/blas/ger/r1hgen.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/r1hgen.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2009 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/r1ktest.c atlas-3.10.3/tune/blas/ger/r1ktest.c --- atlas-3.10.2/tune/blas/ger/r1ktest.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/r1ktest.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/r1ktime.c atlas-3.10.3/tune/blas/ger/r1ktime.c --- atlas-3.10.2/tune/blas/ger/r1ktime.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/r1ktime.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R2CASES/ATL_dger2k_2x2_sse3.c atlas-3.10.3/tune/blas/ger/R2CASES/ATL_dger2k_2x2_sse3.c --- atlas-3.10.2/tune/blas/ger/R2CASES/ATL_dger2k_2x2_sse3.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R2CASES/ATL_dger2k_2x2_sse3.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2012, 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R2CASES/ATL_sger2K_NEON_lda4.S atlas-3.10.3/tune/blas/ger/R2CASES/ATL_sger2K_NEON_lda4.S --- atlas-3.10.2/tune/blas/ger/R2CASES/ATL_sger2K_NEON_lda4.S 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R2CASES/ATL_sger2K_NEON_lda4.S 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 Md. Rakib Hasan * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R2CASES/ATL_sger2K_NEON.S atlas-3.10.3/tune/blas/ger/R2CASES/ATL_sger2K_NEON.S --- atlas-3.10.2/tune/blas/ger/R2CASES/ATL_sger2K_NEON.S 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R2CASES/ATL_sger2K_NEON.S 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 Md. 
Rakib Hasan * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/R2CASES/ATL_zger2k_2x1_sse3.c atlas-3.10.3/tune/blas/ger/R2CASES/ATL_zger2k_2x1_sse3.c --- atlas-3.10.2/tune/blas/ger/R2CASES/ATL_zger2k_2x1_sse3.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R2CASES/ATL_zger2k_2x1_sse3.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff -Nru atlas-3.10.2/tune/blas/ger/R2CASES/ATL_zger2k_rk2_sse3.c atlas-3.10.3/tune/blas/ger/R2CASES/ATL_zger2k_rk2_sse3.c --- atlas-3.10.2/tune/blas/ger/R2CASES/ATL_zger2k_rk2_sse3.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/R2CASES/ATL_zger2k_rk2_sse3.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2012, 2011 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/r2hgen.c atlas-3.10.3/tune/blas/ger/r2hgen.c --- atlas-3.10.2/tune/blas/ger/r2hgen.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/r2hgen.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2009 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/r2ktime.c atlas-3.10.3/tune/blas/ger/r2ktime.c --- atlas-3.10.2/tune/blas/ger/r2ktime.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/r2ktime.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010, 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/ger/s1nxtune.c atlas-3.10.3/tune/blas/ger/s1nxtune.c --- atlas-3.10.2/tune/blas/ger/s1nxtune.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/s1nxtune.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -206,7 +206,7 @@ double t0, tL, tN; /* 0, Last, Next */ double tB, tE, tM; /* beginning, end, middle timings */ ATL_INT n0, nL, nN; - ATL_INT nB, nE, nM; + ATL_INT nB, nE, nM=0; t0 = GetTimes(Uplo, verb, nsample, nreps, flushelts, N, N); printf("\n Time for N=NX=%d : %e\n", N, t0); diff -Nru atlas-3.10.2/tune/blas/ger/s2nxtune.c atlas-3.10.3/tune/blas/ger/s2nxtune.c --- atlas-3.10.2/tune/blas/ger/s2nxtune.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/ger/s2nxtune.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2010 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -207,7 +207,7 @@ double t0, tL, tN; /* 0, Last, Next */ double tB, tE, tM; /* beginning, end, middle timings */ ATL_INT n0, nL, nN; - ATL_INT nB, nE, nM; + ATL_INT nB, nE, nM=0; t0 = GetTimes(Uplo, verb, nsample, nreps, flushelts, N, N); printf("\n Time for N=NX=%d : %e\n", N, t0); diff -Nru atlas-3.10.2/tune/blas/level1/axpbysrch.c atlas-3.10.3/tune/blas/level1/axpbysrch.c --- atlas-3.10.2/tune/blas/level1/axpbysrch.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/level1/axpbysrch.c 2016-07-28 19:43:20.000000000 +0000 @@ -588,7 +588,7 @@ i = sprintf(ln, "make %caxpbytest urout=%s opt=\"", pre, fp->rout); if (fp->incX == 0) i += sprintf(ln+i, " -X 4 1 -1 2 -3"); if (fp->incY == 0) i += sprintf(ln+i, " -Y 4 1 -1 3 -2"); - if (pre == 'c' && pre == 'z') /* complex alphas */ + if (pre == 'c' || pre == 'z') /* complex alphas */ { if (fp->alpha == 0) /* imag = 0 */ i += sprintf(ln+i, " -a 3 1.0 0.0 -1.0 0.0 0.9 0.0"); diff -Nru atlas-3.10.2/tune/blas/level1/AXPY/zaxpy_avx.c atlas-3.10.3/tune/blas/level1/AXPY/zaxpy_avx.c --- atlas-3.10.2/tune/blas/level1/AXPY/zaxpy_avx.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/level1/AXPY/zaxpy_avx.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/level1/axpysrch.c atlas-3.10.3/tune/blas/level1/axpysrch.c --- atlas-3.10.2/tune/blas/level1/axpysrch.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/level1/axpysrch.c 2016-07-28 19:43:20.000000000 +0000 @@ -574,7 +574,7 @@ i = sprintf(ln, "make %caxpytest urout=%s opt=\"", pre, fp->rout); if (fp->incX == 0) i += sprintf(ln+i, " -X 4 1 -1 2 -3"); if (fp->incY == 0) i += sprintf(ln+i, " -Y 4 1 -1 3 -2"); - if (pre == 'c' && pre == 'z') /* complex alphas */ + if (pre == 'c' || pre == 'z') /* complex alphas */ { if (fp->alpha == 0) /* imag = 0 */ i += sprintf(ln+i, " -a 3 1.0 0.0 -1.0 0.0 0.9 0.0"); diff -Nru atlas-3.10.2/tune/blas/level1/cpscsrch.c atlas-3.10.3/tune/blas/level1/cpscsrch.c --- atlas-3.10.2/tune/blas/level1/cpscsrch.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/level1/cpscsrch.c 2016-07-28 19:43:20.000000000 +0000 @@ -583,7 +583,7 @@ i = sprintf(ln, "make %ccpsctest urout=%s opt=\"", pre, fp->rout); if (fp->incX == 0) i += sprintf(ln+i, " -X 4 1 -1 2 -3"); if (fp->incY == 0) i += sprintf(ln+i, " -Y 4 1 -1 3 -2"); - if (pre == 'c' && pre == 'z') /* complex alphas */ + if (pre == 'c' || pre == 'z') /* complex alphas */ { if (fp->alpha == 0) /* imag = 0 */ i += sprintf(ln+i, " -a 3 1.0 0.0 -1.0 0.0 0.9 0.0"); diff -Nru atlas-3.10.2/tune/blas/level1/IAMAX/ciamax_avx.c atlas-3.10.3/tune/blas/level1/IAMAX/ciamax_avx.c --- atlas-3.10.2/tune/blas/level1/IAMAX/ciamax_avx.c 2014-07-10 16:22:26.000000000 +0000 +++ atlas-3.10.3/tune/blas/level1/IAMAX/ciamax_avx.c 2016-07-28 19:43:21.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * Copyright (C) 2011 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/level1/rotsrch.c atlas-3.10.3/tune/blas/level1/rotsrch.c --- atlas-3.10.2/tune/blas/level1/rotsrch.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/level1/rotsrch.c 2016-07-28 19:43:20.000000000 +0000 @@ -590,7 +590,7 @@ i = sprintf(ln, "make %crottest urout=%s opt=\"", pre, fp->rout); if (fp->incX == 0) i += sprintf(ln+i, " -X 4 1 -1 2 -3"); if (fp->incY == 0) i += sprintf(ln+i, " -Y 4 1 -1 3 -2"); - if (pre == 'c' && pre == 'z') /* complex alphas */ + if (pre == 'c' || pre == 'z') /* complex alphas */ { if (fp->alpha == 0) /* imag = 0 */ i += sprintf(ln+i, " -a 3 1.0 0.0 -1.0 0.0 0.9 0.0"); diff -Nru atlas-3.10.2/tune/blas/level1/scalsrch.c atlas-3.10.3/tune/blas/level1/scalsrch.c --- atlas-3.10.2/tune/blas/level1/scalsrch.c 2014-07-10 16:22:25.000000000 +0000 +++ atlas-3.10.3/tune/blas/level1/scalsrch.c 2016-07-28 19:43:20.000000000 +0000 @@ -538,7 +538,7 @@ fn = fp->next; i = sprintf(ln, "make %cscaltest urout=%s opt=\"", pre, fp->rout); if (fp->incX == 0) i += sprintf(ln+i, " -X 4 1 -1 2 -3"); - if (pre == 'c' && pre == 'z') /* complex alphas */ + if (pre == 'c' || pre == 'z') /* complex alphas */ { if (fp->alpha == 0) /* imag = 0 */ i += sprintf(ln+i, " -a 3 1.0 0.0 -1.0 0.0 0.9 0.0"); diff -Nru atlas-3.10.2/tune/blas/level3/ATL_trsm.c atlas-3.10.3/tune/blas/level3/ATL_trsm.c --- atlas-3.10.2/tune/blas/level3/ATL_trsm.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/level3/ATL_trsm.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2004 Antoine P. Petitet * * Code contributers : Antoine P. Petitet, R. 
Clint Whaley diff -Nru atlas-3.10.2/tune/blas/level3/invtrsm.c atlas-3.10.3/tune/blas/level3/invtrsm.c --- atlas-3.10.2/tune/blas/level3/invtrsm.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/level3/invtrsm.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/blas/level3/tsmfc.c atlas-3.10.3/tune/blas/level3/tsmfc.c --- atlas-3.10.2/tune/blas/level3/tsmfc.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/blas/level3/tsmfc.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/lapack/lanbsrch.c atlas-3.10.3/tune/lapack/lanbsrch.c --- atlas-3.10.2/tune/lapack/lanbsrch.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/lapack/lanbsrch.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2009 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/sysinfo/ATL_cputime.c atlas-3.10.3/tune/sysinfo/ATL_cputime.c --- atlas-3.10.2/tune/sysinfo/ATL_cputime.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/sysinfo/ATL_cputime.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/sysinfo/ATL_walltime.c atlas-3.10.3/tune/sysinfo/ATL_walltime.c --- atlas-3.10.2/tune/sysinfo/ATL_walltime.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/sysinfo/ATL_walltime.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/sysinfo/emit_buildinfo.c atlas-3.10.3/tune/sysinfo/emit_buildinfo.c --- atlas-3.10.2/tune/sysinfo/emit_buildinfo.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/sysinfo/emit_buildinfo.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without @@ -206,7 +206,7 @@ fprintf(fpout, "#define ATL_SYSINFO \"%s\"\n", SYS); fprintf(fpout, "#define ATL_DATE \"%s\"\n", DATE); fprintf(fpout, "#define ATL_UNAM \"%s\"\n", UNAM); - fprintf(fpout, "#define ATL_VERS \"3.10.2\"\n"); + fprintf(fpout, "#define ATL_VERS \"3.10.3\"\n"); } void CreateFile(char *file) diff -Nru atlas-3.10.2/tune/sysinfo/emit_lamch.c atlas-3.10.3/tune/sysinfo/emit_lamch.c --- atlas-3.10.2/tune/sysinfo/emit_lamch.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/sysinfo/emit_lamch.c 2016-07-28 19:43:20.000000000 +0000 @@ -5,75 +5,7 @@ #include #include -#if defined(__MINGW32__) || defined(__MINGW64__) -int slashdrivesub(char *ln) -/* - * replaces \\c\ with c:\, returns change in string length - * this version required for older cygwins - */ -{ - char *sp, *lp=ln, ctmp; - int nrep=0; - do - { - sp = strstr(lp, "\\\\"); - if (sp && strlen(sp) > 3) - { - if (sp[2] == 'a' || sp[2] == 
'b' || sp[2] == 'c' || sp[2] == 'd' || - sp[2] == 'e' || sp[2] == 'f' || sp[2] == 'g' || sp[2] == 'h') - { - if (sp[3] == '\\') - { - ctmp = sp[2]; - sp[0] = sp[2]; - sp[1] = ':'; - sp[2] = '\\'; - for (lp=sp+3; *lp = lp[1]; lp++); - lp = sp + 3; - nrep++; - } - else lp = sp + 2; - } - else lp = sp + 2; - } - else lp = sp + 2; - } - while (sp); - return(-nrep); -} - -int cygdrivesub(char *ln) -/* - * replaces \cygdrive\c\ with c:\, returns change in string length - * this version works cygnus version 1.1.0 - */ -{ - char *sp; - int i=0; - - while(sp = strstr(ln, "\\cygdrive\\")) - { - i++; - sp[0] = sp[10]; - sp[1] = ':'; - sp[2] = '\\'; - sp += 3; - while (*sp = sp[9]) sp++; - } - return( slashdrivesub(ln) - (i*9) ); -} - -void slashsub(char *ln) -/* - * changes forward slash of unix to backslash of windoze - */ -{ - int i; - for (i=0; ln[i]; i++) if (ln[i] == '/') ln[i] = '\\'; -} - -#endif int sComputeRound(void) /* * Blind translation of netlib LAPACK LAMCH's rounding computation @@ -117,25 +49,19 @@ return(FLT_MIN); } -void emit_slamch(char *path) +void emit_slamch(void) { FILE *fpout; char *name; volatile float f, under, over; int len = 16, bad; - if (path) - { - len += strlen(path); - name = malloc(len); - assert(name); - sprintf(name, "%s/atlas_slamch.h", path); - fpout = fopen(name, "w"); - assert(fpout); - free(name); - } - else - fpout = stdout; + #if defined(__MINGW32__) || defined(__MINGW64__) + fpout = fopen("res\\atlas_slamch.h", "w"); + #else + fpout = fopen("res/atlas_slamch.h", "w"); + #endif + assert(fpout); fprintf(fpout, "/* generated by %s */\n\n", __FILE__); fprintf(fpout, "#ifndef ATLAS_SLAMCH_H\n"); fprintf(fpout, " #define ATLAS_SLAMCH_H\n\n"); @@ -226,25 +152,19 @@ return(DBL_MIN); } -void emit_dlamch(char *path) +void emit_dlamch(void) { FILE *fpout; char *name; volatile double f, under, over; int len = 16, bad; - if (path) - { - len += strlen(path); - name = malloc(len); - assert(name); - sprintf(name, "%s/atlas_dlamch.h", path); - 
fpout = fopen(name, "w"); - assert(fpout); - free(name); - } - else - fpout = stdout; + #if defined(__MINGW32__) || defined(__MINGW64__) + fpout = fopen("res\\atlas_dlamch.h", "w"); + #else + fpout = fopen("res/atlas_dlamch.h", "w"); + #endif + assert(fpout); fprintf(fpout, "/* generated by %s */\n\n", __FILE__); fprintf(fpout, "#ifndef ATLAS_DLAMCH_H\n"); fprintf(fpout, " #define ATLAS_DLAMCH_H\n\n"); @@ -294,24 +214,7 @@ int main (int nargs, char **args) { - char *path = "res/"; - if (nargs > 1) - path = args[1]; - #if defined(__MINGW32__) || defined(__MINGW64__) - { - char *winpath; - winpath = malloc(sizeof(char)*(strlen(path)+1)); - assert(winpath); - strcpy(winpath, path); - slashsub(winpath); - cygdrivesub(winpath); - emit_dlamch(winpath); - emit_slamch(winpath); - free(winpath); - } - #else - emit_dlamch(path); - emit_slamch(path); - #endif + emit_dlamch(); + emit_slamch(); return(0); } diff -Nru atlas-3.10.2/tune/sysinfo/emit_typ.c atlas-3.10.3/tune/sysinfo/emit_typ.c --- atlas-3.10.2/tune/sysinfo/emit_typ.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/sysinfo/emit_typ.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/sysinfo/GetSysSum.c atlas-3.10.3/tune/sysinfo/GetSysSum.c --- atlas-3.10.2/tune/sysinfo/GetSysSum.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/sysinfo/GetSysSum.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. 
Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/sysinfo/L1CacheSize.c atlas-3.10.3/tune/sysinfo/L1CacheSize.c --- atlas-3.10.2/tune/sysinfo/L1CacheSize.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/sysinfo/L1CacheSize.c 2016-07-28 19:43:20.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Automatically Tuned Linear Algebra Software v3.10.2 + * Automatically Tuned Linear Algebra Software v3.10.3 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without diff -Nru atlas-3.10.2/tune/sysinfo/masrch.c atlas-3.10.3/tune/sysinfo/masrch.c --- atlas-3.10.2/tune/sysinfo/masrch.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/sysinfo/masrch.c 2016-07-28 19:43:20.000000000 +0000 @@ -34,7 +34,7 @@ fprintf(fpout, " register int i;\n"); for (i=0; i < lat; i++) - fprintf(fpout, " m%d=dum[%d];\n", i, i); + fprintf(fpout, " m%d=dum[%d];\n", i, i%32); fprintf(fpout, " for (i=0; i < nrep; i++)\n {\n"); for (j=0; j < ur; j++) @@ -51,7 +51,7 @@ } fprintf(fpout, " }\n\n"); for (i=0; i < lat; i++) - fprintf(fpout, " dum[%d] = a%d;\n", i, i); + fprintf(fpout, " dum[%d] = a%d;\n", i%32, i); fprintf(fpout, " return(nrep*%d.0);\n", lfl*ur); fprintf(fpout, "}\n"); } diff -Nru atlas-3.10.2/tune/threads/tune_aff.c atlas-3.10.3/tune/threads/tune_aff.c --- atlas-3.10.2/tune/threads/tune_aff.c 2014-07-10 16:22:24.000000000 +0000 +++ atlas-3.10.3/tune/threads/tune_aff.c 2016-07-28 19:43:20.000000000 +0000 @@ -162,7 +162,15 @@ if (outfile) /* if this is a real run where we want to change things */ { +/* + * SMT kills parallel perf on AIX, so force affinity even if this + * timing looks bad! + */ + #ifdef ATL_OS_AIX + if (0) + #else if (tnoa*1.04 < taff) + #endif { FILE *fpout; printf(