Commit 453060a9 by Ian Lance Taylor

runtime: copy memory hash code from Go 1.7

    
    Rewrite the AES hashing code from gc assembler to C code using
    intrinsics.  The resulting code generates the same hash code for the
    same input as the gc code--that doesn't matter as such, but testing it
    ensures that the C code does something useful.
    
    Also change mips64pe32le to mips64p32le in configure script--noticed
    during CL review.
    
    Reviewed-on: https://go-review.googlesource.com/34022

From-SVN: r243445
parent b2264b09
2442fca7be8a4f51ddc91070fa69ef66e24593ac 78e3527fcaf4ffd33b22e39a56e5d076844302be
The first line of this file holds the git revision number of the last The first line of this file holds the git revision number of the last
merge done from the gofrontend repository. merge done from the gofrontend repository.
...@@ -1648,7 +1648,7 @@ Type::type_functions(Gogo* gogo, Named_type* name, Function_type* hash_fntype, ...@@ -1648,7 +1648,7 @@ Type::type_functions(Gogo* gogo, Named_type* name, Function_type* hash_fntype,
const char* equal_fnname; const char* equal_fnname;
if (this->compare_is_identity(gogo)) if (this->compare_is_identity(gogo))
{ {
hash_fnname = "__go_type_hash_identity"; hash_fnname = "runtime.memhash";
equal_fnname = "__go_type_equal_identity"; equal_fnname = "__go_type_equal_identity";
} }
else else
......
...@@ -422,6 +422,7 @@ endif ...@@ -422,6 +422,7 @@ endif
endif endif
runtime_files = \ runtime_files = \
runtime/aeshash.c \
runtime/go-assert.c \ runtime/go-assert.c \
runtime/go-breakpoint.c \ runtime/go-breakpoint.c \
runtime/go-caller.c \ runtime/go-caller.c \
......
...@@ -189,7 +189,7 @@ libgo_llgo_la_DEPENDENCIES = $(am__DEPENDENCIES_4) ...@@ -189,7 +189,7 @@ libgo_llgo_la_DEPENDENCIES = $(am__DEPENDENCIES_4)
@LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@am__objects_4 = \ @LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@am__objects_4 = \
@LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@ getncpu-bsd.lo @LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@ getncpu-bsd.lo
@LIBGO_IS_LINUX_TRUE@am__objects_4 = getncpu-linux.lo @LIBGO_IS_LINUX_TRUE@am__objects_4 = getncpu-linux.lo
am__objects_5 = go-assert.lo go-breakpoint.lo go-caller.lo \ am__objects_5 = aeshash.lo go-assert.lo go-breakpoint.lo go-caller.lo \
go-callers.lo go-cdiv.lo go-cgo.lo go-construct-map.lo \ go-callers.lo go-cdiv.lo go-cgo.lo go-construct-map.lo \
go-ffi.lo go-fieldtrack.lo go-matherr.lo go-memclr.lo \ go-ffi.lo go-fieldtrack.lo go-matherr.lo go-memclr.lo \
go-memcmp.lo go-memequal.lo go-memmove.lo go-nanotime.lo \ go-memcmp.lo go-memequal.lo go-memmove.lo go-nanotime.lo \
...@@ -767,6 +767,7 @@ toolexeclibgounicode_DATA = \ ...@@ -767,6 +767,7 @@ toolexeclibgounicode_DATA = \
@LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@runtime_getncpu_file = runtime/getncpu-bsd.c @LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@runtime_getncpu_file = runtime/getncpu-bsd.c
@LIBGO_IS_LINUX_TRUE@runtime_getncpu_file = runtime/getncpu-linux.c @LIBGO_IS_LINUX_TRUE@runtime_getncpu_file = runtime/getncpu-linux.c
runtime_files = \ runtime_files = \
runtime/aeshash.c \
runtime/go-assert.c \ runtime/go-assert.c \
runtime/go-breakpoint.c \ runtime/go-breakpoint.c \
runtime/go-caller.c \ runtime/go-caller.c \
...@@ -1446,6 +1447,7 @@ mostlyclean-compile: ...@@ -1446,6 +1447,7 @@ mostlyclean-compile:
distclean-compile: distclean-compile:
-rm -f *.tab.c -rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aeshash.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env_posix.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env_posix.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getncpu-bsd.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getncpu-bsd.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getncpu-irix.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getncpu-irix.Plo@am__quote@
...@@ -1573,6 +1575,13 @@ libgolibbegin_a-go-libmain.obj: runtime/go-libmain.c ...@@ -1573,6 +1575,13 @@ libgolibbegin_a-go-libmain.obj: runtime/go-libmain.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgolibbegin_a_CFLAGS) $(CFLAGS) -c -o libgolibbegin_a-go-libmain.obj `if test -f 'runtime/go-libmain.c'; then $(CYGPATH_W) 'runtime/go-libmain.c'; else $(CYGPATH_W) '$(srcdir)/runtime/go-libmain.c'; fi` @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgolibbegin_a_CFLAGS) $(CFLAGS) -c -o libgolibbegin_a-go-libmain.obj `if test -f 'runtime/go-libmain.c'; then $(CYGPATH_W) 'runtime/go-libmain.c'; else $(CYGPATH_W) '$(srcdir)/runtime/go-libmain.c'; fi`
aeshash.lo: runtime/aeshash.c
@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT aeshash.lo -MD -MP -MF $(DEPDIR)/aeshash.Tpo -c -o aeshash.lo `test -f 'runtime/aeshash.c' || echo '$(srcdir)/'`runtime/aeshash.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/aeshash.Tpo $(DEPDIR)/aeshash.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/aeshash.c' object='aeshash.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o aeshash.lo `test -f 'runtime/aeshash.c' || echo '$(srcdir)/'`runtime/aeshash.c
go-assert.lo: runtime/go-assert.c go-assert.lo: runtime/go-assert.c
@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-assert.lo -MD -MP -MF $(DEPDIR)/go-assert.Tpo -c -o go-assert.lo `test -f 'runtime/go-assert.c' || echo '$(srcdir)/'`runtime/go-assert.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-assert.lo -MD -MP -MF $(DEPDIR)/go-assert.Tpo -c -o go-assert.lo `test -f 'runtime/go-assert.c' || echo '$(srcdir)/'`runtime/go-assert.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/go-assert.Tpo $(DEPDIR)/go-assert.Plo @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/go-assert.Tpo $(DEPDIR)/go-assert.Plo
......
...@@ -13624,7 +13624,7 @@ esac ...@@ -13624,7 +13624,7 @@ esac
# supported by the gofrontend and all architectures supported by the # supported by the gofrontend and all architectures supported by the
# gc toolchain. # gc toolchain.
# N.B. Keep in sync with gcc/testsuite/go.test/go-test.exp (go-set-goarch). # N.B. Keep in sync with gcc/testsuite/go.test/go-test.exp (go-set-goarch).
ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64pe32le ppc ppc64 ppc64le s390 s390x sparc sparc64" ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64p32le ppc ppc64 ppc64le s390 s390x sparc sparc64"
# All known GOARCH_FAMILY values. # All known GOARCH_FAMILY values.
ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 PPC PPC64 S390 S390X SPARC SPARC64" ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 PPC PPC64 S390 S390X SPARC SPARC64"
......
...@@ -197,7 +197,7 @@ AC_SUBST(USE_DEJAGNU) ...@@ -197,7 +197,7 @@ AC_SUBST(USE_DEJAGNU)
# supported by the gofrontend and all architectures supported by the # supported by the gofrontend and all architectures supported by the
# gc toolchain. # gc toolchain.
# N.B. Keep in sync with gcc/testsuite/go.test/go-test.exp (go-set-goarch). # N.B. Keep in sync with gcc/testsuite/go.test/go-test.exp (go-set-goarch).
ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64pe32le ppc ppc64 ppc64le s390 s390x sparc sparc64" ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64p32le ppc ppc64 ppc64le s390 s390x sparc sparc64"
# All known GOARCH_FAMILY values. # All known GOARCH_FAMILY values.
ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 PPC PPC64 S390 S390X SPARC SPARC64" ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 PPC PPC64 S390 S390X SPARC SPARC64"
......
...@@ -23,12 +23,29 @@ import ( ...@@ -23,12 +23,29 @@ import (
//go:linkname efacevaleq runtime.efacevaleq //go:linkname efacevaleq runtime.efacevaleq
//go:linkname eqstring runtime.eqstring //go:linkname eqstring runtime.eqstring
//go:linkname cmpstring runtime.cmpstring //go:linkname cmpstring runtime.cmpstring
//
// Temporary to be called from C code.
//go:linkname alginit runtime.alginit
const ( const (
c0 = uintptr((8-sys.PtrSize)/4*2860486313 + (sys.PtrSize-4)/4*33054211828000289) c0 = uintptr((8-sys.PtrSize)/4*2860486313 + (sys.PtrSize-4)/4*33054211828000289)
c1 = uintptr((8-sys.PtrSize)/4*3267000013 + (sys.PtrSize-4)/4*23344194077549503) c1 = uintptr((8-sys.PtrSize)/4*3267000013 + (sys.PtrSize-4)/4*23344194077549503)
) )
// useAeshash reports whether the AES-based hash should be used.
// alginit sets it once the required CPU features have been detected,
// and memhash consults it before falling back to the portable hash.
var useAeshash bool
// in C code
// aeshashbody hashes the s bytes at p with seed h, using sched as the
// per-process key material. Only the declaration lives here; the body
// is provided by the C runtime under the symbol runtime.aeshashbody.
func aeshashbody(p unsafe.Pointer, h, s uintptr, sched []byte) uintptr
func aeshash(p unsafe.Pointer, h, s uintptr) uintptr {
return aeshashbody(p, h, s, aeskeysched[:])
}
// aeshashstr hashes the string whose header is stored at p, using
// seed h. It forwards the string's data pointer and length to the
// AES-based hash, keyed with the per-process aeskeysched data.
func aeshashstr(p unsafe.Pointer, h uintptr) uintptr {
	hdr := (*stringStruct)(p)
	data := unsafe.Pointer(hdr.str)
	size := uintptr(hdr.len)
	return aeshashbody(data, h, size, aeskeysched[:])
}
func interhash(p unsafe.Pointer, h uintptr, size uintptr) uintptr { func interhash(p unsafe.Pointer, h uintptr, size uintptr) uintptr {
a := (*iface)(p) a := (*iface)(p)
tab := a.tab tab := a.tab
...@@ -198,7 +215,35 @@ func cmpstring(x, y string) int { ...@@ -198,7 +215,35 @@ func cmpstring(x, y string) int {
// Force the creation of function descriptors for equality and hash // Force the creation of function descriptors for equality and hash
// functions. These will be referenced directly by the compiler. // functions. These will be referenced directly by the compiler.
var _ = memhash
var _ = interhash var _ = interhash
var _ = interequal var _ = interequal
var _ = nilinterhash var _ = nilinterhash
var _ = nilinterequal var _ = nilinterequal
// hashRandomBytes is the size in bytes of the AES hash key material:
// 64 when sys.PtrSize is 4 and 128 when sys.PtrSize is 8.
const hashRandomBytes = sys.PtrSize / 4 * 64
// aeskeysched is the per-process random key material for the AES hash.
// alginit fills it with random data when it enables the AES hash, and
// aeshash and aeshashstr pass it to aeshashbody.
// (In the gc runtime it is used in asm_{386,amd64}.s to seed the hash function.)
var aeskeysched [hashRandomBytes]byte
// hashkey seeds the portable memhash in hash{32,64}.go.
// When the AES hash is not enabled, alginit fills it with random data
// and then forces every element to be odd.
var hashkey [4]uintptr
// alginit chooses the memory hash implementation and seeds it.
//
// On 386 and amd64 (but not nacl), when cpuid_ecx advertises every
// instruction set the AES hash relies on, it sets useAeshash and fills
// aeskeysched with random data. Otherwise it fills hashkey with random
// data and forces each element to be odd.
func alginit() {
	// Feature bits tested in cpuid_ecx.
	const (
		featAES   = 1 << 25 // aes (aesenc)
		featSSSE3 = 1 << 9  // ssse3 (pshufb)
		featSSE41 = 1 << 19 // sse4.1 (pinsr{d,q})
	)

	archOK := (GOARCH == "386" || GOARCH == "amd64") && GOOS != "nacl"
	cpuOK := cpuid_ecx&featAES != 0 &&
		cpuid_ecx&featSSSE3 != 0 &&
		cpuid_ecx&featSSE41 != 0

	if !(archOK && cpuOK) {
		// Portable hash: seed the multipliers, viewing hashkey as raw bytes.
		raw := (*[len(hashkey) * sys.PtrSize]byte)(unsafe.Pointer(&hashkey))
		getRandomData(raw[:])
		// make sure these numbers are odd
		for i := range hashkey {
			hashkey[i] |= 1
		}
		return
	}

	// Install aes hash algorithm: we have the instructions we need.
	useAeshash = true
	// Initialize with random data so hash collisions will be hard to engineer.
	getRandomData(aeskeysched[:])
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Hashing algorithm inspired by
// xxhash: https://code.google.com/p/xxhash/
// cityhash: https://code.google.com/p/cityhash/
// +build 386 arm armbe m68k mipso32 mipsn32 mips mipsle ppc s390 sparc
package runtime
import "unsafe"
// For gccgo, use go:linkname to rename compiler-called functions to
// themselves, so that the compiler will export them.
//
//go:linkname memhash runtime.memhash
const (
// Constants for multiplication: four random odd 32-bit numbers.
m1 = 3168982561
m2 = 3339683297
m3 = 832293441
m4 = 2336365089
)
// memhash returns a hash of the s bytes starting at p, mixed with seed.
//
// On 386 (non-nacl) with useAeshash set it defers to aeshash. Otherwise
// it runs the portable 32-bit hash: inputs longer than 16 bytes are
// first consumed 16 bytes at a time across four lanes, and whatever
// remains (0 to 15 bytes, or the whole input when s <= 16) is folded in
// by the size-specific tail cases before the final avalanche.
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
	if GOARCH == "386" && GOOS != "nacl" && useAeshash {
		return aeshash(p, seed, s)
	}
	h := uint32(seed + s*hashkey[0])

	// Bulk phase: only entered for s > 16, and always leaves s < 16.
	if s > 16 {
		lane1 := h
		lane2 := uint32(seed * hashkey[1])
		lane3 := uint32(seed * hashkey[2])
		lane4 := uint32(seed * hashkey[3])
		for ; s >= 16; s -= 16 {
			lane1 = rotl_15((lane1^readUnaligned32(p))*m1) * m2
			lane2 = rotl_15((lane2^readUnaligned32(add(p, 4)))*m2) * m3
			lane3 = rotl_15((lane3^readUnaligned32(add(p, 8)))*m3) * m4
			lane4 = rotl_15((lane4^readUnaligned32(add(p, 12)))*m4) * m1
			p = add(p, 16)
		}
		h = lane1 ^ lane2 ^ lane3 ^ lane4
	}

	// Tail phase: fold in the remaining bytes.
	switch {
	case s == 0:
		// Nothing left to mix in.
	case s < 4:
		first := uint32(*(*byte)(p))
		middle := uint32(*(*byte)(add(p, s>>1)))
		last := uint32(*(*byte)(add(p, s-1)))
		h = rotl_15((h^first^(middle<<8)^(last<<16))*m1) * m2
	case s == 4:
		h = rotl_15((h^readUnaligned32(p))*m1) * m2
	case s <= 8:
		h = rotl_15((h^readUnaligned32(p))*m1) * m2
		h = rotl_15((h^readUnaligned32(add(p, s-4)))*m1) * m2
	case s <= 16:
		h = rotl_15((h^readUnaligned32(p))*m1) * m2
		h = rotl_15((h^readUnaligned32(add(p, 4)))*m1) * m2
		h = rotl_15((h^readUnaligned32(add(p, s-8)))*m1) * m2
		h = rotl_15((h^readUnaligned32(add(p, s-4)))*m1) * m2
	}

	// Final avalanche.
	h ^= h >> 17
	h *= m3
	h ^= h >> 13
	h *= m4
	h ^= h >> 16
	return uintptr(h)
}
// rotl_15 rotates x left by 15 bits.
//
// Note: both shift counts are written as literal constants so that the
// compiler can issue a rotl instruction.
// TODO: convince the compiler to issue rotl instructions after inlining.
func rotl_15(x uint32) uint32 {
	high := x << 15
	low := x >> 17
	return high | low
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Hashing algorithm inspired by
// xxhash: https://code.google.com/p/xxhash/
// cityhash: https://code.google.com/p/cityhash/
// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x alpha arm64be ia64 mipso64 mipsn64 mips64p32 mips64p32le sparc64
package runtime
import "unsafe"
// For gccgo, use go:linkname to rename compiler-called functions to
// themselves, so that the compiler will export them.
//
//go:linkname memhash runtime.memhash
const (
// Constants for multiplication: four random odd 64-bit numbers.
m1 = 16877499708836156737
m2 = 2820277070424839065
m3 = 9497967016996688599
m4 = 15839092249703872147
)
// memhash returns a hash of the s bytes starting at p, mixed with seed.
//
// On amd64 (non-nacl) with useAeshash set it defers to aeshash.
// Otherwise it runs the portable 64-bit hash: inputs longer than 32
// bytes are first consumed 32 bytes at a time across four lanes, and
// whatever remains (0 to 31 bytes, or the whole input when s <= 32) is
// folded in by the size-specific tail cases before the final avalanche.
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
	if GOARCH == "amd64" && GOOS != "nacl" && useAeshash {
		return aeshash(p, seed, s)
	}
	h := uint64(seed + s*hashkey[0])

	// Bulk phase: only entered for s > 32, and always leaves s < 32.
	if s > 32 {
		lane1 := h
		lane2 := uint64(seed * hashkey[1])
		lane3 := uint64(seed * hashkey[2])
		lane4 := uint64(seed * hashkey[3])
		for ; s >= 32; s -= 32 {
			lane1 = rotl_31((lane1^readUnaligned64(p))*m1) * m2
			lane2 = rotl_31((lane2^readUnaligned64(add(p, 8)))*m2) * m3
			lane3 = rotl_31((lane3^readUnaligned64(add(p, 16)))*m3) * m4
			lane4 = rotl_31((lane4^readUnaligned64(add(p, 24)))*m4) * m1
			p = add(p, 32)
		}
		h = lane1 ^ lane2 ^ lane3 ^ lane4
	}

	// Tail phase: fold in the remaining bytes.
	switch {
	case s == 0:
		// Nothing left to mix in.
	case s < 4:
		first := uint64(*(*byte)(p))
		middle := uint64(*(*byte)(add(p, s>>1)))
		last := uint64(*(*byte)(add(p, s-1)))
		h = rotl_31((h^first^(middle<<8)^(last<<16))*m1) * m2
	case s <= 8:
		lo := uint64(readUnaligned32(p))
		hi := uint64(readUnaligned32(add(p, s-4)))
		h = rotl_31((h^lo^(hi<<32))*m1) * m2
	case s <= 16:
		h = rotl_31((h^readUnaligned64(p))*m1) * m2
		h = rotl_31((h^readUnaligned64(add(p, s-8)))*m1) * m2
	case s <= 32:
		h = rotl_31((h^readUnaligned64(p))*m1) * m2
		h = rotl_31((h^readUnaligned64(add(p, 8)))*m1) * m2
		h = rotl_31((h^readUnaligned64(add(p, s-16)))*m1) * m2
		h = rotl_31((h^readUnaligned64(add(p, s-8)))*m1) * m2
	}

	// Final avalanche.
	h ^= h >> 29
	h *= m3
	h ^= h >> 32
	return uintptr(h)
}
// rotl_31 rotates x left by 31 bits.
//
// Note: both shift counts are written as literal constants so that the
// compiler can issue a rotl instruction.
// TODO: convince the compiler to issue rotl instructions after inlining.
func rotl_31(x uint64) uint64 {
	high := x << 31
	low := x >> 33
	return high | low
}
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
import (
"unsafe"
)
var urandom_dev = []byte("/dev/urandom\x00")
// getRandomData fills r with random bytes.
//
// When startupRandomData is available it is copied into r; otherwise
// the bytes are read from /dev/urandom. In both cases extendRandom is
// given the number of bytes obtained so it can cover the rest of r.
func getRandomData(r []byte) {
	if startupRandomData == nil {
		fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
		got := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
		closefd(fd)
		extendRandom(r, int(got))
		return
	}
	copied := copy(r, startupRandomData)
	extendRandom(r, copied)
}
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
package runtime package runtime
import ( import (
"runtime/internal/sys"
"unsafe" "unsafe"
) )
...@@ -668,7 +669,6 @@ type forcegcstate struct { ...@@ -668,7 +669,6 @@ type forcegcstate struct {
// the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.go or os_linux_386.go). // the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.go or os_linux_386.go).
var startupRandomData []byte var startupRandomData []byte
/*
// extendRandom extends the random numbers in r[:n] to the whole slice r. // extendRandom extends the random numbers in r[:n] to the whole slice r.
// Treats n<0 as n==0. // Treats n<0 as n==0.
func extendRandom(r []byte, n int) { func extendRandom(r []byte, n int) {
...@@ -689,7 +689,6 @@ func extendRandom(r []byte, n int) { ...@@ -689,7 +689,6 @@ func extendRandom(r []byte, n int) {
} }
} }
} }
*/
// deferred subroutine calls // deferred subroutine calls
// This is the gccgo version. // This is the gccgo version.
...@@ -770,11 +769,12 @@ var ( ...@@ -770,11 +769,12 @@ var (
sched schedt sched schedt
// newprocs int32 // newprocs int32
// Information about what cpu features are available.
// Set on startup.
cpuid_ecx uint32
// Information about what cpu features are available.
// Set on startup in asm_{x86,amd64}.s.
// cpuid_ecx uint32
// cpuid_edx uint32 // cpuid_edx uint32
// cpuid_ebx7 uint32 // cpuid_ebx7 uint32
// lfenceBeforeRdtsc bool // lfenceBeforeRdtsc bool
......
...@@ -248,6 +248,12 @@ func funcPC(f interface{}) uintptr { ...@@ -248,6 +248,12 @@ func funcPC(f interface{}) uintptr {
return **(**uintptr)(i.data) return **(**uintptr)(i.data)
} }
// For gccgo, to communicate from the C code to the Go code.
// setCpuidECX records v in the package-level cpuid_ecx variable, whose
// feature bits alginit tests when deciding whether to enable the AES hash.
//go:linkname setCpuidECX runtime.setCpuidECX
func setCpuidECX(v uint32) {
cpuid_ecx = v
}
// typedmemmove copies a typed value. // typedmemmove copies a typed value.
// For gccgo for now. // For gccgo for now.
//go:nosplit //go:nosplit
......
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le s390x ppc s390 arm64be
package runtime
import "unsafe"
func readUnaligned32(p unsafe.Pointer) uint32 {
return *(*uint32)(p)
}
func readUnaligned64(p unsafe.Pointer) uint64 {
return *(*uint64)(p)
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build arm mips64 mips64le armbe m68k mipso32 mipsn32 mips mipsle sparc alpha ia64 mipso64 mipsn64 mips64p32 mips64p32le sparc64
package runtime
import "unsafe"
// Note: These routines perform the read with an unspecified endianness.
func readUnaligned32(p unsafe.Pointer) uint32 {
q := (*[4]byte)(p)
return uint32(q[0]) + uint32(q[1])<<8 + uint32(q[2])<<16 + uint32(q[3])<<24
}
func readUnaligned64(p unsafe.Pointer) uint64 {
q := (*[8]byte)(p)
return uint64(q[0]) + uint64(q[1])<<8 + uint64(q[2])<<16 + uint64(q[3])<<24 + uint64(q[4])<<32 + uint64(q[5])<<40 + uint64(q[6])<<48 + uint64(q[7])<<56
}
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Hash code using AES intrinsics.
#include "runtime.h"
// Give aeshashbody the assembler-level name runtime.aeshashbody, the
// symbol under which the Go runtime declares its body-less aeshashbody.
uintptr aeshashbody(void*, uintptr, uintptr, Slice)
__asm__(GOSYM_PREFIX "runtime.aeshashbody");
// Mark the function no_split_stack.
// NOTE(review): presumably so that no split-stack prologue is emitted
// for this function -- confirm against the GCC attribute documentation.
uintptr aeshashbody(void*, uintptr, uintptr, Slice)
__attribute__((no_split_stack));
#if defined(__i386__) || defined(__x86_64__)
#include <emmintrin.h>
#include <tmmintrin.h>
#include <wmmintrin.h>
// Force appropriate CPU level. We won't call here unless the CPU
// supports it.
#pragma GCC target("ssse3", "aes")
#ifdef __x86_64__
// aeshashbody implements a hash function using AES instructions
// available in recent x86 processors. Note this is not encryption,
// just hashing.
//
// This is written to produce exactly the same results as the gc
// implementation, not because that matters, but just to ensure that
// this does something reasonable.
//
// Parameters:
//   p           - start of the bytes to hash.
//   seed        - caller-supplied hash seed.
//   size        - number of bytes at p.
//   aeskeysched - slice of per-process key material; up to 128 bytes
//                 are read from aeskeysched.__values, depending on size.
//
// Returns the low 64 bits of the final 128-bit state.
//
// The input is handled by size class: 0, 1-15, 16, 17-32, 33-64,
// 65-128 and 129 or more bytes. Each class uses 1, 2, 4 or 8
// independent 128-bit lanes, each with its own scrambled seed, and XORs
// the lanes together at the end. For sizes that are not a multiple of
// the lane width the trailing loads overlap earlier ones instead of
// reading past p+size.
uintptr aeshashbody(void* p, uintptr seed, uintptr size, Slice aeskeysched) {
__m128i mseed, mseed2, mseed3, mseed4, mseed5, mseed6, mseed7, mseed8;
__m128i mval, mval2, mval3, mval4, mval5, mval6, mval7, mval8;
// Start with hash seed.
mseed = _mm_cvtsi64_si128(seed);
// Get 16 bits of length.
mseed = _mm_insert_epi16(mseed, size, 4);
// Repeat length 4 times total.
mseed = _mm_shufflehi_epi16(mseed, 0);
// Save unscrambled seed.
mseed2 = mseed;
// XOR in per-process seed.
mseed ^= _mm_loadu_si128(aeskeysched.__values);
// Scramble seed.
mseed = _mm_aesenc_si128(mseed, mseed);
if (size <= 16) {
// Single-lane cases: 0 to 16 bytes.
if (size == 0) {
// Return scrambled input seed.
return _mm_cvtsi128_si64(_mm_aesenc_si128(mseed, mseed));
} else if (size < 16) {
// 1 to 15 bytes: build a 16-byte value holding only the first
// `size` bytes of the input. The test below is false only when
// bits 4-11 of p are all ones.
if ((((uintptr)(p) + 16) & 0xff0) != 0) {
// masks[2*n] and masks[2*n+1] together form a 128-bit mask
// that keeps the low n bytes.
static const uint64 masks[32]
__attribute__ ((aligned(16))) =
{
0x0000000000000000, 0x0000000000000000,
0x00000000000000ff, 0x0000000000000000,
0x000000000000ffff, 0x0000000000000000,
0x0000000000ffffff, 0x0000000000000000,
0x00000000ffffffff, 0x0000000000000000,
0x000000ffffffffff, 0x0000000000000000,
0x0000ffffffffffff, 0x0000000000000000,
0x00ffffffffffffff, 0x0000000000000000,
0xffffffffffffffff, 0x0000000000000000,
0xffffffffffffffff, 0x00000000000000ff,
0xffffffffffffffff, 0x000000000000ffff,
0xffffffffffffffff, 0x0000000000ffffff,
0xffffffffffffffff, 0x00000000ffffffff,
0xffffffffffffffff, 0x000000ffffffffff,
0xffffffffffffffff, 0x0000ffffffffffff,
0xffffffffffffffff, 0x00ffffffffffffff
};
// 16 bytes loaded at p won't cross a page
// boundary, so we can load directly.
mval = _mm_loadu_si128(p);
mval &= *(const __m128i*)(&masks[size*2]);
} else {
// shifts[2*n] and shifts[2*n+1] together form a byte-shuffle
// control that moves the top n bytes of the loaded value down
// to the bottom.
// NOTE(review): the 0xff control bytes are presumably relied on
// to zero the remaining output bytes -- confirm against the
// pshufb documentation.
static const uint64 shifts[32]
__attribute__ ((aligned(16))) =
{
0x0000000000000000, 0x0000000000000000,
0xffffffffffffff0f, 0xffffffffffffffff,
0xffffffffffff0f0e, 0xffffffffffffffff,
0xffffffffff0f0e0d, 0xffffffffffffffff,
0xffffffff0f0e0d0c, 0xffffffffffffffff,
0xffffff0f0e0d0c0b, 0xffffffffffffffff,
0xffff0f0e0d0c0b0a, 0xffffffffffffffff,
0xff0f0e0d0c0b0a09, 0xffffffffffffffff,
0x0f0e0d0c0b0a0908, 0xffffffffffffffff,
0x0e0d0c0b0a090807, 0xffffffffffffff0f,
0x0d0c0b0a09080706, 0xffffffffffff0f0e,
0x0c0b0a0908070605, 0xffffffffff0f0e0d,
0x0b0a090807060504, 0xffffffff0f0e0d0c,
0x0a09080706050403, 0xffffff0f0e0d0c0b,
0x0908070605040302, 0xffff0f0e0d0c0b0a,
0x0807060504030201, 0xff0f0e0d0c0b0a09,
};
// address ends in 1111xxxx. Might be
// up against a page boundary, so load
// ending at last byte. Then shift
// bytes down using pshufb.
mval = _mm_loadu_si128((void*)((char*)p - 16 + size));
mval = _mm_shuffle_epi8(mval, *(const __m128i*)(&shifts[size*2]));
}
} else {
// Exactly 16 bytes: load them as-is.
mval = _mm_loadu_si128(p);
}
// XOR data with seed.
mval ^= mseed;
// Scramble combo 3 times.
mval = _mm_aesenc_si128(mval, mval);
mval = _mm_aesenc_si128(mval, mval);
mval = _mm_aesenc_si128(mval, mval);
return _mm_cvtsi128_si64(mval);
} else if (size <= 32) {
// Two-lane case: 17 to 32 bytes. The second lane covers the last
// 16 bytes and may overlap the first.
// Make second starting seed.
mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
mseed2 = _mm_aesenc_si128(mseed2, mseed2);
// Load data to be hashed.
mval = _mm_loadu_si128(p);
mval2 = _mm_loadu_si128((void*)((char*)p + size - 16));
// XOR with seed.
mval ^= mseed;
mval2 ^= mseed2;
// Scramble 3 times.
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
// Combine results.
mval ^= mval2;
return _mm_cvtsi128_si64(mval);
} else if (size <= 64) {
// Four-lane case: 33 to 64 bytes. Lanes 1-2 cover the first 32
// bytes, lanes 3-4 the last 32 (possibly overlapping).
// Make 3 more starting seeds.
mseed3 = mseed2;
mseed4 = mseed2;
mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
mseed2 = _mm_aesenc_si128(mseed2, mseed2);
mseed3 = _mm_aesenc_si128(mseed3, mseed3);
mseed4 = _mm_aesenc_si128(mseed4, mseed4);
// Load data.
mval = _mm_loadu_si128(p);
mval2 = _mm_loadu_si128((void*)((char*)p + 16));
mval3 = _mm_loadu_si128((void*)((char*)p + size - 32));
mval4 = _mm_loadu_si128((void*)((char*)p + size - 16));
// XOR with seed.
mval ^= mseed;
mval2 ^= mseed2;
mval3 ^= mseed3;
mval4 ^= mseed4;
// Scramble 3 times.
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
mval3 = _mm_aesenc_si128(mval3, mval3);
mval4 = _mm_aesenc_si128(mval4, mval4);
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
mval3 = _mm_aesenc_si128(mval3, mval3);
mval4 = _mm_aesenc_si128(mval4, mval4);
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
mval3 = _mm_aesenc_si128(mval3, mval3);
mval4 = _mm_aesenc_si128(mval4, mval4);
// Combine results.
mval ^= mval3;
mval2 ^= mval4;
mval ^= mval2;
return _mm_cvtsi128_si64(mval);
} else if (size <= 128) {
// Eight-lane case: 65 to 128 bytes. Lanes 1-4 cover the first 64
// bytes, lanes 5-8 the last 64 (possibly overlapping).
// Make 7 more starting seeds.
mseed3 = mseed2;
mseed4 = mseed2;
mseed5 = mseed2;
mseed6 = mseed2;
mseed7 = mseed2;
mseed8 = mseed2;
mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
mseed5 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 64));
mseed6 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 80));
mseed7 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 96));
mseed8 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 112));
mseed2 = _mm_aesenc_si128(mseed2, mseed2);
mseed3 = _mm_aesenc_si128(mseed3, mseed3);
mseed4 = _mm_aesenc_si128(mseed4, mseed4);
mseed5 = _mm_aesenc_si128(mseed5, mseed5);
mseed6 = _mm_aesenc_si128(mseed6, mseed6);
mseed7 = _mm_aesenc_si128(mseed7, mseed7);
mseed8 = _mm_aesenc_si128(mseed8, mseed8);
// Load data.
mval = _mm_loadu_si128(p);
mval2 = _mm_loadu_si128((void*)((char*)p + 16));
mval3 = _mm_loadu_si128((void*)((char*)p + 32));
mval4 = _mm_loadu_si128((void*)((char*)p + 48));
mval5 = _mm_loadu_si128((void*)((char*)p + size - 64));
mval6 = _mm_loadu_si128((void*)((char*)p + size - 48));
mval7 = _mm_loadu_si128((void*)((char*)p + size - 32));
mval8 = _mm_loadu_si128((void*)((char*)p + size - 16));
// XOR with seed.
mval ^= mseed;
mval2 ^= mseed2;
mval3 ^= mseed3;
mval4 ^= mseed4;
mval5 ^= mseed5;
mval6 ^= mseed6;
mval7 ^= mseed7;
mval8 ^= mseed8;
// Scramble 3 times.
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
mval3 = _mm_aesenc_si128(mval3, mval3);
mval4 = _mm_aesenc_si128(mval4, mval4);
mval5 = _mm_aesenc_si128(mval5, mval5);
mval6 = _mm_aesenc_si128(mval6, mval6);
mval7 = _mm_aesenc_si128(mval7, mval7);
mval8 = _mm_aesenc_si128(mval8, mval8);
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
mval3 = _mm_aesenc_si128(mval3, mval3);
mval4 = _mm_aesenc_si128(mval4, mval4);
mval5 = _mm_aesenc_si128(mval5, mval5);
mval6 = _mm_aesenc_si128(mval6, mval6);
mval7 = _mm_aesenc_si128(mval7, mval7);
mval8 = _mm_aesenc_si128(mval8, mval8);
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
mval3 = _mm_aesenc_si128(mval3, mval3);
mval4 = _mm_aesenc_si128(mval4, mval4);
mval5 = _mm_aesenc_si128(mval5, mval5);
mval6 = _mm_aesenc_si128(mval6, mval6);
mval7 = _mm_aesenc_si128(mval7, mval7);
mval8 = _mm_aesenc_si128(mval8, mval8);
// Combine results.
mval ^= mval5;
mval2 ^= mval6;
mval3 ^= mval7;
mval4 ^= mval8;
mval ^= mval3;
mval2 ^= mval4;
mval ^= mval2;
return _mm_cvtsi128_si64(mval);
} else {
// Streaming case: 129 bytes or more. The eight lanes are seeded
// with the final 128 bytes of the input, then the input is
// walked from the start in 128-byte blocks.
// Make 7 more starting seeds.
mseed3 = mseed2;
mseed4 = mseed2;
mseed5 = mseed2;
mseed6 = mseed2;
mseed7 = mseed2;
mseed8 = mseed2;
mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
mseed5 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 64));
mseed6 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 80));
mseed7 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 96));
mseed8 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 112));
mseed2 = _mm_aesenc_si128(mseed2, mseed2);
mseed3 = _mm_aesenc_si128(mseed3, mseed3);
mseed4 = _mm_aesenc_si128(mseed4, mseed4);
mseed5 = _mm_aesenc_si128(mseed5, mseed5);
mseed6 = _mm_aesenc_si128(mseed6, mseed6);
mseed7 = _mm_aesenc_si128(mseed7, mseed7);
mseed8 = _mm_aesenc_si128(mseed8, mseed8);
// Start with last (possibly overlapping) block.
mval = _mm_loadu_si128((void*)((char*)p + size - 128));
mval2 = _mm_loadu_si128((void*)((char*)p + size - 112));
mval3 = _mm_loadu_si128((void*)((char*)p + size - 96));
mval4 = _mm_loadu_si128((void*)((char*)p + size - 80));
mval5 = _mm_loadu_si128((void*)((char*)p + size - 64));
mval6 = _mm_loadu_si128((void*)((char*)p + size - 48));
mval7 = _mm_loadu_si128((void*)((char*)p + size - 32));
mval8 = _mm_loadu_si128((void*)((char*)p + size - 16));
// XOR in seed.
mval ^= mseed;
mval2 ^= mseed2;
mval3 ^= mseed3;
mval4 ^= mseed4;
mval5 ^= mseed5;
mval6 ^= mseed6;
mval7 ^= mseed7;
mval8 ^= mseed8;
// Compute number of remaining 128-byte blocks.
// (size-1)/128 is at least 1 here because size >= 129; the final
// partial-or-full block was already loaded above.
size--;
size >>= 7;
do {
// Scramble state.
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
mval3 = _mm_aesenc_si128(mval3, mval3);
mval4 = _mm_aesenc_si128(mval4, mval4);
mval5 = _mm_aesenc_si128(mval5, mval5);
mval6 = _mm_aesenc_si128(mval6, mval6);
mval7 = _mm_aesenc_si128(mval7, mval7);
mval8 = _mm_aesenc_si128(mval8, mval8);
// Scramble state, XOR in a block.
mval = _mm_aesenc_si128(mval, _mm_loadu_si128(p));
mval2 = _mm_aesenc_si128(mval2, _mm_loadu_si128((void*)((char*)p + 16)));
mval3 = _mm_aesenc_si128(mval3, _mm_loadu_si128((void*)((char*)p + 32)));
mval4 = _mm_aesenc_si128(mval4, _mm_loadu_si128((void*)((char*)p + 48)));
mval5 = _mm_aesenc_si128(mval5, _mm_loadu_si128((void*)((char*)p + 64)));
mval6 = _mm_aesenc_si128(mval6, _mm_loadu_si128((void*)((char*)p + 80)));
mval7 = _mm_aesenc_si128(mval7, _mm_loadu_si128((void*)((char*)p + 96)));
mval8 = _mm_aesenc_si128(mval8, _mm_loadu_si128((void*)((char*)p + 112)));
// Advance to the next 128-byte block.
p = (void*)((char*)p + 128);
} while (--size > 0);
// 3 more scrambles to finish.
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
mval3 = _mm_aesenc_si128(mval3, mval3);
mval4 = _mm_aesenc_si128(mval4, mval4);
mval5 = _mm_aesenc_si128(mval5, mval5);
mval6 = _mm_aesenc_si128(mval6, mval6);
mval7 = _mm_aesenc_si128(mval7, mval7);
mval8 = _mm_aesenc_si128(mval8, mval8);
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
mval3 = _mm_aesenc_si128(mval3, mval3);
mval4 = _mm_aesenc_si128(mval4, mval4);
mval5 = _mm_aesenc_si128(mval5, mval5);
mval6 = _mm_aesenc_si128(mval6, mval6);
mval7 = _mm_aesenc_si128(mval7, mval7);
mval8 = _mm_aesenc_si128(mval8, mval8);
mval = _mm_aesenc_si128(mval, mval);
mval2 = _mm_aesenc_si128(mval2, mval2);
mval3 = _mm_aesenc_si128(mval3, mval3);
mval4 = _mm_aesenc_si128(mval4, mval4);
mval5 = _mm_aesenc_si128(mval5, mval5);
mval6 = _mm_aesenc_si128(mval6, mval6);
mval7 = _mm_aesenc_si128(mval7, mval7);
mval8 = _mm_aesenc_si128(mval8, mval8);
// Combine results.
mval ^= mval5;
mval2 ^= mval6;
mval3 ^= mval7;
mval4 ^= mval8;
mval ^= mval3;
mval2 ^= mval4;
mval ^= mval2;
return _mm_cvtsi128_si64(mval);
}
}
#else // !defined(__x86_64__)
// The 32-bit version of aeshashbody.
// 32-bit x86 implementation of aeshashbody.
//
// Hashes the size bytes starting at p with AES round instructions and
// returns the low 32 bits of the resulting 128-bit state.
//
//   p           - first byte of the data to hash.
//   seed        - hash seed; placed in the low 32 bits of the starting state.
//   size        - number of bytes at p; its low 16 bits are mixed into the
//                 starting state as well.
//   aeskeysched - key material; 16-byte lanes are read from
//                 aeskeysched.__values at offset 0 (always), at offset 16
//                 (size > 16) and at offsets 32 and 48 (size > 32).
//
// Inputs of 17 bytes or more are consumed as 16-byte chunks anchored at both
// ends of the buffer, so chunks may overlap.
uintptr aeshashbody(void* p, uintptr seed, uintptr size, Slice aeskeysched) {
	// Byte masks: entry n (elements 2n and 2n+1) keeps the low n bytes
	// of a 16-byte vector and clears the others.
	static const uint64 keep_low[32]
		__attribute__ ((aligned(16))) =
	{
		0x0000000000000000, 0x0000000000000000,
		0x00000000000000ff, 0x0000000000000000,
		0x000000000000ffff, 0x0000000000000000,
		0x0000000000ffffff, 0x0000000000000000,
		0x00000000ffffffff, 0x0000000000000000,
		0x000000ffffffffff, 0x0000000000000000,
		0x0000ffffffffffff, 0x0000000000000000,
		0x00ffffffffffffff, 0x0000000000000000,
		0xffffffffffffffff, 0x0000000000000000,
		0xffffffffffffffff, 0x00000000000000ff,
		0xffffffffffffffff, 0x000000000000ffff,
		0xffffffffffffffff, 0x0000000000ffffff,
		0xffffffffffffffff, 0x00000000ffffffff,
		0xffffffffffffffff, 0x000000ffffffffff,
		0xffffffffffffffff, 0x0000ffffffffffff,
		0xffffffffffffffff, 0x00ffffffffffffff
	};
	// pshufb control vectors: for n in 1..15, entry n (elements 2n and
	// 2n+1) moves the top n bytes of a 16-byte vector down to the bottom
	// and zeroes the remaining bytes.
	static const uint64 move_down[32]
		__attribute__ ((aligned(16))) =
	{
		0x0000000000000000, 0x0000000000000000,
		0xffffffffffffff0f, 0xffffffffffffffff,
		0xffffffffffff0f0e, 0xffffffffffffffff,
		0xffffffffff0f0e0d, 0xffffffffffffffff,
		0xffffffff0f0e0d0c, 0xffffffffffffffff,
		0xffffff0f0e0d0c0b, 0xffffffffffffffff,
		0xffff0f0e0d0c0b0a, 0xffffffffffffffff,
		0xff0f0e0d0c0b0a09, 0xffffffffffffffff,
		0x0f0e0d0c0b0a0908, 0xffffffffffffffff,
		0x0e0d0c0b0a090807, 0xffffffffffffff0f,
		0x0d0c0b0a09080706, 0xffffffffffff0f0e,
		0x0c0b0a0908070605, 0xffffffffff0f0e0d,
		0x0b0a090807060504, 0xffffffff0f0e0d0c,
		0x0a09080706050403, 0xffffff0f0e0d0c0b,
		0x0908070605040302, 0xffff0f0e0d0c0b0a,
		0x0807060504030201, 0xff0f0e0d0c0b0a09
	};

	const char *data = (const char *)p;
	const char *sched = (const char *)aeskeysched.__values;
	__m128i plain, key0, key1, key2, key3;
	__m128i st0, st1, st2, st3;
	uintptr blocks;
	int round;

	// Unscrambled starting state: the seed in the low 32 bits, and the
	// low 16 bits of size repeated across the four upper 16-bit lanes.
	plain = _mm_cvtsi32_si128(seed);
	plain = _mm_insert_epi16(plain, size, 4);
	plain = _mm_shufflehi_epi16(plain, 0);

	// First working seed: mix in the first key lane, then scramble.
	key0 = _mm_xor_si128(plain, _mm_loadu_si128((const __m128i *)sched));
	key0 = _mm_aesenc_si128(key0, key0);

	if (size == 0) {
		// Nothing to hash: return the seed scrambled once more.
		return _mm_cvtsi128_si32(_mm_aesenc_si128(key0, key0));
	}

	if (size <= 16) {
		if (size == 16) {
			st0 = _mm_loadu_si128((const __m128i *)data);
		} else if ((((uintptr)(p) + 16) & 0xff0) == 0) {
			// address ends in 1111xxxx.  Might be up against a
			// page boundary, so load the 16 bytes ending at the
			// last input byte and shift the wanted bytes down
			// with pshufb.
			st0 = _mm_loadu_si128((const __m128i *)(data - 16 + size));
			st0 = _mm_shuffle_epi8(st0, _mm_load_si128((const __m128i *)&move_down[size * 2]));
		} else {
			// 16 bytes loaded at p won't cross a page boundary,
			// so load them directly and mask off the excess.
			st0 = _mm_loadu_si128((const __m128i *)data);
			st0 = _mm_and_si128(st0, _mm_load_si128((const __m128i *)&keep_low[size * 2]));
		}
		// One round keyed by the seed, then two self-keyed rounds.
		st0 = _mm_aesenc_si128(st0, key0);
		st0 = _mm_aesenc_si128(st0, st0);
		st0 = _mm_aesenc_si128(st0, st0);
		return _mm_cvtsi128_si32(st0);
	}

	// Every remaining size class needs a second seed, derived from the
	// unscrambled state and the second key lane.
	key1 = _mm_xor_si128(plain, _mm_loadu_si128((const __m128i *)(sched + 16)));
	key1 = _mm_aesenc_si128(key1, key1);

	if (size <= 32) {
		// First and last 16 bytes (they overlap when size < 32).
		st0 = _mm_loadu_si128((const __m128i *)data);
		st1 = _mm_loadu_si128((const __m128i *)(data + size - 16));
		// Three rounds: one keyed by the seeds, two self-keyed.
		st0 = _mm_aesenc_si128(st0, key0);
		st1 = _mm_aesenc_si128(st1, key1);
		for (round = 0; round < 2; round++) {
			st0 = _mm_aesenc_si128(st0, st0);
			st1 = _mm_aesenc_si128(st1, st1);
		}
		// Combine results.
		return _mm_cvtsi128_si32(_mm_xor_si128(st0, st1));
	}

	// Beyond 32 bytes four seeds are used; derive the last two from the
	// unscrambled state and the third and fourth key lanes.
	key2 = _mm_xor_si128(plain, _mm_loadu_si128((const __m128i *)(sched + 32)));
	key3 = _mm_xor_si128(plain, _mm_loadu_si128((const __m128i *)(sched + 48)));
	key2 = _mm_aesenc_si128(key2, key2);
	key3 = _mm_aesenc_si128(key3, key3);

	if (size <= 64) {
		// Two chunks from the front, two from the back.
		st0 = _mm_loadu_si128((const __m128i *)data);
		st1 = _mm_loadu_si128((const __m128i *)(data + 16));
		st2 = _mm_loadu_si128((const __m128i *)(data + size - 32));
		st3 = _mm_loadu_si128((const __m128i *)(data + size - 16));
		st0 = _mm_aesenc_si128(st0, key0);
		st1 = _mm_aesenc_si128(st1, key1);
		st2 = _mm_aesenc_si128(st2, key2);
		st3 = _mm_aesenc_si128(st3, key3);
		for (round = 0; round < 2; round++) {
			st0 = _mm_aesenc_si128(st0, st0);
			st1 = _mm_aesenc_si128(st1, st1);
			st2 = _mm_aesenc_si128(st2, st2);
			st3 = _mm_aesenc_si128(st3, st3);
		}
		st0 = _mm_xor_si128(st0, st2);
		st1 = _mm_xor_si128(st1, st3);
		return _mm_cvtsi128_si32(_mm_xor_si128(st0, st1));
	}

	// More than 64 bytes.  Start with the last (possibly overlapping)
	// 64-byte block and scramble it once with the four seeds.
	st0 = _mm_aesenc_si128(_mm_loadu_si128((const __m128i *)(data + size - 64)), key0);
	st1 = _mm_aesenc_si128(_mm_loadu_si128((const __m128i *)(data + size - 48)), key1);
	st2 = _mm_aesenc_si128(_mm_loadu_si128((const __m128i *)(data + size - 32)), key2);
	st3 = _mm_aesenc_si128(_mm_loadu_si128((const __m128i *)(data + size - 16)), key3);

	// Number of 64-byte blocks to fold in from the front; at least one
	// because size > 64 here.
	blocks = (size - 1) >> 6;
	do {
		// Scramble state, XOR in a block.
		st0 = _mm_aesenc_si128(st0, _mm_loadu_si128((const __m128i *)data));
		st1 = _mm_aesenc_si128(st1, _mm_loadu_si128((const __m128i *)(data + 16)));
		st2 = _mm_aesenc_si128(st2, _mm_loadu_si128((const __m128i *)(data + 32)));
		st3 = _mm_aesenc_si128(st3, _mm_loadu_si128((const __m128i *)(data + 48)));
		// Scramble state.
		st0 = _mm_aesenc_si128(st0, st0);
		st1 = _mm_aesenc_si128(st1, st1);
		st2 = _mm_aesenc_si128(st2, st2);
		st3 = _mm_aesenc_si128(st3, st3);
		data += 64;
	} while (--blocks != 0);

	// 2 more scrambles to finish.
	for (round = 0; round < 2; round++) {
		st0 = _mm_aesenc_si128(st0, st0);
		st1 = _mm_aesenc_si128(st1, st1);
		st2 = _mm_aesenc_si128(st2, st2);
		st3 = _mm_aesenc_si128(st3, st3);
	}
	st0 = _mm_xor_si128(st0, st2);
	st1 = _mm_xor_si128(st1, st3);
	return _mm_cvtsi128_si32(_mm_xor_si128(st0, st1));
}
#endif // !defined(__x86_64__)
#else // !defined(__i386__) && !defined(__x86_64__)
// Placeholder aeshashbody for targets that are neither i386 nor x86_64.
// It performs no hashing: reaching it at all is treated as a fatal runtime
// error and reported through runtime_throw.
//
// The parameters mirror the x86 implementations so that callers link on
// every target.  None of them is used here, so each is marked unused to
// keep -Wunused-parameter (fatal under -Wextra -Werror) quiet.
//
// NOTE(review): there is no return statement, so this relies on
// runtime_throw never returning -- confirm it is declared noreturn.
uintptr aeshashbody(void* p __attribute__((unused)),
		    uintptr seed __attribute__((unused)),
		    uintptr size __attribute__((unused)),
		    Slice aeskeysched __attribute__((unused))) {
	// We should never get here on a non-x86 system.
	runtime_throw("impossible call to aeshashbody");
}
#endif // !defined(__i386__) && !defined(__x86_64__)
...@@ -61,6 +61,7 @@ initfn (int argc, char **argv, char** env __attribute__ ((unused))) ...@@ -61,6 +61,7 @@ initfn (int argc, char **argv, char** env __attribute__ ((unused)))
runtime_isarchive = true; runtime_isarchive = true;
runtime_cpuinit ();
runtime_initsig(true); runtime_initsig(true);
a = (struct args *) malloc (sizeof *a); a = (struct args *) malloc (sizeof *a);
......
...@@ -47,6 +47,7 @@ main (int argc, char **argv) ...@@ -47,6 +47,7 @@ main (int argc, char **argv)
runtime_isstarted = true; runtime_isstarted = true;
__go_end = (uintptr)_end; __go_end = (uintptr)_end;
runtime_cpuinit ();
runtime_check (); runtime_check ();
runtime_args (argc, (byte **) argv); runtime_args (argc, (byte **) argv);
runtime_osinit (); runtime_osinit ();
......
...@@ -9,44 +9,14 @@ ...@@ -9,44 +9,14 @@
#include "runtime.h" #include "runtime.h"
#include "go-type.h" #include "go-type.h"
/* An identity hash function for a type. This is used for types where /* The hash function for types that can compare as identity is
we can simply use the type value itself as a hash code. This is written in Go. */
true of, e.g., integers and pointers. */
uintptr_t extern uintptr runtime_memhash(void *, uintptr, uintptr)
__go_type_hash_identity (const void *key, uintptr_t seed, uintptr_t key_size) __asm__ (GOSYM_PREFIX "runtime.memhash");
{
uintptr_t ret;
uintptr_t i;
const unsigned char *p;
if (key_size <= 8)
{
union
{
uint64 v;
unsigned char a[8];
} u;
u.v = 0;
#ifdef WORDS_BIGENDIAN
__builtin_memcpy (&u.a[8 - key_size], key, key_size);
#else
__builtin_memcpy (&u.a[0], key, key_size);
#endif
if (sizeof (uintptr_t) >= 8)
return (uintptr_t) u.v ^ seed;
else
return (uintptr_t) ((u.v >> 32) ^ (u.v & 0xffffffff)) ^ seed;
}
ret = seed;
for (i = 0, p = (const unsigned char *) key; i < key_size; i++, p++)
ret = ret * 33 + *p;
return ret;
}
const FuncVal __go_type_hash_identity_descriptor = const FuncVal __go_type_hash_identity_descriptor =
{ (void *) __go_type_hash_identity }; { (void *) runtime_memhash };
/* An identity equality function for a type. This is used for types /* An identity equality function for a type. This is used for types
where we can check for equality by checking that the values have where we can check for equality by checking that the values have
......
...@@ -362,7 +362,6 @@ extern _Bool ...@@ -362,7 +362,6 @@ extern _Bool
__go_type_descriptors_equal(const struct __go_type_descriptor*, __go_type_descriptors_equal(const struct __go_type_descriptor*,
const struct __go_type_descriptor*); const struct __go_type_descriptor*);
extern uintptr_t __go_type_hash_identity (const void *, uintptr_t, uintptr_t);
extern const FuncVal __go_type_hash_identity_descriptor; extern const FuncVal __go_type_hash_identity_descriptor;
extern _Bool __go_type_equal_identity (const void *, const void *, uintptr_t); extern _Bool __go_type_equal_identity (const void *, const void *, uintptr_t);
extern const FuncVal __go_type_equal_identity_descriptor; extern const FuncVal __go_type_equal_identity_descriptor;
......
...@@ -455,6 +455,7 @@ runtime_schedinit(void) ...@@ -455,6 +455,7 @@ runtime_schedinit(void)
// runtime_symtabinit(); // runtime_symtabinit();
runtime_mallocinit(); runtime_mallocinit();
mcommoninit(m); mcommoninit(m);
runtime_alginit(); // maps must not be used before this call
// Initialize the itable value for newErrorCString, // Initialize the itable value for newErrorCString,
// so that the next time it gets called, possibly // so that the next time it gets called, possibly
......
...@@ -265,6 +265,8 @@ struct __go_func_type; ...@@ -265,6 +265,8 @@ struct __go_func_type;
void runtime_args(int32, byte**) void runtime_args(int32, byte**)
__asm__ (GOSYM_PREFIX "runtime.args"); __asm__ (GOSYM_PREFIX "runtime.args");
void runtime_osinit(); void runtime_osinit();
void runtime_alginit(void)
__asm__ (GOSYM_PREFIX "runtime.alginit");
void runtime_goargs(void) void runtime_goargs(void)
__asm__ (GOSYM_PREFIX "runtime.goargs"); __asm__ (GOSYM_PREFIX "runtime.goargs");
void runtime_goenvs(void); void runtime_goenvs(void);
...@@ -592,3 +594,7 @@ extern void *getitab(const struct __go_type_descriptor *, ...@@ -592,3 +594,7 @@ extern void *getitab(const struct __go_type_descriptor *,
const struct __go_type_descriptor *, const struct __go_type_descriptor *,
_Bool) _Bool)
__asm__ (GOSYM_PREFIX "runtime.getitab"); __asm__ (GOSYM_PREFIX "runtime.getitab");
extern void runtime_cpuinit(void);
extern void setCpuidECX(uint32)
__asm__ (GOSYM_PREFIX "runtime.setCpuidECX");
...@@ -6,6 +6,10 @@ ...@@ -6,6 +6,10 @@
#include <signal.h> #include <signal.h>
#include <unistd.h> #include <unistd.h>
#if defined(__i386__) || defined(__x86_64__)
#include <cpuid.h>
#endif
#include "config.h" #include "config.h"
#include "runtime.h" #include "runtime.h"
...@@ -204,3 +208,18 @@ go_errno() ...@@ -204,3 +208,18 @@ go_errno()
{ {
return (intgo)errno; return (intgo)errno;
} }
// CPU-specific initialization.
// On i386 and x86_64, query CPUID leaf 1 and hand the resulting ECX value
// to setCpuidECX.  If __get_cpuid reports failure, or on any other
// architecture, this function does nothing.
//
// Defined with an explicit (void) parameter list so that the definition is
// a real prototype rather than an old-style unprototyped function.
void
runtime_cpuinit(void)
{
#if defined(__i386__) || defined(__x86_64__)
	unsigned int eax, ebx, ecx, edx;

	// __get_cpuid returns nonzero only when the requested leaf is
	// supported; only then are the output registers meaningful.
	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
		setCpuidECX(ecx);
	}
#endif
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment