runtime: move schedt type and sched var from C to Go

This doesn't change any actual code; it just starts using the Go definition of the schedt type and the sched variable rather than the C definitions. The schedt type is tweaked slightly for gccgo. We aren't going to release goroutine stacks, so we don't need separate gfreeStack and gfreeNostack lists. We only have one size of defer function, so we don't need a list of 5 different pools. Reviewed-on: https://go-review.googlesource.com/33364 From-SVN: r242600

runtime: move schedt type and sched var from C to Go
This doesn't change any actual code; it just starts using the Go definition of the schedt type and the sched variable rather than the C definitions. The schedt type is tweaked slightly for gccgo. We aren't going to release goroutine stacks, so we don't need separate gfreeStack and gfreeNostack lists. We only have one size of defer function, so we don't need a list of 5 different pools. Reviewed-on: https://go-review.googlesource.com/33364 From-SVN: r242600
092dd2bc · Ian Lance Taylor · 70e73d3c · 092dd2bc · 092dd2bc · 092dd2bc
Commit 092dd2bc authored Nov 18, 2016 by Ian Lance Taylor
Hide whitespace changes
Inline Side-by-side

Showing with 243 additions and 269 deletions

gcc/go/gofrontend/MERGE
+1 -1

libgo/go/runtime/runtime2.go
+9 -12

libgo/go/runtime/stubs.go
+6 -0

libgo/runtime/proc.c
+227 -256

No files found.
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
-fc4ca600b2fc6de81fd3c4014542d6a50593db1a
+bf4762823c4543229867436399be3ae30b4d13bb
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
--- a/libgo/go/runtime/runtime2.go
+++ b/libgo/go/runtime/runtime2.go
@@ -550,9 +550,6 @@ const (
 	_MaxGomaxprocs = 1 << 8
 )
-/*
-Commented out for gccgo for now.
 type schedt struct {
 	// accessed atomically. keep at top to ensure alignment on 32-bit systems.
 	goidgen  uint64
@@ -578,18 +575,17 @@ type schedt struct {
 	runqsize int32
 	// Global cache of dead G's.
-	gflock       mutex
+	gflock mutex
-	gfreeStack   *g
+	gfree  *g
-	gfreeNoStack *g
+	ngfree int32
-	ngfree       int32
 	// Central cache of sudog structs.
 	sudoglock  mutex
 	sudogcache *sudog
-	// Central pool of available defer structs of different sizes.
+	// Central pool of available defer structs.
 	deferlock mutex
-	deferpool [5]*_defer
+	deferpool *_defer
 	gcwaiting  uint32 // gc is waiting to run
 	stopwait   int32
@@ -608,7 +604,6 @@ type schedt struct {
 	procresizetime int64 // nanotime() of last change to gomaxprocs
 	totaltime      int64 // ∫gomaxprocs dt up to procresizetime
 }
-*/
 // The m.locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread.
 // The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active.
@@ -772,8 +767,10 @@ var (
 	ncpu int32
-//	forcegc     forcegcstate
+	//	forcegc     forcegcstate
-//	sched       schedt
+	sched schedt
 //	newprocs    int32
 // Information about what cpu features are available.

--- a/libgo/go/runtime/stubs.go
+++ b/libgo/go/runtime/stubs.go
@@ -520,3 +520,9 @@ func dumpregs(*_siginfo_t, unsafe.Pointer)
 // Temporary for gccgo until we port panic.go.
 func startpanic()
+// Temporary for gccgo until we port proc.go.
+//go:linkname getsched runtime.getsched
+func getsched() *schedt {
+	return &sched
+}
--- a/libgo/runtime/proc.c
+++ b/libgo/runtime/proc.c
@@ -351,48 +351,18 @@ runtime_mcall(void (*pfn)(G*))
 //
 // Design doc at http://golang.org/s/go11sched.
-typedef struct Sched Sched;
+typedef struct schedt Sched;
-struct Sched {
-	Lock;
-	uint64	goidgen;
-	M*	midle;	 // idle m's waiting for work
-	int32	nmidle;	 // number of idle m's waiting for work
-	int32	nmidlelocked; // number of locked m's waiting for work
-	int32	mcount;	 // number of m's that have been created
-	int32	maxmcount;	// maximum number of m's allowed (or die)
-	P*	pidle;  // idle P's
-	uint32	npidle;
-	uint32	nmspinning;
-	// Global runnable queue.
-	G*	runqhead;
-	G*	runqtail;
-	int32	runqsize;
-	// Global cache of dead G's.
-	Lock	gflock;
-	G*	gfree;
-	uint32	gcwaiting;	// gc is waiting to run
-	int32	stopwait;
-	Note	stopnote;
-	uint32	sysmonwait;
-	Note	sysmonnote;
-	uint64	lastpoll;
-	int32	profilehz;	// cpu profiling rate
-};
 enum
 {
-	// Number of goroutine ids to grab from runtime_sched.goidgen to local per-P cache at once.
+	// Number of goroutine ids to grab from runtime_sched->goidgen to local per-P cache at once.
 	// 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
 	GoidCacheBatch = 16,
 };
-Sched	runtime_sched;
+extern Sched* runtime_getsched() __asm__ (GOSYM_PREFIX "runtime.getsched");
+static Sched*	runtime_sched;
 int32	runtime_gomaxprocs;
 uint32	runtime_needextram = 1;
 M	runtime_m0;
@@ -471,6 +441,8 @@ runtime_schedinit(void)
 	const byte *p;
 	Eface i;
+	runtime_sched = runtime_getsched();
 	m = &runtime_m0;
 	g = &runtime_g0;
 	m->g0 = g;
@@ -479,7 +451,7 @@ runtime_schedinit(void)
 	initcontext();
-	runtime_sched.maxmcount = 10000;
+	runtime_sched->maxmcount = 10000;
 	runtime_precisestack = 0;
 	// runtime_symtabinit();
@@ -500,7 +472,7 @@ runtime_schedinit(void)
 	runtime_goenvs();
 	runtime_parsedebugvars();
-	runtime_sched.lastpoll = runtime_nanotime();
+	runtime_sched->lastpoll = runtime_nanotime();
 	procs = 1;
 	s = runtime_getenv("GOMAXPROCS");
 	p = s.str;
@@ -747,8 +719,8 @@ static void
 checkmcount(void)
 {
 	// sched lock is held
-	if(runtime_sched.mcount > runtime_sched.maxmcount) {
+	if(runtime_sched->mcount > runtime_sched->maxmcount) {
-		runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount);
+		runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched->maxmcount);
 		runtime_throw("thread exhaustion");
 	}
 }
@@ -782,8 +754,8 @@ mcommoninit(M *mp)
 	mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
-	mp->id = runtime_sched.mcount++;
+	mp->id = runtime_sched->mcount++;
 	checkmcount();
 	runtime_mpreinit(mp);
@@ -793,7 +765,7 @@ mcommoninit(M *mp)
 	// runtime_NumCgoCall() iterates over allm w/o schedlock,
 	// so we need to publish it safely.
 	runtime_atomicstorep(&runtime_allm, mp);
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 }
 // Mark gp ready to run.
@@ -808,7 +780,7 @@ runtime_ready(G *gp)
 	}
 	gp->atomicstatus = _Grunnable;
 	runqput((P*)g->m->p, gp);
-	if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0)  // TODO: fast atomic
+	if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0)  // TODO: fast atomic
 		wakep();
 	g->m->locks--;
 }
@@ -828,15 +800,15 @@ runtime_gcprocs(void)
 	// Figure out how many CPUs to use during GC.
 	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	n = runtime_gomaxprocs;
 	if(n > runtime_ncpu)
 		n = runtime_ncpu > 0 ? runtime_ncpu : 1;
 	if(n > MaxGcproc)
 		n = MaxGcproc;
-	if(n > runtime_sched.nmidle+1) // one M is currently running
+	if(n > runtime_sched->nmidle+1) // one M is currently running
-		n = runtime_sched.nmidle+1;
+		n = runtime_sched->nmidle+1;
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	return n;
 }
@@ -845,14 +817,14 @@ needaddgcproc(void)
 {
 	int32 n;
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	n = runtime_gomaxprocs;
 	if(n > runtime_ncpu)
 		n = runtime_ncpu;
 	if(n > MaxGcproc)
 		n = MaxGcproc;
-	n -= runtime_sched.nmidle+1; // one M is currently running
+	n -= runtime_sched->nmidle+1; // one M is currently running
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	return n > 0;
 }
@@ -862,7 +834,7 @@ runtime_helpgc(int32 nproc)
 	M *mp;
 	int32 n, pos;
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	pos = 0;
 	for(n = 1; n < nproc; n++) {  // one M is currently running
 		if(runtime_allp[pos]->mcache == g->m->mcache)
@@ -875,7 +847,7 @@ runtime_helpgc(int32 nproc)
 		pos++;
 		runtime_notewakeup(&mp->park);
 	}
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 }
 // Similar to stoptheworld but best-effort and can be called several times.
@@ -893,8 +865,8 @@ runtime_freezetheworld(void)
 	// so try several times
 	for(i = 0; i < 5; i++) {
 		// this should tell the scheduler to not start any new goroutines
-		runtime_sched.stopwait = 0x7fffffff;
+		runtime_sched->stopwait = 0x7fffffff;
-		runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
+		runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
 		// this should stop running goroutines
 		if(!preemptall())
 			break;  // no running goroutines
@@ -914,34 +886,34 @@ runtime_stopTheWorldWithSema(void)
 	P *p;
 	bool wait;
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
-	runtime_sched.stopwait = runtime_gomaxprocs;
+	runtime_sched->stopwait = runtime_gomaxprocs;
-	runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
+	runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
 	preemptall();
 	// stop current P
 	((P*)g->m->p)->status = _Pgcstop;
-	runtime_sched.stopwait--;
+	runtime_sched->stopwait--;
 	// try to retake all P's in _Psyscall status
 	for(i = 0; i < runtime_gomaxprocs; i++) {
 		p = runtime_allp[i];
 		s = p->status;
 		if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop))
-			runtime_sched.stopwait--;
+			runtime_sched->stopwait--;
 	}
 	// stop idle P's
 	while((p = pidleget()) != nil) {
 		p->status = _Pgcstop;
-		runtime_sched.stopwait--;
+		runtime_sched->stopwait--;
 	}
-	wait = runtime_sched.stopwait > 0;
+	wait = runtime_sched->stopwait > 0;
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	// wait for remaining P's to stop voluntarily
 	if(wait) {
-		runtime_notesleep(&runtime_sched.stopnote);
+		runtime_notesleep(&runtime_sched->stopnote);
-		runtime_noteclear(&runtime_sched.stopnote);
+		runtime_noteclear(&runtime_sched->stopnote);
 	}
-	if(runtime_sched.stopwait)
+	if(runtime_sched->stopwait)
 		runtime_throw("stoptheworld: not stopped");
 	for(i = 0; i < runtime_gomaxprocs; i++) {
 		p = runtime_allp[i];
@@ -968,13 +940,13 @@ runtime_startTheWorldWithSema(void)
 	gp = runtime_netpoll(false);  // non-blocking
 	injectglist(gp);
 	add = needaddgcproc();
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	if(newprocs) {
 		procresize(newprocs);
 		newprocs = 0;
 	} else
 		procresize(runtime_gomaxprocs);
-	runtime_sched.gcwaiting = 0;
+	runtime_sched->gcwaiting = 0;
 	p1 = nil;
 	while((p = pidleget()) != nil) {
@@ -988,11 +960,11 @@ runtime_startTheWorldWithSema(void)
 		p->link = (uintptr)p1;
 		p1 = p;
 	}
-	if(runtime_sched.sysmonwait) {
+	if(runtime_sched->sysmonwait) {
-		runtime_sched.sysmonwait = false;
+		runtime_sched->sysmonwait = false;
-		runtime_notewakeup(&runtime_sched.sysmonnote);
+		runtime_notewakeup(&runtime_sched->sysmonnote);
 	}
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	while(p1) {
 		p = p1;
@@ -1285,7 +1257,7 @@ runtime_newextram(void)
 	mp->locked = _LockInternal;
 	mp->lockedg = gp;
 	gp->lockedm = mp;
-	gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
+	gp->goid = runtime_xadd64(&runtime_sched->goidgen, 1);
 	// put on allg for garbage collector
 	allgadd(gp);
@@ -1439,13 +1411,13 @@ stopm(void)
 		runtime_throw("stopm holding p");
 	if(m->spinning) {
 		m->spinning = false;
-		runtime_xadd(&runtime_sched.nmspinning, -1);
+		runtime_xadd(&runtime_sched->nmspinning, -1);
 	}
 retry:
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	mput(m);
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	runtime_notesleep(&m->park);
 	m = g->m;
 	runtime_noteclear(&m->park);
@@ -1473,18 +1445,18 @@ startm(P *p, bool spinning)
 	M *mp;
 	void (*fn)(void);
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	if(p == nil) {
 		p = pidleget();
 		if(p == nil) {
-			runtime_unlock(&runtime_sched);
+			runtime_unlock(&runtime_sched->lock);
 			if(spinning)
-				runtime_xadd(&runtime_sched.nmspinning, -1);
+				runtime_xadd(&runtime_sched->nmspinning, -1);
 			return;
 		}
 	}
 	mp = mget();
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	if(mp == nil) {
 		fn = nil;
 		if(spinning)
@@ -1506,39 +1478,39 @@ static void
 handoffp(P *p)
 {
 	// if it has local work, start it straight away
-	if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
+	if(p->runqhead != p->runqtail || runtime_sched->runqsize) {
 		startm(p, false);
 		return;
 	}
 	// no local work, check that there are no spinning/idle M's,
 	// otherwise our help is not required
-	if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 &&  // TODO: fast atomic
+	if(runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) == 0 &&  // TODO: fast atomic
-		runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
+		runtime_cas(&runtime_sched->nmspinning, 0, 1)) {
 		startm(p, true);
 		return;
 	}
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
-	if(runtime_sched.gcwaiting) {
+	if(runtime_sched->gcwaiting) {
 		p->status = _Pgcstop;
-		if(--runtime_sched.stopwait == 0)
+		if(--runtime_sched->stopwait == 0)
-			runtime_notewakeup(&runtime_sched.stopnote);
+			runtime_notewakeup(&runtime_sched->stopnote);
-		runtime_unlock(&runtime_sched);
+		runtime_unlock(&runtime_sched->lock);
 		return;
 	}
-	if(runtime_sched.runqsize) {
+	if(runtime_sched->runqsize) {
-		runtime_unlock(&runtime_sched);
+		runtime_unlock(&runtime_sched->lock);
 		startm(p, false);
 		return;
 	}
 	// If this is the last running P and nobody is polling network,
 	// need to wakeup another M to poll network.
-	if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
+	if(runtime_sched->npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched->lastpoll) != 0) {
-		runtime_unlock(&runtime_sched);
+		runtime_unlock(&runtime_sched->lock);
 		startm(p, false);
 		return;
 	}
 	pidleput(p);
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 }
 // Tries to add one more P to execute G's.
@@ -1547,7 +1519,7 @@ static void
 wakep(void)
 {
 	// be conservative about spinning threads
-	if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
+	if(!runtime_cas(&runtime_sched->nmspinning, 0, 1))
 		return;
 	startm(nil, true);
 }
@@ -1606,18 +1578,18 @@ gcstopm(void)
 {
 	P *p;
-	if(!runtime_sched.gcwaiting)
+	if(!runtime_sched->gcwaiting)
 		runtime_throw("gcstopm: not waiting for gc");
 	if(g->m->spinning) {
 		g->m->spinning = false;
-		runtime_xadd(&runtime_sched.nmspinning, -1);
+		runtime_xadd(&runtime_sched->nmspinning, -1);
 	}
 	p = releasep();
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	p->status = _Pgcstop;
-	if(--runtime_sched.stopwait == 0)
+	if(--runtime_sched->stopwait == 0)
-		runtime_notewakeup(&runtime_sched.stopnote);
+		runtime_notewakeup(&runtime_sched->stopnote);
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	stopm();
 }
@@ -1639,7 +1611,7 @@ execute(G *gp)
 	gp->m = g->m;
 	// Check whether the profiler needs to be turned on or off.
-	hz = runtime_sched.profilehz;
+	hz = runtime_sched->profilehz;
 	if(g->m->profilehz != hz)
 		runtime_resetcpuprofiler(hz);
@@ -1656,7 +1628,7 @@ findrunnable(void)
 	int32 i;
 top:
-	if(runtime_sched.gcwaiting) {
+	if(runtime_sched->gcwaiting) {
 		gcstopm();
 		goto top;
 	}
@@ -1667,10 +1639,10 @@ top:
 	if(gp)
 		return gp;
 	// global runq
-	if(runtime_sched.runqsize) {
+	if(runtime_sched->runqsize) {
-		runtime_lock(&runtime_sched);
+		runtime_lock(&runtime_sched->lock);
 		gp = globrunqget((P*)g->m->p, 0);
-		runtime_unlock(&runtime_sched);
+		runtime_unlock(&runtime_sched->lock);
 		if(gp)
 			return gp;
 	}
@@ -1684,15 +1656,15 @@ top:
 	// If number of spinning M's >= number of busy P's, block.
 	// This is necessary to prevent excessive CPU consumption
 	// when GOMAXPROCS>>1 but the program parallelism is low.
-	if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle))  // TODO: fast atomic
+	if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched->nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched->npidle))  // TODO: fast atomic
 		goto stop;
 	if(!g->m->spinning) {
 		g->m->spinning = true;
-		runtime_xadd(&runtime_sched.nmspinning, 1);
+		runtime_xadd(&runtime_sched->nmspinning, 1);
 	}
 	// random steal from other P's
 	for(i = 0; i < 2*runtime_gomaxprocs; i++) {
-		if(runtime_sched.gcwaiting)
+		if(runtime_sched->gcwaiting)
 			goto top;
 		p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
 		if(p == (P*)g->m->p)
@@ -1704,30 +1676,30 @@ top:
 	}
 stop:
 	// return P and block
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
-	if(runtime_sched.gcwaiting) {
+	if(runtime_sched->gcwaiting) {
-		runtime_unlock(&runtime_sched);
+		runtime_unlock(&runtime_sched->lock);
 		goto top;
 	}
-	if(runtime_sched.runqsize) {
+	if(runtime_sched->runqsize) {
 		gp = globrunqget((P*)g->m->p, 0);
-		runtime_unlock(&runtime_sched);
+		runtime_unlock(&runtime_sched->lock);
 		return gp;
 	}
 	p = releasep();
 	pidleput(p);
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	if(g->m->spinning) {
 		g->m->spinning = false;
-		runtime_xadd(&runtime_sched.nmspinning, -1);
+		runtime_xadd(&runtime_sched->nmspinning, -1);
 	}
 	// check all runqueues once again
 	for(i = 0; i < runtime_gomaxprocs; i++) {
 		p = runtime_allp[i];
 		if(p && p->runqhead != p->runqtail) {
-			runtime_lock(&runtime_sched);
+			runtime_lock(&runtime_sched->lock);
 			p = pidleget();
-			runtime_unlock(&runtime_sched);
+			runtime_unlock(&runtime_sched->lock);
 			if(p) {
 				acquirep(p);
 				goto top;
@@ -1736,17 +1708,17 @@ stop:
 		}
 	}
 	// poll network
-	if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
+	if(runtime_xchg64(&runtime_sched->lastpoll, 0) != 0) {
 		if(g->m->p)
 			runtime_throw("findrunnable: netpoll with p");
 		if(g->m->spinning)
 			runtime_throw("findrunnable: netpoll with spinning");
 		gp = runtime_netpoll(true);  // block until new work is available
-		runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
+		runtime_atomicstore64(&runtime_sched->lastpoll, runtime_nanotime());
 		if(gp) {
-			runtime_lock(&runtime_sched);
+			runtime_lock(&runtime_sched->lock);
 			p = pidleget();
-			runtime_unlock(&runtime_sched);
+			runtime_unlock(&runtime_sched->lock);
 			if(p) {
 				acquirep(p);
 				injectglist((G*)gp->schedlink);
@@ -1767,15 +1739,15 @@ resetspinning(void)
 	if(g->m->spinning) {
 		g->m->spinning = false;
-		nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1);
+		nmspinning = runtime_xadd(&runtime_sched->nmspinning, -1);
 		if(nmspinning < 0)
 			runtime_throw("findrunnable: negative nmspinning");
 	} else
-		nmspinning = runtime_atomicload(&runtime_sched.nmspinning);
+		nmspinning = runtime_atomicload(&runtime_sched->nmspinning);
 	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
 	// so see if we need to wakeup another P here.
-	if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0)
+	if (nmspinning == 0 && runtime_atomicload(&runtime_sched->npidle) > 0)
 		wakep();
 }
@@ -1789,16 +1761,16 @@ injectglist(G *glist)
 	if(glist == nil)
 		return;
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	for(n = 0; glist; n++) {
 		gp = glist;
 		glist = (G*)gp->schedlink;
 		gp->atomicstatus = _Grunnable;
 		globrunqput(gp);
 	}
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
-	for(; n && runtime_sched.npidle; n--)
+	for(; n && runtime_sched->npidle; n--)
 		startm(nil, false);
 }
@@ -1814,7 +1786,7 @@ schedule(void)
 		runtime_throw("schedule: holding locks");
 top:
-	if(runtime_sched.gcwaiting) {
+	if(runtime_sched->gcwaiting) {
 		gcstopm();
 		goto top;
 	}
@@ -1826,10 +1798,10 @@ top:
 	tick = ((P*)g->m->p)->schedtick;
 	// This is a fancy way to say tick%61==0,
 	// it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
-	if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) {
+	if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched->runqsize > 0) {
-		runtime_lock(&runtime_sched);
+		runtime_lock(&runtime_sched->lock);
 		gp = globrunqget((P*)g->m->p, 1);
-		runtime_unlock(&runtime_sched);
+		runtime_unlock(&runtime_sched->lock);
 		if(gp)
 			resetspinning();
 	}
@@ -1959,9 +1931,9 @@ runtime_gosched0(G *gp)
 	gp->atomicstatus = _Grunnable;
 	gp->m = nil;
 	m->curg = nil;
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	globrunqput(gp);
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	if(m->lockedg) {
 		stoplockedm();
 		execute(gp);  // Never returns.
@@ -2074,25 +2046,25 @@ doentersyscall(uintptr pc, uintptr sp)
 	g->atomicstatus = _Gsyscall;
-	if(runtime_atomicload(&runtime_sched.sysmonwait)) {  // TODO: fast atomic
+	if(runtime_atomicload(&runtime_sched->sysmonwait)) {  // TODO: fast atomic
-		runtime_lock(&runtime_sched);
+		runtime_lock(&runtime_sched->lock);
-		if(runtime_atomicload(&runtime_sched.sysmonwait)) {
+		if(runtime_atomicload(&runtime_sched->sysmonwait)) {
-			runtime_atomicstore(&runtime_sched.sysmonwait, 0);
+			runtime_atomicstore(&runtime_sched->sysmonwait, 0);
-			runtime_notewakeup(&runtime_sched.sysmonnote);
+			runtime_notewakeup(&runtime_sched->sysmonnote);
 		}
-		runtime_unlock(&runtime_sched);
+		runtime_unlock(&runtime_sched->lock);
 	}
 	g->m->mcache = nil;
 	((P*)(g->m->p))->m = 0;
 	runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall);
-	if(runtime_atomicload(&runtime_sched.gcwaiting)) {
+	if(runtime_atomicload(&runtime_sched->gcwaiting)) {
-		runtime_lock(&runtime_sched);
+		runtime_lock(&runtime_sched->lock);
-		if (runtime_sched.stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
+		if (runtime_sched->stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
-			if(--runtime_sched.stopwait == 0)
+			if(--runtime_sched->stopwait == 0)
-				runtime_notewakeup(&runtime_sched.stopnote);
+				runtime_notewakeup(&runtime_sched->stopnote);
 		}
-		runtime_unlock(&runtime_sched);
+		runtime_unlock(&runtime_sched->lock);
 	}
 	g->m->locks--;
@@ -2201,7 +2173,7 @@ exitsyscallfast(void)
 	gp = g;
 	// Freezetheworld sets stopwait but does not retake P's.
-	if(runtime_sched.stopwait) {
+	if(runtime_sched->stopwait) {
 		gp->m->p = 0;
 		return false;
 	}
@@ -2215,14 +2187,14 @@ exitsyscallfast(void)
 	}
 	// Try to get any other idle P.
 	gp->m->p = 0;
-	if(runtime_sched.pidle) {
+	if(runtime_sched->pidle) {
-		runtime_lock(&runtime_sched);
+		runtime_lock(&runtime_sched->lock);
 		p = pidleget();
-		if(p && runtime_atomicload(&runtime_sched.sysmonwait)) {
+		if(p && runtime_atomicload(&runtime_sched->sysmonwait)) {
-			runtime_atomicstore(&runtime_sched.sysmonwait, 0);
+			runtime_atomicstore(&runtime_sched->sysmonwait, 0);
-			runtime_notewakeup(&runtime_sched.sysmonnote);
+			runtime_notewakeup(&runtime_sched->sysmonnote);
 		}
-		runtime_unlock(&runtime_sched);
+		runtime_unlock(&runtime_sched->lock);
 		if(p) {
 			acquirep(p);
 			return true;
@@ -2243,15 +2215,15 @@ exitsyscall0(G *gp)
 	gp->atomicstatus = _Grunnable;
 	gp->m = nil;
 	m->curg = nil;
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	p = pidleget();
 	if(p == nil)
 		globrunqput(gp);
-	else if(runtime_atomicload(&runtime_sched.sysmonwait)) {
+	else if(runtime_atomicload(&runtime_sched->sysmonwait)) {
-		runtime_atomicstore(&runtime_sched.sysmonwait, 0);
+		runtime_atomicstore(&runtime_sched->sysmonwait, 0);
-		runtime_notewakeup(&runtime_sched.sysmonnote);
+		runtime_notewakeup(&runtime_sched->sysmonnote);
 	}
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	if(p) {
 		acquirep(p);
 		execute(gp);  // Never returns.
@@ -2308,7 +2280,7 @@ syscall_runtime_AfterFork(void)
 {
 	int32 hz;
-	hz = runtime_sched.profilehz;
+	hz = runtime_sched->profilehz;
 	if(hz != 0)
 		runtime_resetcpuprofiler(hz);
 	runtime_m()->locks--;
@@ -2400,7 +2372,7 @@ __go_go(void (*fn)(void*), void* arg)
 	newg->gopc = (uintptr)__builtin_return_address(0);
 	newg->atomicstatus = _Grunnable;
 	if(p->goidcache == p->goidcacheend) {
-		p->goidcache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch);
+		p->goidcache = runtime_xadd64(&runtime_sched->goidgen, GoidCacheBatch);
 		p->goidcacheend = p->goidcache + GoidCacheBatch;
 	}
 	newg->goid = p->goidcache++;
@@ -2421,7 +2393,7 @@ __go_go(void (*fn)(void*), void* arg)
 		runqput(p, vnewg);
-		if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main)  // TODO: fast atomic
+		if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main)  // TODO: fast atomic
 			wakep();
 		g->m->locks--;
 		return vnewg;
@@ -2462,15 +2434,15 @@ gfput(P *p, G *gp)
 	p->gfree = gp;
 	p->gfreecnt++;
 	if(p->gfreecnt >= 64) {
-		runtime_lock(&runtime_sched.gflock);
+		runtime_lock(&runtime_sched->gflock);
 		while(p->gfreecnt >= 32) {
 			p->gfreecnt--;
 			gp = p->gfree;
 			p->gfree = (G*)gp->schedlink;
-			gp->schedlink = (uintptr)runtime_sched.gfree;
+			gp->schedlink = (uintptr)runtime_sched->gfree;
-			runtime_sched.gfree = gp;
+			runtime_sched->gfree = gp;
 		}
-		runtime_unlock(&runtime_sched.gflock);
+		runtime_unlock(&runtime_sched->gflock);
 	}
 }
@@ -2483,16 +2455,16 @@ gfget(P *p)
 retry:
 	gp = p->gfree;
-	if(gp == nil && runtime_sched.gfree) {
+	if(gp == nil && runtime_sched->gfree) {
-		runtime_lock(&runtime_sched.gflock);
+		runtime_lock(&runtime_sched->gflock);
-		while(p->gfreecnt < 32 && runtime_sched.gfree) {
+		while(p->gfreecnt < 32 && runtime_sched->gfree) {
 			p->gfreecnt++;
-			gp = runtime_sched.gfree;
+			gp = runtime_sched->gfree;
-			runtime_sched.gfree = (G*)gp->schedlink;
+			runtime_sched->gfree = (G*)gp->schedlink;
 			gp->schedlink = (uintptr)p->gfree;
 			p->gfree = gp;
 		}
-		runtime_unlock(&runtime_sched.gflock);
+		runtime_unlock(&runtime_sched->gflock);
 		goto retry;
 	}
 	if(gp) {
@@ -2508,15 +2480,15 @@ gfpurge(P *p)
 {
 	G *gp;
-	runtime_lock(&runtime_sched.gflock);
+	runtime_lock(&runtime_sched->gflock);
 	while(p->gfreecnt) {
 		p->gfreecnt--;
 		gp = p->gfree;
 		p->gfree = (G*)gp->schedlink;
-		gp->schedlink = (uintptr)runtime_sched.gfree;
+		gp->schedlink = (uintptr)runtime_sched->gfree;
-		runtime_sched.gfree = gp;
+		runtime_sched->gfree = gp;
 	}
-	runtime_unlock(&runtime_sched.gflock);
+	runtime_unlock(&runtime_sched->gflock);
 }
 void
@@ -2546,13 +2518,13 @@ runtime_GOMAXPROCS(intgo n)
 	if(n > _MaxGomaxprocs)
 		n = _MaxGomaxprocs;
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	ret = (intgo)runtime_gomaxprocs;
 	if(n <= 0 || n == ret) {
-		runtime_unlock(&runtime_sched);
+		runtime_unlock(&runtime_sched->lock);
 		return ret;
 	}
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	runtime_acquireWorldsema();
 	g->m->gcing = 1;
@@ -2653,7 +2625,7 @@ runtime_gcount(void)
 int32
 runtime_mcount(void)
 {
-	return runtime_sched.mcount;
+	return runtime_sched->mcount;
 }
 static struct {
@@ -2754,9 +2726,9 @@ runtime_setcpuprofilerate_m(int32 hz)
 	prof.hz = hz;
 	runtime_atomicstore(&prof.lock, 0);
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
-	runtime_sched.profilehz = hz;
+	runtime_sched->profilehz = hz;
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	if(hz != 0)
 		runtime_resetcpuprofiler(hz);
@@ -2809,22 +2781,22 @@ procresize(int32 new)
 			p->runqtail--;
 			gp = (G*)p->runq[p->runqtail%nelem(p->runq)];
 			// push onto head of global queue
-			gp->schedlink = (uintptr)runtime_sched.runqhead;
+			gp->schedlink = runtime_sched->runqhead;
-			runtime_sched.runqhead = gp;
+			runtime_sched->runqhead = (uintptr)gp;
-			if(runtime_sched.runqtail == nil)
+			if(runtime_sched->runqtail == 0)
-				runtime_sched.runqtail = gp;
+				runtime_sched->runqtail = (uintptr)gp;
-			runtime_sched.runqsize++;
+			runtime_sched->runqsize++;
 		}
 	}
 	// fill local queues with at most nelem(p->runq)/2 goroutines
 	// start at 1 because current M already executes some G and will acquire allp[0] below,
 	// so if we have a spare G we want to put it into allp[1].
-	for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched.runqsize > 0; i++) {
+	for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched->runqsize > 0; i++) {
-		gp = runtime_sched.runqhead;
+		gp = (G*)runtime_sched->runqhead;
-		runtime_sched.runqhead = (G*)gp->schedlink;
+		runtime_sched->runqhead = gp->schedlink;
-		if(runtime_sched.runqhead == nil)
+		if(runtime_sched->runqhead == 0)
-			runtime_sched.runqtail = nil;
+			runtime_sched->runqtail = 0;
-		runtime_sched.runqsize--;
+		runtime_sched->runqsize--;
 		runqput(runtime_allp[i%new], gp);
 	}
@@ -2899,11 +2871,11 @@ releasep(void)
 static void
 incidlelocked(int32 v)
 {
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
-	runtime_sched.nmidlelocked += v;
+	runtime_sched->nmidlelocked += v;
 	if(v > 0)
 		checkdead();
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 }
 // Check for deadlock situation.
@@ -2923,7 +2895,7 @@ checkdead(void)
 	}
 	// -1 for sysmon
-	run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
+	run = runtime_sched->mcount - runtime_sched->nmidle - runtime_sched->nmidlelocked - 1 - countextra();
 	if(run > 0)
 		return;
 	// If we are dying because of a signal caught on an already idle thread,
@@ -2934,7 +2906,7 @@ checkdead(void)
 		return;
 	if(run < 0) {
 		runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
-			runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
+			runtime_sched->nmidle, runtime_sched->nmidlelocked, runtime_sched->mcount);
 		runtime_throw("checkdead: inconsistent counts");
 	}
 	grunning = 0;
@@ -2978,23 +2950,23 @@ sysmon(void)
 			delay = 10*1000;
 		runtime_usleep(delay);
 		if(runtime_debug.schedtrace <= 0 &&
-			(runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) {  // TODO: fast atomic
+			(runtime_sched->gcwaiting || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs)) {  // TODO: fast atomic
-			runtime_lock(&runtime_sched);
+			runtime_lock(&runtime_sched->lock);
-			if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
+			if(runtime_atomicload(&runtime_sched->gcwaiting) || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs) {
-				runtime_atomicstore(&runtime_sched.sysmonwait, 1);
+				runtime_atomicstore(&runtime_sched->sysmonwait, 1);
-				runtime_unlock(&runtime_sched);
+				runtime_unlock(&runtime_sched->lock);
-				runtime_notesleep(&runtime_sched.sysmonnote);
+				runtime_notesleep(&runtime_sched->sysmonnote);
-				runtime_noteclear(&runtime_sched.sysmonnote);
+				runtime_noteclear(&runtime_sched->sysmonnote);
 				idle = 0;
 				delay = 20;
 			} else
-				runtime_unlock(&runtime_sched);
+				runtime_unlock(&runtime_sched->lock);
 		}
 		// poll network if not polled for more than 10ms
-		lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
+		lastpoll = runtime_atomicload64(&runtime_sched->lastpoll);
 		now = runtime_nanotime();
 		if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
-			runtime_cas64(&runtime_sched.lastpoll, lastpoll, now);
+			runtime_cas64(&runtime_sched->lastpoll, lastpoll, now);
 			gp = runtime_netpoll(false);  // non-blocking
 			if(gp) {
 				// Need to decrement number of idle locked M's
@@ -3060,7 +3032,7 @@ retake(int64 now)
 			// but on the other hand we want to retake them eventually
 			// because they can prevent the sysmon thread from deep sleep.
 			if(p->runqhead == p->runqtail &&
-				runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0 &&
+				runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) > 0 &&
 				pd->syscallwhen + 10*1000*1000 > now)
 				continue;
 			// Need to decrement number of idle locked M's
@@ -3117,14 +3089,14 @@ runtime_schedtrace(bool detailed)
 	if(starttime == 0)
 		starttime = now;
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
-		(now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount,
+		(now-starttime)/1000000, runtime_gomaxprocs, runtime_sched->npidle, runtime_sched->mcount,
-		runtime_sched.nmidle, runtime_sched.runqsize);
+		runtime_sched->nmidle, runtime_sched->runqsize);
 	if(detailed) {
 		runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
-			runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning,
+			runtime_sched->gcwaiting, runtime_sched->nmidlelocked, runtime_sched->nmspinning,
-			runtime_sched.stopwait, runtime_sched.sysmonwait);
+			runtime_sched->stopwait, runtime_sched->sysmonwait);
 	}
 	// We must be careful while reading data from P's, M's and G's.
 	// Even if we hold schedlock, most data can be changed concurrently.
@@ -3153,7 +3125,7 @@ runtime_schedtrace(bool detailed)
 		}
 	}
 	if(!detailed) {
-		runtime_unlock(&runtime_sched);
+		runtime_unlock(&runtime_sched->lock);
 		return;
 	}
 	for(mp = runtime_allm; mp; mp = mp->alllink) {
@@ -3185,7 +3157,7 @@ runtime_schedtrace(bool detailed)
 			lockedm ? lockedm->id : -1);
 	}
 	runtime_unlock(&allglock);
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 }
 // Put mp on midle list.
@@ -3193,9 +3165,9 @@ runtime_schedtrace(bool detailed)
 static void
 mput(M *mp)
 {
-	mp->schedlink = (uintptr)runtime_sched.midle;
+	mp->schedlink = runtime_sched->midle;
-	runtime_sched.midle = mp;
+	runtime_sched->midle = (uintptr)mp;
-	runtime_sched.nmidle++;
+	runtime_sched->nmidle++;
 	checkdead();
 }
@@ -3206,9 +3178,9 @@ mget(void)
 {
 	M *mp;
-	if((mp = runtime_sched.midle) != nil){
+	if((mp = (M*)runtime_sched->midle) != nil){
-		runtime_sched.midle = (M*)mp->schedlink;
+		runtime_sched->midle = mp->schedlink;
-		runtime_sched.nmidle--;
+		runtime_sched->nmidle--;
 	}
 	return mp;
 }
@@ -3219,12 +3191,12 @@ static void
 globrunqput(G *gp)
 {
 	gp->schedlink = 0;
-	if(runtime_sched.runqtail)
+	if(runtime_sched->runqtail)
-		runtime_sched.runqtail->schedlink = (uintptr)gp;
+		((G*)runtime_sched->runqtail)->schedlink = (uintptr)gp;
 	else
-		runtime_sched.runqhead = gp;
+		runtime_sched->runqhead = (uintptr)gp;
-	runtime_sched.runqtail = gp;
+	runtime_sched->runqtail = (uintptr)gp;
-	runtime_sched.runqsize++;
+	runtime_sched->runqsize++;
 }
 // Put a batch of runnable goroutines on the global runnable queue.
@@ -3233,12 +3205,12 @@ static void
 globrunqputbatch(G *ghead, G *gtail, int32 n)
 {
 	gtail->schedlink = 0;
-	if(runtime_sched.runqtail)
+	if(runtime_sched->runqtail)
-		runtime_sched.runqtail->schedlink = (uintptr)ghead;
+		((G*)runtime_sched->runqtail)->schedlink = (uintptr)ghead;
 	else
-		runtime_sched.runqhead = ghead;
+		runtime_sched->runqhead = (uintptr)ghead;
-	runtime_sched.runqtail = gtail;
+	runtime_sched->runqtail = (uintptr)gtail;
-	runtime_sched.runqsize += n;
+	runtime_sched->runqsize += n;
 }
 // Try get a batch of G's from the global runnable queue.
@@ -3249,24 +3221,24 @@ globrunqget(P *p, int32 max)
 	G *gp, *gp1;
 	int32 n;
-	if(runtime_sched.runqsize == 0)
+	if(runtime_sched->runqsize == 0)
 		return nil;
-	n = runtime_sched.runqsize/runtime_gomaxprocs+1;
+	n = runtime_sched->runqsize/runtime_gomaxprocs+1;
-	if(n > runtime_sched.runqsize)
+	if(n > runtime_sched->runqsize)
-		n = runtime_sched.runqsize;
+		n = runtime_sched->runqsize;
 	if(max > 0 && n > max)
 		n = max;
 	if((uint32)n > nelem(p->runq)/2)
 		n = nelem(p->runq)/2;
-	runtime_sched.runqsize -= n;
+	runtime_sched->runqsize -= n;
-	if(runtime_sched.runqsize == 0)
+	if(runtime_sched->runqsize == 0)
-		runtime_sched.runqtail = nil;
+		runtime_sched->runqtail = 0;
-	gp = runtime_sched.runqhead;
+	gp = (G*)runtime_sched->runqhead;
-	runtime_sched.runqhead = (G*)gp->schedlink;
+	runtime_sched->runqhead = gp->schedlink;
 	n--;
 	while(n--) {
-		gp1 = runtime_sched.runqhead;
+		gp1 = (G*)runtime_sched->runqhead;
-		runtime_sched.runqhead = (G*)gp1->schedlink;
+		runtime_sched->runqhead = gp1->schedlink;
 		runqput(p, gp1);
 	}
 	return gp;
@@ -3277,9 +3249,9 @@ globrunqget(P *p, int32 max)
 static void
 pidleput(P *p)
 {
-	p->link = (uintptr)runtime_sched.pidle;
+	p->link = runtime_sched->pidle;
-	runtime_sched.pidle = p;
+	runtime_sched->pidle = (uintptr)p;
-	runtime_xadd(&runtime_sched.npidle, 1);  // TODO: fast atomic
+	runtime_xadd(&runtime_sched->npidle, 1);  // TODO: fast atomic
 }
 // Try get a p from pidle list.
@@ -3289,10 +3261,10 @@ pidleget(void)
 {
 	P *p;
-	p = runtime_sched.pidle;
+	p = (P*)runtime_sched->pidle;
 	if(p) {
-		runtime_sched.pidle = (P*)p->link;
+		runtime_sched->pidle = p->link;
-		runtime_xadd(&runtime_sched.npidle, -1);  // TODO: fast atomic
+		runtime_xadd(&runtime_sched->npidle, -1);  // TODO: fast atomic
 	}
 	return p;
 }
@@ -3341,9 +3313,9 @@ runqputslow(P *p, G *gp, uint32 h, uint32 t)
 	for(i=0; i<n; i++)
 		batch[i]->schedlink = (uintptr)batch[i+1];
 	// Now put the batch on global queue.
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
 	globrunqputbatch(batch[0], batch[n], n+1);
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	return true;
 }
@@ -3495,11 +3467,11 @@ runtime_setmaxthreads(intgo in)
 {
 	intgo out;
-	runtime_lock(&runtime_sched);
+	runtime_lock(&runtime_sched->lock);
-	out = (intgo)runtime_sched.maxmcount;
+	out = (intgo)runtime_sched->maxmcount;
-	runtime_sched.maxmcount = (int32)in;
+	runtime_sched->maxmcount = (int32)in;
 	checkmcount();
-	runtime_unlock(&runtime_sched);
+	runtime_unlock(&runtime_sched->lock);
 	return out;
 }
@@ -3558,7 +3530,6 @@ sync_atomic_runtime_procUnpin()
 void
 runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
 {
-	enqueue1(wbufp, (Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
 	enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0});
 }
@@ -3567,7 +3538,7 @@ runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj
 bool
 runtime_gcwaiting(void)
 {
-	return runtime_sched.gcwaiting;
+	return runtime_sched->gcwaiting;
 }
 // os_beforeExit is called from os.Exit(0).
@@ -3602,7 +3573,7 @@ sync_runtime_canSpin(intgo i)
 	// GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
 	// As opposed to runtime mutex we don't do passive spinning here,
 	// because there can be work on global runq or on other Ps.
-	if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched.npidle+runtime_sched.nmspinning)+1) {
+	if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched->npidle+runtime_sched->nmspinning)+1) {
 		return false;
 	}
 	p = (P*)g->m->p;