Commit e3d0ee4a by Andrew Stubbs Committed by Andrew Stubbs

Move gcn-run heap into GPU memory.

2019-11-13  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/gcn/gcn-run.c (heap_region): New global variable.
	(struct hsa_runtime_fn_info): Add hsa_memory_assign_agent_fn.
	(init_hsa_runtime_functions): Initialize hsa_memory_assign_agent.
	(get_kernarg_region): Move contents to ....
	(get_memory_region): .... here.
	(get_heap_region): New function.
	(init_device): Initialize the heap_region.
	(device_malloc): Add region parameter.
	(struct kernargs): Move heap ....
	(heap): ... to global scope.
	(main): Allocate heap separate to kernargs.

From-SVN: r278131
parent 2dbad62d
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
* config/gcn/gcn-run.c (heap_region): New global variable.
(struct hsa_runtime_fn_info): Add hsa_memory_assign_agent_fn.
(init_hsa_runtime_functions): Initialize hsa_memory_assign_agent.
(get_kernarg_region): Move contents to ....
(get_memory_region): .... here.
(get_heap_region): New function.
(init_device): Initialize the heap_region.
(device_malloc): Add region parameter.
(struct kernargs): Move heap ....
(heap): ... to global scope.
(main): Allocate heap separate to kernargs.
2019-11-13 Jan Hubicka <hubicka@ucw.cz> 2019-11-13 Jan Hubicka <hubicka@ucw.cz>
* ipa-prop.c (ipa_print_node_jump_functions, * ipa-prop.c (ipa_print_node_jump_functions,
...@@ -72,6 +72,7 @@ uint64_t main_kernel = 0; ...@@ -72,6 +72,7 @@ uint64_t main_kernel = 0;
hsa_executable_t executable = { 0 }; hsa_executable_t executable = { 0 };
hsa_region_t kernargs_region = { 0 }; hsa_region_t kernargs_region = { 0 };
hsa_region_t heap_region = { 0 };
uint32_t kernarg_segment_size = 0; uint32_t kernarg_segment_size = 0;
uint32_t group_segment_size = 0; uint32_t group_segment_size = 0;
uint32_t private_segment_size = 0; uint32_t private_segment_size = 0;
...@@ -135,6 +136,8 @@ struct hsa_runtime_fn_info ...@@ -135,6 +136,8 @@ struct hsa_runtime_fn_info
hsa_signal_t *signal); hsa_signal_t *signal);
hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size, hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
void **ptr); void **ptr);
hsa_status_t (*hsa_memory_assign_agent_fn) (void *ptr, hsa_agent_t agent,
hsa_access_permission_t access);
hsa_status_t (*hsa_memory_copy_fn) (void *dst, const void *src, hsa_status_t (*hsa_memory_copy_fn) (void *dst, const void *src,
size_t size); size_t size);
hsa_status_t (*hsa_memory_free_fn) (void *ptr); hsa_status_t (*hsa_memory_free_fn) (void *ptr);
...@@ -204,6 +207,7 @@ init_hsa_runtime_functions (void) ...@@ -204,6 +207,7 @@ init_hsa_runtime_functions (void)
DLSYM_FN (hsa_executable_freeze) DLSYM_FN (hsa_executable_freeze)
DLSYM_FN (hsa_signal_create) DLSYM_FN (hsa_signal_create)
DLSYM_FN (hsa_memory_allocate) DLSYM_FN (hsa_memory_allocate)
DLSYM_FN (hsa_memory_assign_agent)
DLSYM_FN (hsa_memory_copy) DLSYM_FN (hsa_memory_copy)
DLSYM_FN (hsa_memory_free) DLSYM_FN (hsa_memory_free)
DLSYM_FN (hsa_signal_destroy) DLSYM_FN (hsa_signal_destroy)
...@@ -282,7 +286,8 @@ get_gpu_agent (hsa_agent_t agent, void *data __attribute__ ((unused))) ...@@ -282,7 +286,8 @@ get_gpu_agent (hsa_agent_t agent, void *data __attribute__ ((unused)))
suitable one has been found. */ suitable one has been found. */
static hsa_status_t static hsa_status_t
get_kernarg_region (hsa_region_t region, void *data __attribute__ ((unused))) get_memory_region (hsa_region_t region, hsa_region_t *retval,
hsa_region_global_flag_t kind)
{ {
/* Reject non-global regions. */ /* Reject non-global regions. */
hsa_region_segment_t segment; hsa_region_segment_t segment;
...@@ -294,9 +299,9 @@ get_kernarg_region (hsa_region_t region, void *data __attribute__ ((unused))) ...@@ -294,9 +299,9 @@ get_kernarg_region (hsa_region_t region, void *data __attribute__ ((unused)))
hsa_region_global_flag_t flags; hsa_region_global_flag_t flags;
hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS, hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
&flags); &flags);
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) if (flags & kind)
{ {
kernargs_region = region; *retval = region;
return HSA_STATUS_INFO_BREAK; return HSA_STATUS_INFO_BREAK;
} }
...@@ -304,6 +309,20 @@ get_kernarg_region (hsa_region_t region, void *data __attribute__ ((unused))) ...@@ -304,6 +309,20 @@ get_kernarg_region (hsa_region_t region, void *data __attribute__ ((unused)))
return HSA_STATUS_SUCCESS; return HSA_STATUS_SUCCESS;
} }
static hsa_status_t
get_kernarg_region (hsa_region_t region, void *data __attribute__((unused)))
{
return get_memory_region (region, &kernargs_region,
HSA_REGION_GLOBAL_FLAG_KERNARG);
}
static hsa_status_t
get_heap_region (hsa_region_t region, void *data __attribute__((unused)))
{
return get_memory_region (region, &heap_region,
HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED);
}
/* Initialize the HSA Runtime library and GPU device. */ /* Initialize the HSA Runtime library and GPU device. */
static void static void
...@@ -338,6 +357,13 @@ init_device () ...@@ -338,6 +357,13 @@ init_device ()
NULL), NULL),
status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK, status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK,
"Locate kernargs memory"); "Locate kernargs memory");
/* Select a memory region for the kernel heap.
The call-back function, get_heap_region, does the selection. */
XHSA_CMP (hsa_fns.hsa_agent_iterate_regions_fn (device, get_heap_region,
NULL),
status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK,
"Locate device memory");
} }
...@@ -593,10 +619,10 @@ found_main:; ...@@ -593,10 +619,10 @@ found_main:;
__flat_scalar GCN address space). */ __flat_scalar GCN address space). */
static void * static void *
device_malloc (size_t size) device_malloc (size_t size, hsa_region_t region)
{ {
void *result; void *result;
XHSA (hsa_fns.hsa_memory_allocate_fn (kernargs_region, size, &result), XHSA (hsa_fns.hsa_memory_allocate_fn (region, size, &result),
"Allocate device memory"); "Allocate device memory");
return result; return result;
} }
...@@ -634,14 +660,14 @@ struct kernargs ...@@ -634,14 +660,14 @@ struct kernargs
} queue[1024]; } queue[1024];
unsigned int consumed; unsigned int consumed;
} output_data; } output_data;
struct heap
{
int64_t size;
char data[0];
} heap;
}; };
struct heap
{
int64_t size;
char data[0];
} heap;
/* Print any console output from the kernel. /* Print any console output from the kernel.
We print all entries from "consumed" to the next entry without a "written" We print all entries from "consumed" to the next entry without a "written"
flag, or "next_output" is reached. The buffer is circular, but the flag, or "next_output" is reached. The buffer is circular, but the
...@@ -811,13 +837,19 @@ main (int argc, char *argv[]) ...@@ -811,13 +837,19 @@ main (int argc, char *argv[])
/* Allocate device memory for both function parameters and the argv /* Allocate device memory for both function parameters and the argv
data. */ data. */
size_t heap_size = 10 * 1024 * 1024; /* 10MB. */ struct kernargs *kernargs = device_malloc (sizeof (*kernargs),
struct kernargs *kernargs = device_malloc (sizeof (*kernargs) + heap_size); kernargs_region);
struct argdata struct argdata
{ {
int64_t argv_data[kernel_argc]; int64_t argv_data[kernel_argc];
char strings[args_size]; char strings[args_size];
} *args = device_malloc (sizeof (struct argdata)); } *args = device_malloc (sizeof (struct argdata), kernargs_region);
size_t heap_size = 10 * 1024 * 1024; /* 10MB. */
struct heap *heap = device_malloc (heap_size, heap_region);
XHSA (hsa_fns.hsa_memory_assign_agent_fn (heap, device,
HSA_ACCESS_PERMISSION_RW),
"Assign heap to device agent");
/* Write the data to the target. */ /* Write the data to the target. */
kernargs->argc = kernel_argc; kernargs->argc = kernel_argc;
...@@ -837,8 +869,8 @@ main (int argc, char *argv[]) ...@@ -837,8 +869,8 @@ main (int argc, char *argv[])
memcpy (&args->strings[offset], kernel_argv[i], arg_len + 1); memcpy (&args->strings[offset], kernel_argv[i], arg_len + 1);
offset += arg_len; offset += arg_len;
} }
kernargs->heap_ptr = (int64_t) &kernargs->heap; kernargs->heap_ptr = (int64_t) heap;
kernargs->heap.size = heap_size; hsa_fns.hsa_memory_copy_fn (&heap->size, &heap_size, sizeof (heap_size));
/* Run constructors on the GPU. */ /* Run constructors on the GPU. */
run (init_array_kernel, kernargs); run (init_array_kernel, kernargs);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment