# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Building a Graph Convolutional Network
======================================
**Author**: `Yulun Yao <https://yulunyao.io/>`_, \
            `Chien-Yu Lin <https://homes.cs.washington.edu/~cyulin/>`_

This article is an introductory tutorial to build a Graph Convolutional Network (GCN) with Relay.

In this tutorial, we will run our GCN on Cora dataset to demonstrate.

Cora dataset is a common benchmark for Graph Neural Networks (GNN) and frameworks that support GNN training and inference.

We directly load the dataset from DGL library to do the apples to apples comparison against DGL.

Please refer to the DGL documentation for DGL installation at
https://docs.dgl.ai/install/index.html

and refer to the PyTorch guide for PyTorch installation at
https://pytorch.org/get-started/locally/
"""

######################################################################
# Define GCN in DGL with PyTorch backend
# --------------------------------------
#
# DGL example: https://github.com/dmlc/dgl/tree/master/examples/pytorch/gcn
# This part reuses the code from the above example.
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
import networkx as nx
from dgl.nn.pytorch import GraphConv

class GCN(nn.Module):
    """Stack of DGL ``GraphConv`` layers applied to a fixed graph.

    ``self.layers`` holds, in order: one input layer (``n_infeat`` ->
    ``n_hidden``), ``n_layers - 1`` hidden layers (``n_hidden`` ->
    ``n_hidden``) and one output layer (``n_hidden`` -> ``n_classes``).
    The output layer is created without an activation.

    Parameters
    ----------
    g:
        Graph object handed to every layer on each forward pass.
    n_infeat: int
        Size of the input node features.
    n_hidden: int
        Size of the hidden node features.
    n_classes: int
        Size of the output node features.
    n_layers: int
        Number of layers that produce ``n_hidden`` features.
    activation: callable
        Activation passed to every layer except the output layer.
    """

    def __init__(self,
                 g,
                 n_infeat,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation):
        super(GCN, self).__init__()
        self.g = g
        self.layers = nn.ModuleList()
        self.layers.append(GraphConv(n_infeat, n_hidden, activation=activation))
        for _ in range(n_layers - 1):
            self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation))
        self.layers.append(GraphConv(n_hidden, n_classes))

    @staticmethod
    def _version_tuple(version):
        """Return the leading numeric components of ``version`` as a tuple of ints."""
        parts = []
        for piece in version.split("."):
            digits = ""
            for ch in piece:
                if not "0" <= ch <= "9":
                    break
                digits += ch
            if digits:
                parts.append(int(digits))
            if digits != piece:
                # Stop at the first component with a non-numeric suffix (e.g. "3post2")
                break
        return tuple(parts)

    def forward(self, features):
        """Run ``features`` through every layer in order and return the result."""
        # Handle the API change between DGL versions: newer releases take the
        # graph as the first argument. Versions are compared numerically because
        # a plain string comparison misorders e.g. '0.10' and '0.3'.
        graph_first = self._version_tuple(dgl.__version__) > (0, 3)
        h = features
        for layer in self.layers:
            if graph_first:
                h = layer(self.g, h)
            else:
                h = layer(h, self.g)
        return h


######################################################################
# Define the functions to load dataset and evaluate accuracy
# ----------------------------------------------------------
# You may substitute this part with your own dataset, here we load data from DGL
from dgl.data import load_data
from collections import namedtuple

def load_dataset(dataset="cora"):
    """Load a dataset through DGL and reset the self-loops of its graph.

    Parameters
    ----------
    dataset: str
        Dataset name, forwarded to ``load_data`` as the ``dataset`` field of
        an argument namedtuple.

    Returns
    -------
    g:
        ``data.graph`` after its existing self-loops were removed and a
        self-loop edge was added for every node. The graph is modified in
        place.
    data:
        The dataset object returned by ``load_data``.
    """
    args = namedtuple("args", ["dataset"])
    data = load_data(args(dataset))

    # Remove self-loops to avoid duplicate passing of a node's feature to itself.
    # The edge generator is materialized first so the graph is not mutated
    # while it is still being iterated.
    g = data.graph
    g.remove_edges_from(list(nx.selfloop_edges(g)))
    g.add_edges_from(zip(g.nodes, g.nodes))

    return g, data


def evaluate(data, logits):
    """Return the fraction of test nodes whose predicted class is correct.

    Parameters
    ----------
    data:
        Object exposing ``labels`` and ``test_mask``; the mask marks the test
        set, which isn't included in the training phase.
    logits:
        Per-node class scores; the prediction is the argmax along axis 1.

    Returns
    -------
    acc:
        Number of correct predictions inside the mask divided by the mask sum.
    """
    mask = data.test_mask
    predicted = logits.argmax(axis=1)
    hits = (predicted == data.labels) * mask
    return hits.sum() / mask.sum()


######################################################################
# Load the data and set up model parameters
# -----------------------------------------
"""
Parameters
----------
dataset: str
    Name of dataset. You can choose from ['cora', 'citeseer', 'pubmed'].

num_layers: int
    number of hidden layers

num_hidden: int
    number of the hidden units in the hidden layer

infeat_dim: int
    dimension of the input features

num_classes: int
    dimension of model output (Number of classes)
"""
# Pick the dataset, then load its graph together with the dataset object
dataset = "cora"
g, data = load_dataset(dataset)

# Model sizes: the first two are chosen here, the other two are read from the dataset
num_layers, num_hidden = 1, 16
infeat_dim, num_classes = data.features.shape[1], data.num_labels

######################################################################
# Set up the DGL-PyTorch model and get the golden results
# -------------------------------------------------------
#
# The weights are trained with https://github.com/dmlc/dgl/blob/master/examples/pytorch/gcn/train.py
from tvm.contrib.download import download_testdata
from dgl import DGLGraph

# Wrap the graph for DGL and turn the node features into a float tensor
dgl_g = DGLGraph(g)
features = torch.FloatTensor(data.features)

torch_model = GCN(
    g=dgl_g,
    n_infeat=infeat_dim,
    n_hidden=num_hidden,
    n_classes=num_classes,
    n_layers=num_layers,
    activation=F.relu,
)

# Download the pretrained weights
model_url = "https://homes.cs.washington.edu/~cyulin/media/gnn_model/gcn_%s.torch" % dataset
model_path = download_testdata(model_url, "gcn_%s.pickle" % dataset, module='gcn_model')

# Load the weights into the model
# NOTE(review): torch.load unpickles the downloaded file, so the URL above
# must point to a trusted source.
torch_model.load_state_dict(torch.load(model_path))


######################################################################
# Run the DGL model and test for accuracy
# ---------------------------------------
# Switch the model to evaluation mode and run one forward pass without
# tracking gradients
torch_model.eval()
with torch.no_grad():
    logits_torch = torch_model(features)
print("Print the first five outputs from DGL-PyTorch execution\n", logits_torch[:5])

# Score the DGL-PyTorch logits on the test split; these logits are the
# reference the TVM results are compared against at the end of the script
acc = evaluate(data, logits_torch.numpy())
print("Test accuracy of DGL results: {:.2%}".format(acc))


######################################################################
# Define Graph Convolution Layer in Relay
# ---------------------------------------
# To run GCN on TVM, we first need to implement Graph Convolution Layer.
#
# You may refer to https://github.com/dmlc/dgl/blob/master/python/dgl/nn/mxnet/conv.py for a GraphConv Layer implemented in DGL with MXNet Backend
#
# The layer is defined with the operations below. Note that we apply two transposes to keep the adjacency matrix on the right hand side of the sparse_dense operator;
# this method is temporary and will be updated in the next few weeks when we have sparse matrix transpose and support for left sparse operator.
#
#  .. math::
#
#            \mbox{GraphConv}(A, H, W)   = A * H * W
#                                        = ((H * W)^t * A^t)^t
#                                        = ((W^t * H^t) * A^t)^t
from tvm import relay
from tvm.contrib import graph_runtime
import tvm

def GraphConv(layer_name,
              input_dim,
              output_dim,
              adj,
              input,
              norm=None,
              bias=True,
              activation=None):
    """Build the Relay expression of one graph convolution layer.

    The layer declares a free variable ``<layer_name>.weight`` and, when
    ``bias`` is True, a free variable ``<layer_name>.bias``.

    Parameters
    ----------
    layer_name: str
        Name of layer

    input_dim: int
        Input dimension per node feature

    output_dim: int
        Output dimension per node feature

    adj: namedtuple
        Graph representation (Adjacency Matrix) in Sparse Format (`data`, `indices`, `indptr`),
        where `data` has shape [num_nonzeros], `indices` has shape [num_nonzeros],
        `indptr` has shape [num_nodes + 1]

    input: relay.Expr
        Input feature to current layer with shape [num_nodes, input_dim]

    norm: relay.Expr
        Norm passed to this layer to normalize features before and after Convolution.

    bias: bool
        Set bias to True to add bias when doing GCN layer

    activation: <function relay.op.nn>
        Activation function applies to the output.
        e.g. relay.nn.{relu, sigmoid, log_softmax, softmax, leaky_relu}

    Returns
    -------
    output: tvm.relay.Expr
        The Output Tensor for this layer [num_nodes, output_dim]
    """
    if norm is not None:
        input = relay.multiply(input, norm)

    weight = relay.var(layer_name + ".weight", shape=(input_dim, output_dim))
    # The product is built in transposed form so that the sparse adjacency
    # matrix is the right-hand operand of sparse_dense; the result is
    # transposed back afterwards.
    weight_t = relay.transpose(weight)
    dense = relay.nn.dense(weight_t, input)
    output = relay.nn.sparse_dense(dense, adj)
    output_t = relay.transpose(output)

    if norm is not None:
        output_t = relay.multiply(output_t, norm)
    if bias is True:
        _bias = relay.var(layer_name + ".bias", shape=(output_dim, 1))
        output_t = relay.nn.bias_add(output_t, _bias, axis=-1)
    if activation is not None:
        output_t = activation(output_t)
    return output_t

######################################################################
# Prepare the parameters needed in the GraphConv layers
# -----------------------------------------------------
#
import numpy as np
import networkx as nx

def prepare_params(g, data):
    """Collect the NumPy arrays that are fed to the Relay model.

    Parameters
    ----------
    g:
        Graph passed to ``nx.to_scipy_sparse_matrix``; its ``in_degree`` is
        looked up for the node ids ``0 .. g.number_of_nodes() - 1``.
    data:
        Dataset object exposing a ``features`` array.

    Returns
    -------
    params: dict
        ``infeats`` (float32 features), ``g_data`` / ``indices`` / ``indptr``
        (float32 / int32 / int32 arrays of the sparse adjacency matrix) and
        ``norm`` (float32 column vector of ``in_degree ** -0.5``).
    """
    params = {}
    params['infeats'] = data.features.astype('float32')  # Only support float32 as feature for now

    # Generate adjacency matrix
    # NOTE(review): rows follow the iteration order of g's nodes, while the
    # degrees below are indexed by 0..N-1; this presumably relies on the nodes
    # being stored in that order. TODO confirm for other datasets.
    adjacency = nx.to_scipy_sparse_matrix(g)
    params['g_data'] = adjacency.data.astype('float32')
    params['indices'] = adjacency.indices.astype('int32')
    params['indptr'] = adjacency.indptr.astype('int32')

    # Normalization w.r.t. node degrees
    degs = [g.in_degree[i] for i in range(g.number_of_nodes())]
    norm = np.power(degs, -0.5).astype('float32')
    # Column vector so it multiplies against one row of features per node
    params['norm'] = norm.reshape((norm.shape[0], 1))

    return params

params = prepare_params(g, data)

# Check shape of features and the validity of adjacency matrix
assert len(params['infeats'].shape) == 2
assert params['g_data'] is not None and params['indices'] is not None and params['indptr'] is not None
# One indptr entry per node plus a trailing one
assert params['infeats'].shape[0] == params['indptr'].shape[0] - 1

######################################################################
# Put layers together
# -------------------

# Define input features, norms, adjacency matrix in Relay
infeats = relay.var("infeats", shape=data.features.shape)
norm = relay.Constant(tvm.nd.array(params['norm']))
g_data = relay.Constant(tvm.nd.array(params['g_data']))
indices = relay.Constant(tvm.nd.array(params['indices']))
indptr = relay.Constant(tvm.nd.array(params['indptr']))

Adjacency = namedtuple('Adjacency', ['data', 'indices', 'indptr'])
adj = Adjacency(g_data, indices, indptr)

# Construct the 2-layer GCN; the layer names match the keys of the PyTorch
# state dict ("layers.<i>.weight" / "layers.<i>.bias") that are bound later
layers = []
layers.append(GraphConv(
    layer_name="layers.0",
    input_dim=infeat_dim,
    output_dim=num_hidden,
    adj=adj,
    input=infeats,
    norm=norm,
    activation=relay.nn.relu
))
layers.append(GraphConv(
    layer_name="layers.1",
    input_dim=num_hidden,
    output_dim=num_classes,
    adj=adj,
    input=layers[-1],
    norm=norm,
    activation=None
))

# Analyze free variables and generate Relay function
output = layers[-1]
func = relay.Function(relay.analysis.free_vars(output), output)

######################################################################
# Compile and run with TVM
# ------------------------
# Export the weights from PyTorch model to Python Dict
# (the state dict is fetched once instead of once per tensor)
model_params = {
    name: tensor.numpy()
    for name, tensor in torch_model.state_dict().items()
}

# Copy the weight and bias of every GraphConv layer next to the graph inputs
for i in range(num_layers+1):
    params["layers.%d.weight"%(i)] = model_params["layers.%d.weight"%(i)]
    params["layers.%d.bias"%(i)] = model_params["layers.%d.bias"%(i)]

# Set the TVM build target
target = 'llvm' # Currently only support `llvm` as target

# Build with Relay
with relay.build_config(opt_level=0): # Currently only support opt_level=0
    graph, lib, params = relay.build(func, target, params=params)

# Generate graph runtime
ctx = tvm.context(target, 0)
m = graph_runtime.create(graph, lib, ctx)
m.set_input(**params)

######################################################################
# Run the TVM model, test for accuracy and verify with DGL
# --------------------------------------------------------
# Execute the compiled graph and fetch its first output as a NumPy array
m.run()
logits_tvm = m.get_output(0).asnumpy()
print("Print the first five outputs from TVM execution\n", logits_tvm[:5])

# NOTE(review): `labels` and `test_mask` are not read again in the visible
# part of the script; `evaluate` takes both from `data` itself.
labels = data.labels
test_mask = data.test_mask

acc = evaluate(data, logits_tvm)
print("Test accuracy of TVM results: {:.2%}".format(acc))

# Verify the results with the DGL model
tvm.testing.assert_allclose(logits_torch, logits_tvm, atol=1e-3)