/neural_network/neural_network.py
import math
import random


class CachedNodeData:
    '''A simple cache for node-specific data used in evaluation and training
    for a single labeled example.

    For each field, the description of the field assumes the following:

    1. The node in question is a function f(w_1, w_2, ..., w_k, z_1, z_2, ..., z_m),
    where w_i are tunable parameters, and z_i are the inputs to the function
    being computed. For example, a linear node would have w_i be the weights, z_i
    be the inputs, k = m, with f defined as

        (z_1, ..., z_m) -> sum(w_i * z_i for i in range(m)).

    2. The global error function computed by the network is

        E(a_1, ..., a_M, x_1, ..., x_N, y),

    where the a_i are the collection of tunable parameters of all the nodes in the
    network, and the x_i are the inputs to the graph, and y is the expected label.

    3. A specific labeled example ((x_1, ..., x_N), y) to the entire graph is
    fixed, along with the current set of all tunable parameters. All derivatives
    are evaluated at these values.

    The values stored in the cache are:

    - output: the output of this node for the specific input
    - local_gradient: [∂f/∂z_1, ∂f/∂z_2, ..., ∂f/∂z_m]
    - global_gradient: ∂E/∂f
    - local_parameter_gradient: [∂f/∂w_1, ∂f/∂w_2, ..., ∂f/∂w_k]
    - global_parameter_gradient: [∂E/∂w_1, ∂E/∂w_2, ..., ∂E/∂w_k]
    '''

    def __init__(self, output=None, local_gradient=None, global_gradient=None,
                 local_parameter_gradient=None, global_parameter_gradient=None):
        self.output = output
        self.local_gradient = local_gradient
        self.global_gradient = global_gradient
        self.local_parameter_gradient = local_parameter_gradient
        self.global_parameter_gradient = global_parameter_gradient

    def __repr__(self):
        return (
            "CachedNodeData(output=" + repr(self.output) + ", " +
            "local_gradient=" + repr(self.local_gradient) + ", " +
            "global_gradient=" + repr(self.global_gradient) + ", " +
            "local_parameter_gradient=" + repr(self.local_parameter_gradient) + ", " +
            "global_parameter_gradient=" + repr(self.global_parameter_gradient) + ")"
        )


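# For concreteness, an illustrative example of the cache contents: for a
# LinearNode (defined below) whose first argument is the ConstantNode bias
# input 1, with weights [w_0, w_1, w_2] and argument outputs [1, z_1, z_2],
# the cached fields would be
#
#     output                    = w_0 + w_1*z_1 + w_2*z_2
#     local_gradient            = [w_0, w_1, w_2]          # ∂f/∂z_i
#     global_gradient           = ∂E/∂f
#     local_parameter_gradient  = [1, z_1, z_2]             # ∂f/∂w_i
#     global_parameter_gradient = [∂E/∂f * 1, ∂E/∂f * z_1, ∂E/∂f * z_2]

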
class Node:
    '''A node of a computation graph.

    Attributes

    arguments: a list of inputs to the Node, which are other Nodes whose
    outputs are inputs to the computation performed by this Node.

    successors: a list of Nodes that contain this node as input. Used for
    a Node to compute its error as a function of the errors of the Nodes that
    contain this Node as an argument.

    parameters: A list containing the tunable parameters of this node.

    has_parameters: A bool that is True if and only if there is a parameters
    list set.

    cache: values that are stored during evaluation and training to avoid
    recomputing intermediate values in the network.

    Children of this class implement

    compute_output: [float] -> float
    compute_local_gradient: None -> [float]
    compute_local_parameter_gradient: None -> [float]
    compute_global_parameter_gradient: None -> [float]

    The output of compute_*_parameter_gradient corresponds to the values of
    self.parameters, index by index. The output of compute_local_gradient
    corresponds to the input vector, index by index.

    If the Node is a terminal node (one that computes the error for a training
    example), it must also have a method compute_error: [float], int -> float.
    '''

    def __init__(self, *arguments):
        self.has_parameters = False  # if this is True, child class must set self.parameters
        self.parameters = []
        self.arguments = arguments
        self.successors = []
        self.cache = CachedNodeData()

        # link argument successors to self
        for argument in self.arguments:
            argument.successors.append(self)

        '''Argument nodes z_i will query this node f(z_1, ..., z_k) for ∂f/∂z_i, so we need to keep
        track of the index for each argument node.'''
        self.argument_to_index = {node: index for (
            index, node) in enumerate(arguments)}

    def do_gradient_descent_step(self, step_size):
        '''The core gradient step subroutine: compute the gradient for each of
        this node's tunable parameters, and step in the direction opposite the
        gradient.'''
        if self.has_parameters:
            for i, gradient_entry in enumerate(self.global_parameter_gradient):
                # step away from the gradient
                self.parameters[i] -= step_size * gradient_entry

    '''Gradient computations which don't depend on the node's definition.'''

    def compute_global_gradient(self):
        '''Compute the derivative of the entire network with respect to this node.

        By the chain rule, ∂E/∂f is the sum, over the successors g of this
        node, of ∂E/∂g * ∂g/∂f.

        This method must be overridden for any output nodes that are terminal during
        training, such as an error node.
        '''
        return sum(
            successor.global_gradient *
            successor.local_gradient_for_argument(self)
            for successor in self.successors
        )

    def local_gradient_for_argument(self, argument):
        '''Return the derivative of this node with respect to a particular argument.'''
        argument_index = self.argument_to_index[argument]
        return self.local_gradient[argument_index]

    '''Cache lookups, computing on cache miss.'''

    def evaluate(self, inputs):
        if self.cache.output is None:
            self.cache.output = self.compute_output(inputs)
        return self.cache.output

    @property
    def output(self):
        if self.cache.output is None:
            raise Exception("Tried to query output not present in cache.")
        return self.cache.output

    @property
    def local_gradient(self):
        if self.cache.local_gradient is None:
            self.cache.local_gradient = self.compute_local_gradient()
        return self.cache.local_gradient

    @property
    def global_gradient(self):
        if self.cache.global_gradient is None:
            self.cache.global_gradient = self.compute_global_gradient()
        return self.cache.global_gradient

    @property
    def local_parameter_gradient(self):
        if self.cache.local_parameter_gradient is None:
            self.cache.local_parameter_gradient = self.compute_local_parameter_gradient()
        return self.cache.local_parameter_gradient

    @property
    def global_parameter_gradient(self):
        if self.cache.global_parameter_gradient is None:
            self.cache.global_parameter_gradient = self.compute_global_parameter_gradient()
        return self.cache.global_parameter_gradient

    def compute_output(self, inputs):
        raise NotImplementedError()

    def compute_local_parameter_gradient(self):
        raise NotImplementedError()

    def compute_local_gradient(self):
        raise NotImplementedError()

    def compute_global_parameter_gradient(self):
        raise NotImplementedError()


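class SquareNode(Node):
    '''An illustrative one-input node computing square(x) = x^2, included only
    as a minimal example of the subclass contract described in Node; nothing
    else in this module depends on it.
    '''

    def compute_output(self, inputs):
        return self.arguments[0].evaluate(inputs) ** 2

    def compute_local_gradient(self):
        # d(x^2)/dx = 2x, evaluated at the cached output of the argument node
        return [2 * self.arguments[0].output]

    def compute_local_parameter_gradient(self):
        return []  # No tunable parameters

    def compute_global_parameter_gradient(self):
        return []  # No tunable parameters

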
class InputNode(Node):
    '''A Node representing an input to the computation graph.'''
    def __init__(self, input_index):
        super().__init__()
        self.input_index = input_index

    def compute_output(self, inputs):
        return inputs[self.input_index]

    @staticmethod
    def make_input_nodes(count):
        '''A helper function so the user doesn't have to keep track of
           the input indexes.
        '''
        return [InputNode(i) for i in range(count)]

    def pretty_print(self, tabs=0):
        prefix = "  " * tabs
        return "{}InputNode({}) output = {:.2f}".format(
            prefix, self.input_index, self.output)


class ReluNode(Node):
    '''A node for a rectified linear unit (ReLU), i.e. the one-input,
    one-output function relu(x) = max(0, x).
    '''

    def compute_output(self, inputs):
        argument_value = self.arguments[0].evaluate(inputs)
        return max(0, argument_value)

    def compute_local_gradient(self):
        last_input = self.arguments[0].output
        return [1 if last_input > 0 else 0]

    def compute_local_parameter_gradient(self):
        return []  # No tunable parameters

    def compute_global_parameter_gradient(self):
        return []  # No tunable parameters

    def pretty_print(self, tabs=0):
        prefix = "  " * tabs
        return "{}Relu output={:.2f}\n{}\n".format(
            prefix,
            self.output,
            self.arguments[0].pretty_print(tabs + 1))


class SigmoidNode(Node):
    '''A node for a classical sigmoid unit, i.e. the one-input,
    one-output function s(x) = e^x / (e^x + 1).
    '''

    def compute_output(self, inputs):
        argument_value = self.arguments[0].evaluate(inputs)
        exp_value = math.exp(argument_value)
        return exp_value / (exp_value + 1)

    def compute_local_gradient(self):
        # s'(x) = s(x) * (1 - s(x)), so the derivative can be computed
        # directly from the cached output.
        last_output = self.output
        return [(1 - last_output) * last_output]

    def compute_local_parameter_gradient(self):
        return []  # No tunable parameters

    def compute_global_parameter_gradient(self):
        return []  # No tunable parameters

    def pretty_print(self, tabs=0):
        prefix = "  " * tabs
        return "{}Sigmoid output={:.2f}\n{}\n".format(
            prefix,
            self.output,
            self.arguments[0].pretty_print(tabs + 1))


class ConstantNode(Node):
    '''A constant (untrainable) node, used as the input to the "bias" entry
       of a linear node.'''

    def compute_output(self, inputs):
        return 1

    def pretty_print(self, tabs=0):
        prefix = "  " * tabs
        return "{}Constant(1)".format(prefix)


class LinearNode(Node):
    '''A linear node, i.e., the function with n inputs and n weights that
       computes sum(w * x for (w, x) in zip(weights, inputs)).'''

    def __init__(self, arguments, initial_weights=None):
        '''If the initial_weights are provided, they must be one longer
        than the number of arguments, and the first entry must correspond
        to the bias.
        '''
        super().__init__(ConstantNode(), *arguments)  # first arg is the bias
        self.initialize_weights(initial_weights)
        self.has_parameters = True
        self.parameters = self.weights  # name alias

    def initialize_weights(self, initial_weights):
        arglen = len(self.arguments)
        if initial_weights:
            if len(initial_weights) != arglen:
                raise Exception(
                    "Invalid initial_weights length {:d}".format(len(initial_weights)))
            self.weights = initial_weights
        else:
            # set the initial weights randomly, according to a heuristic distribution
            weight_bound = 1.0 / math.sqrt(arglen)
            self.weights = [
                random.uniform(-weight_bound, weight_bound) for _ in range(arglen)]

    def compute_output(self, inputs):
        return sum(
            w * x.evaluate(inputs)
            for (w, x) in zip(self.weights, self.arguments)
        )

    def compute_local_gradient(self):
        return self.weights

    def compute_local_parameter_gradient(self):
        return [arg.output for arg in self.arguments]

    def compute_global_parameter_gradient(self):
        return [
            self.global_gradient *
            self.local_parameter_gradient_for_argument(argument)
            for argument in self.arguments
        ]

    def local_parameter_gradient_for_argument(self, argument):
        '''Return the derivative of this node with respect to the weight
        associated with a particular argument.'''
        argument_index = self.argument_to_index[argument]
        return self.local_parameter_gradient[argument_index]

    def pretty_print(self, tabs=0):
        argument_strs = '\n'.join(arg.pretty_print(tabs + 1)
                                  for arg in self.arguments)
        prefix = "  " * tabs
        weights = ','.join(['%.2f' % w for w in self.weights])
        gradient = ','.join(
            ['%.2f' % w for w in self.global_parameter_gradient])
        return "{}Linear weights={} gradient={} output={:.2f}\n{}\n".format(
            prefix, weights, gradient, self.output, argument_strs)


class L2ErrorNode(Node):
    '''A node computing the squared deviation error function.

    The function is f(z(x), y) = (z(x) - y)^2, where (x, y) is a labeled
    example and z(x) is the rest of the computation graph.
    '''

    def compute_error(self, inputs, label):
        argument_value = self.arguments[0].evaluate(inputs)
        self.label = label  # cache the label
        return (argument_value - label) ** 2

    def compute_local_gradient(self):
        last_input = self.arguments[0].output
        return [2 * (last_input - self.label)]

    def compute_global_gradient(self):
        # This node is the error function itself, so ∂E/∂E = 1.
        return 1


class NeuralNetwork:
    '''A wrapper class for a computation graph, which encapsulates the
    backpropagation algorithm and training.
    '''
    def __init__(self, terminal_node, input_nodes, error_node=None, step_size=None):
        self.terminal_node = terminal_node
        self.input_nodes = input_nodes
        self.error_node = error_node or L2ErrorNode(self.terminal_node)
        self.step_size = step_size or 1e-2
        self.reset()

    def for_each(self, func):
        '''Walk the graph and apply func to each node.'''
        nodes_to_process = set([self.error_node])
        processed = set()

        while nodes_to_process:
            node = nodes_to_process.pop()
            func(node)
            processed.add(node)
            nodes_to_process |= set(node.arguments) - processed

    def reset(self):
        def reset_one(node):
            node.cache = CachedNodeData()
        self.for_each(reset_one)

    def evaluate(self, inputs):
        '''Evaluate the computation graph on a single set of inputs.'''
        self.reset()
        return self.terminal_node.evaluate(inputs)

    def compute_error(self, inputs, label):
        '''Compute the error for a given labeled example.'''
        self.reset()
        return self.error_node.compute_error(inputs, label)

    def backpropagation_step(self, inputs, label, step_size=None):
        if step_size is None:
            step_size = self.step_size
        self.compute_error(inputs, label)
        self.for_each(lambda node: node.do_gradient_descent_step(step_size))

    def train(self, dataset, max_steps=10000):
        '''Train the neural network on a dataset.

        Args:
            dataset: a list of pairs ([float], int) where the first entry is
                the data point and the second is the label.
            max_steps: the number of steps to train for.

        Returns:
            None (self is modified)
        '''
        for i in range(max_steps):
            inputs, label = random.choice(dataset)
            self.backpropagation_step(inputs, label, self.step_size)

            if i % int(max_steps / 10) == 0:
                print('{:2.1f}%'.format(100 * i / max_steps))

    def error_on_dataset(self, dataset):
        '''Return the fraction of the dataset that is misclassified, rounding
        the network's output to produce a 0/1 prediction.'''
        errors = 0
        total = len(dataset)

        for (example, label) in dataset:
            if round(self.evaluate(example)) != label:
                errors += 1

        return errors / total

    def pretty_print(self):
        return self.terminal_node.pretty_print()
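

# A minimal usage sketch: the network shape, dataset, and hyperparameters
# below are illustrative choices, not anything prescribed by this module.
# It trains a single sigmoid-of-linear unit to compute the boolean OR
# function.
if __name__ == "__main__":
    input_nodes = InputNode.make_input_nodes(2)
    linear_node = LinearNode(input_nodes)
    output_node = SigmoidNode(linear_node)
    network = NeuralNetwork(output_node, input_nodes, step_size=0.5)

    # Labeled examples ([x_1, x_2], label) for boolean OR.
    dataset = [
        ([0, 0], 0),
        ([0, 1], 1),
        ([1, 0], 1),
        ([1, 1], 1),
    ]

    network.train(dataset, max_steps=5000)
    print(network.pretty_print())
    print("misclassification rate: {:.2f}".format(
        network.error_on_dataset(dataset)))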