/neural_network/neural_network.py
import math
import random


class CachedNodeData:
    '''A simple cache for node-specific data used in evaluation and training
    for a single labeled example.

    For each field, the description of the field assumes the following:

    1. The node in question is a function f(w_1, w_2, ..., w_k, z_1, z_2, ..., z_m),
    where w_i are tunable parameters, and z_i are the inputs to the function
    being computed. For example, a linear node would have w_i be the weights, z_i
    be the inputs, k = m, with f defined as

        (z_1, ..., z_m) -> sum(w_i * z_i for i in range(m)).

    2. The global error function computed by the network is

        E(a_1, ..., a_M, x_1, ..., x_N, y),

    where the a_i are the collection of tunable parameters of all the nodes in the
    network, and the x_i are the inputs to the graph, and y is the expected label.

    3. A specific labeled example ((x_1, ..., x_N), y) to the entire graph is
    fixed, along with the current set of all tunable parameters. All derivatives
    are evaluated at these values.

    The values stored in the cache are:

    - output: the output of this node for the specific input
    - local_gradient: [∂f/∂z_1, ∂f/∂z_2, ..., ∂f/∂z_m]
    - global_gradient: ∂E/∂f
    - local_parameter_gradient: [∂f/∂w_1, ∂f/∂w_2, ..., ∂f/∂w_k]
    - global_parameter_gradient: [∂E/∂w_1, ∂E/∂w_2, ..., ∂E/∂w_k]
    '''

    def __init__(self, output=None, local_gradient=None, global_gradient=None,
                 local_parameter_gradient=None, global_parameter_gradient=None):
        self.output = output
        self.local_gradient = local_gradient
        self.global_gradient = global_gradient
        self.local_parameter_gradient = local_parameter_gradient
        self.global_parameter_gradient = global_parameter_gradient

    def __repr__(self):
        return (
            "CachedNodeData(output=" + repr(self.output) + ", " +
            "local_gradient=" + repr(self.local_gradient) + ", " +
            "global_gradient=" + repr(self.global_gradient) + ", " +
            "local_parameter_gradient=" + repr(self.local_parameter_gradient) + ", " +
            "global_parameter_gradient=" + repr(self.global_parameter_gradient) + ")"
        )


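# For concreteness, an illustrative example of the cache contents: for a
# LinearNode (defined below) whose first argument is the ConstantNode bias
# input 1, with weights [w_0, w_1, w_2] and argument outputs [1, z_1, z_2],
# the cached fields would be
#
#     output                    = w_0 + w_1*z_1 + w_2*z_2
#     local_gradient            = [w_0, w_1, w_2]          # ∂f/∂z_i
#     global_gradient           = ∂E/∂f
#     local_parameter_gradient  = [1, z_1, z_2]             # ∂f/∂w_i
#     global_parameter_gradient = [∂E/∂f * 1, ∂E/∂f * z_1, ∂E/∂f * z_2]

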
class Node:
    '''A node of a computation graph.

    Attributes

    arguments: a list of inputs to the Node, which are other Nodes whose
    outputs are inputs to the computation performed by this Node.

    successors: a list of Nodes that contain this node as input. Used for
    a Node to compute its error as a function of the errors of the Nodes that
    contain this Node as an argument.

    parameters: A list containing the tunable parameters of this node.

    has_parameters: A bool that is True if and only if there is a parameters
    list set.

    cache: values that are stored during evaluation and training to avoid
    recomputing intermediate values in the network.

    Children of this class implement

    compute_output: [float] -> float
    compute_local_gradient: None -> [float]
    compute_local_parameter_gradient: None -> [float]
    compute_global_parameter_gradient: None -> [float]

    The output of compute_*_parameter_gradient corresponds to the values of
    self.parameters, index by index. The output of compute_local_gradient
    corresponds to the input vector, index by index.

    If the Node is a terminal node (one that computes the error for a training
    example), it must also have a method compute_error: [float], int -> float.
    '''

    def __init__(self, *arguments):
        self.has_parameters = False  # if this is True, child class must set self.parameters
        self.parameters = []
        self.arguments = arguments
        self.successors = []
        self.cache = CachedNodeData()

        # link argument successors to self
        for argument in self.arguments:
            argument.successors.append(self)

        '''Argument nodes z_i will query this node f(z_1, ..., z_k) for ∂f/∂z_i, so we need to keep
        track of the index for each argument node.'''
        self.argument_to_index = {node: index for (
            index, node) in enumerate(arguments)}

    def do_gradient_descent_step(self, step_size):
        '''The core gradient step subroutine: compute the gradient for each of
        this node's tunable parameters, and step in the direction opposite the
        gradient.'''
        if self.has_parameters:
            for i, gradient_entry in enumerate(self.global_parameter_gradient):
                # step away from the gradient
                self.parameters[i] -= step_size * gradient_entry

    '''Gradient computations which don't depend on the node's definition.'''

    def compute_global_gradient(self):
        '''Compute the derivative of the entire network with respect to this node.

        By the chain rule, ∂E/∂f is the sum, over the successors g of this
        node, of ∂E/∂g * ∂g/∂f.

        This method must be overridden for any output nodes that are terminal during
        training, such as an error node.
        '''
        return sum(
            successor.global_gradient *
            successor.local_gradient_for_argument(self)
            for successor in self.successors
        )

    def local_gradient_for_argument(self, argument):
        '''Return the derivative of this node with respect to a particular argument.'''
        argument_index = self.argument_to_index[argument]
        return self.local_gradient[argument_index]

    '''Cache lookups, computing on cache miss.'''

    def evaluate(self, inputs):
        if self.cache.output is None:
            self.cache.output = self.compute_output(inputs)
        return self.cache.output

    @property
    def output(self):
        if self.cache.output is None:
            raise Exception("Tried to query output not present in cache.")
        return self.cache.output

    @property
    def local_gradient(self):
        if self.cache.local_gradient is None:
            self.cache.local_gradient = self.compute_local_gradient()
        return self.cache.local_gradient

    @property
    def global_gradient(self):
        if self.cache.global_gradient is None:
            self.cache.global_gradient = self.compute_global_gradient()
        return self.cache.global_gradient

    @property
    def local_parameter_gradient(self):
        if self.cache.local_parameter_gradient is None:
            self.cache.local_parameter_gradient = self.compute_local_parameter_gradient()
        return self.cache.local_parameter_gradient

    @property
    def global_parameter_gradient(self):
        if self.cache.global_parameter_gradient is None:
            self.cache.global_parameter_gradient = self.compute_global_parameter_gradient()
        return self.cache.global_parameter_gradient

    def compute_output(self, inputs):
        raise NotImplementedError()

    def compute_local_parameter_gradient(self):
        raise NotImplementedError()

    def compute_local_gradient(self):
        raise NotImplementedError()

    def compute_global_parameter_gradient(self):
        raise NotImplementedError()


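class SquareNode(Node):
    '''An illustrative one-input node computing square(x) = x^2, included only
    as a minimal example of the subclass contract described in Node; nothing
    else in this module depends on it.
    '''

    def compute_output(self, inputs):
        return self.arguments[0].evaluate(inputs) ** 2

    def compute_local_gradient(self):
        # d(x^2)/dx = 2x, evaluated at the cached output of the argument node
        return [2 * self.arguments[0].output]

    def compute_local_parameter_gradient(self):
        return []  # No tunable parameters

    def compute_global_parameter_gradient(self):
        return []  # No tunable parameters

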
class InputNode(Node):
    '''A Node representing an input to the computation graph.'''
    def __init__(self, input_index):
        super().__init__()
        self.input_index = input_index

    def compute_output(self, inputs):
        return inputs[self.input_index]

    @staticmethod
    def make_input_nodes(count):
        '''A helper function so the user doesn't have to keep track of
           the input indexes.
        '''
        return [InputNode(i) for i in range(count)]

    def pretty_print(self, tabs=0):
        prefix = "  " * tabs
        return "{}InputNode({}) output = {:.2f}".format(
            prefix, self.input_index, self.output)


class ReluNode(Node):
    '''A node for a rectified linear unit (ReLU), i.e. the one-input,
    one-output function relu(x) = max(0, x).
    '''

    def compute_output(self, inputs):
        argument_value = self.arguments[0].evaluate(inputs)
        return max(0, argument_value)

    def compute_local_gradient(self):
        last_input = self.arguments[0].output
        return [1 if last_input > 0 else 0]

    def compute_local_parameter_gradient(self):
        return []  # No tunable parameters

    def compute_global_parameter_gradient(self):
        return []  # No tunable parameters

    def pretty_print(self, tabs=0):
        prefix = "  " * tabs
        return "{}Relu output={:.2f}\n{}\n".format(
            prefix,
            self.output,
            self.arguments[0].pretty_print(tabs + 1))


class SigmoidNode(Node):
    '''A node for a classical sigmoid unit, i.e. the one-input,
    one-output function s(x) = e^x / (e^x + 1).
    '''

    def compute_output(self, inputs):
        argument_value = self.arguments[0].evaluate(inputs)
        exp_value = math.exp(argument_value)
        return exp_value / (exp_value + 1)

    def compute_local_gradient(self):
        # s'(x) = s(x) * (1 - s(x)), so the derivative can be computed
        # directly from the cached output.
        last_output = self.output
        return [(1 - last_output) * last_output]

    def compute_local_parameter_gradient(self):
        return []  # No tunable parameters

    def compute_global_parameter_gradient(self):
        return []  # No tunable parameters

    def pretty_print(self, tabs=0):
        prefix = "  " * tabs
        return "{}Sigmoid output={:.2f}\n{}\n".format(
            prefix,
            self.output,
            self.arguments[0].pretty_print(tabs + 1))


class ConstantNode(Node):
    '''A constant (untrainable) node, used as the input to the "bias" entry
       of a linear node.'''

    def compute_output(self, inputs):
        return 1

    def pretty_print(self, tabs=0):
        prefix = "  " * tabs
        return "{}Constant(1)".format(prefix)


class LinearNode(Node):
    '''A linear node, i.e., the function with n inputs and n weights that
       computes sum(w * x for (w, x) in zip(weights, inputs)).'''

    def __init__(self, arguments, initial_weights=None):
        '''If the initial_weights are provided, they must be one longer
        than the number of arguments, and the first entry must correspond
        to the bias.
        '''
        super().__init__(ConstantNode(), *arguments)  # first arg is the bias
        self.initialize_weights(initial_weights)
        self.has_parameters = True
        self.parameters = self.weights  # name alias

    def initialize_weights(self, initial_weights):
        arglen = len(self.arguments)
        if initial_weights:
            if len(initial_weights) != arglen:
                raise Exception(
                    "Invalid initial_weights length {:d}".format(len(initial_weights)))
            self.weights = initial_weights
        else:
            # set the initial weights randomly, according to a heuristic distribution
            weight_bound = 1.0 / math.sqrt(arglen)
            self.weights = [
                random.uniform(-weight_bound, weight_bound) for _ in range(arglen)]

    def compute_output(self, inputs):
        return sum(
            w * x.evaluate(inputs)
            for (w, x) in zip(self.weights, self.arguments)
        )

    def compute_local_gradient(self):
        return self.weights

    def compute_local_parameter_gradient(self):
        return [arg.output for arg in self.arguments]

    def compute_global_parameter_gradient(self):
        return [
            self.global_gradient *
            self.local_parameter_gradient_for_argument(argument)
            for argument in self.arguments
        ]

    def local_parameter_gradient_for_argument(self, argument):
        '''Return the derivative of this node with respect to the weight
        associated with a particular argument.'''
        argument_index = self.argument_to_index[argument]
        return self.local_parameter_gradient[argument_index]

    def pretty_print(self, tabs=0):
        argument_strs = '\n'.join(arg.pretty_print(tabs + 1)
                                  for arg in self.arguments)
        prefix = "  " * tabs
        weights = ','.join(['%.2f' % w for w in self.weights])
        gradient = ','.join(
            ['%.2f' % w for w in self.global_parameter_gradient])
        return "{}Linear weights={} gradient={} output={:.2f}\n{}\n".format(
            prefix, weights, gradient, self.output, argument_strs)


class L2ErrorNode(Node):
    '''A node computing the squared deviation error function.

    The function is f(z(x), y) = (z(x) - y)^2, where (x, y) is a labeled
    example and z(x) is the rest of the computation graph.
    '''

    def compute_error(self, inputs, label):
        argument_value = self.arguments[0].evaluate(inputs)
        self.label = label  # cache the label
        return (argument_value - label) ** 2

    def compute_local_gradient(self):
        last_input = self.arguments[0].output
        return [2 * (last_input - self.label)]

    def compute_global_gradient(self):
        # This node is the error function itself, so ∂E/∂E = 1.
        return 1


class NeuralNetwork:
    '''A wrapper class for a computation graph, which encapsulates the
    backpropagation algorithm and training.
    '''
    def __init__(self, terminal_node, input_nodes, error_node=None, step_size=None):
        self.terminal_node = terminal_node
        self.input_nodes = input_nodes
        self.error_node = error_node or L2ErrorNode(self.terminal_node)
        self.step_size = step_size or 1e-2
        self.reset()

    def for_each(self, func):
        '''Walk the graph and apply func to each node.'''
        nodes_to_process = set([self.error_node])
        processed = set()

        while nodes_to_process:
            node = nodes_to_process.pop()
            func(node)
            processed.add(node)
            nodes_to_process |= set(node.arguments) - processed

    def reset(self):
        def reset_one(node):
            node.cache = CachedNodeData()
        self.for_each(reset_one)

    def evaluate(self, inputs):
        '''Evaluate the computation graph on a single set of inputs.'''
        self.reset()
        return self.terminal_node.evaluate(inputs)

    def compute_error(self, inputs, label):
        '''Compute the error for a given labeled example.'''
        self.reset()
        return self.error_node.compute_error(inputs, label)

    def backpropagation_step(self, inputs, label, step_size=None):
        if step_size is None:
            step_size = self.step_size
        self.compute_error(inputs, label)
        self.for_each(lambda node: node.do_gradient_descent_step(step_size))

    def train(self, dataset, max_steps=10000):
        '''Train the neural network on a dataset.

        Args:
            dataset: a list of pairs ([float], int) where the first entry is
                the data point and the second is the label.
            max_steps: the number of steps to train for.

        Returns:
            None (self is modified)
        '''
        for i in range(max_steps):
            inputs, label = random.choice(dataset)
            self.backpropagation_step(inputs, label, self.step_size)

            if i % int(max_steps / 10) == 0:
                print('{:2.1f}%'.format(100 * i / max_steps))

    def error_on_dataset(self, dataset):
        '''Return the fraction of the dataset that is misclassified, rounding
        the network's output to produce a 0/1 prediction.'''
        errors = 0
        total = len(dataset)

        for (example, label) in dataset:
            if round(self.evaluate(example)) != label:
                errors += 1

        return errors / total

    def pretty_print(self):
        return self.terminal_node.pretty_print()
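

# A minimal usage sketch: the network shape, dataset, and hyperparameters
# below are illustrative choices, not anything prescribed by this module.
# It trains a single sigmoid-of-linear unit to compute the boolean OR
# function.
if __name__ == "__main__":
    input_nodes = InputNode.make_input_nodes(2)
    linear_node = LinearNode(input_nodes)
    output_node = SigmoidNode(linear_node)
    network = NeuralNetwork(output_node, input_nodes, step_size=0.5)

    # Labeled examples ([x_1, x_2], label) for boolean OR.
    dataset = [
        ([0, 0], 0),
        ([0, 1], 1),
        ([1, 0], 1),
        ([1, 1], 1),
    ]

    network.train(dataset, max_steps=5000)
    print(network.pretty_print())
    print("misclassification rate: {:.2f}".format(
        network.error_on_dataset(dataset)))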