27471184649

Committed 13 Jun 2026 03:37PM UTC coverage: 0.0% (-79.4%) from 79.431%

Build # 27471184649

Build Type

push

github

Committed by

MatthewGerber

Commit Message

* Change machine type.
* Try xdist.

Coverage Stats

0 of 5484 relevant lines covered (0.0%)

0.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/src/rlai/state_value/function_approximation/__init__.py

from argparse import ArgumentParser
from typing import List, Tuple, Optional

import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
from numpy.random import RandomState

from rlai.core import MdpState, Environment
from rlai.docs import rl_text
from rlai.models.feature_extraction import StationaryFeatureScaler
from rlai.state_value import StateValueEstimator, ValueEstimator
from rlai.state_value.function_approximation.models import StateFunctionApproximationModel
from rlai.state_value.function_approximation.models.feature_extraction import StateFeatureExtractor
from rlai.utils import parse_arguments, load_class


@rl_text(chapter='Value Estimation', page=195)
class ApproximateValueEstimator(ValueEstimator):
    """
    Value estimator for a single state, backed by an approximate state-value estimator. Updates and evaluations are
    delegated to the backing estimator.
    """

    def __init__(
            self,
            estimator: 'ApproximateStateValueEstimator',
            state: MdpState
    ):
        """
        Bind a state to the approximate state-value estimator that backs it.

        :param estimator: Approximate state-value estimator that receives samples and evaluates the state.
        :param state: State that this value estimator refers to.
        """

        self.state = state
        self.estimator = estimator

    def update(
            self,
            value: float,
            weight: Optional[float] = None
    ):
        """
        Pass a new value sample for the bound state to the backing estimator and increment the backing estimator's
        update count.

        :param value: New value.
        :param weight: Weight of the sample, or None for no weight.
        """

        backing_estimator = self.estimator
        backing_estimator.add_sample(self.state, value, weight)
        backing_estimator.update_count += 1

    def get_value(
            self
    ) -> float:
        """
        Evaluate the backing estimator at the bound state.

        :return: Current estimated value of the state.
        """

        estimated_value = self.estimator.evaluate(self.state)

        return estimated_value


@rl_text(chapter='Value Estimation', page=195)
class ApproximateStateValueEstimator(StateValueEstimator):
    """
    Approximate state-value estimator.
    """

    @classmethod
    def get_argument_parser(
            cls
    ) -> ArgumentParser:
        """
        Build the argument parser for this class. The parser extends the parent class's parser with the options that
        select the function approximation model, the feature extractor, and outcome scaling.

        :return: Argument parser.
        """

        inherited_parser = super().get_argument_parser()

        parser = ArgumentParser(
            prog=f'{cls.__module__}.{cls.__name__}',
            parents=[inherited_parser],
            allow_abbrev=False,
            add_help=False
        )

        # each entry is an option flag paired with the keyword arguments used to register it.
        option_specs = (
            (
                '--function-approximation-model',
                dict(type=str, help='Fully-qualified type name of function approximation model.')
            ),
            (
                '--feature-extractor',
                dict(type=str, help='Fully-qualified type name of feature extractor.')
            ),
            (
                '--scale-outcomes',
                dict(
                    action='store_true',
                    help='Whether to scale (standardize) outcomes before fitting the function approximation model.'
                )
            )
        )

        for flag, options in option_specs:
            parser.add_argument(flag, **options)

        return parser

    @classmethod
    def init_from_arguments(
            cls,
            args: List[str],
            random_state: RandomState,
            environment: Environment
    ) -> Tuple[StateValueEstimator, List[str]]:
        """
        Construct a state-value estimator from command-line style arguments. The feature extractor and the model are
        loaded by type name and are each initialized from whatever arguments remain unparsed at that point.

        :param args: Arguments.
        :param random_state: Random state, passed to the model's initializer.
        :param environment: Environment, passed to the feature extractor's initializer.
        :return: 2-tuple of a state-value estimator and a list of unparsed arguments.
        """

        parsed_args, remaining_args = parse_arguments(cls, args)

        # the feature extractor is built first, since the model's intercept setting depends on it.
        extractor_type = load_class(parsed_args.feature_extractor)
        feature_extractor, remaining_args = extractor_type.init_from_arguments(
            args=remaining_args,
            environment=environment
        )

        # the model is asked to fit an intercept only if the extractor reports that it does not extract one.
        model_type = load_class(parsed_args.function_approximation_model)
        model, remaining_args = model_type.init_from_arguments(
            args=remaining_args,
            random_state=random_state,
            fit_intercept=not feature_extractor.extracts_intercept()
        )

        # the type names have been consumed above. remove them so that the remaining parsed values can be passed
        # directly to the constructor as keyword arguments.
        del parsed_args.feature_extractor
        del parsed_args.function_approximation_model

        constructor_kwargs = vars(parsed_args)

        return cls(model=model, feature_extractor=feature_extractor, **constructor_kwargs), remaining_args

    def __init__(
            self,
            model: StateFunctionApproximationModel,
            feature_extractor: StateFeatureExtractor,
            scale_outcomes: bool
    ):
        """
        Initialize the estimator with an empty collection of experience.

        :param model: Function approximation model that is fit to the collected experience.
        :param feature_extractor: Feature extractor used to turn states into a feature matrix.
        :param scale_outcomes: Whether to scale state-value outcomes before fitting the estimator model.
        """

        super().__init__()

        # collaborators and configuration
        self.scale_outcomes = scale_outcomes
        self.feature_extractor = feature_extractor
        self.model = model

        # scaler applied to outcomes when `scale_outcomes` is True
        self.value_scaler = StationaryFeatureScaler()

        # experience gathered by `add_sample` and consumed by `improve`
        self.experience_pending: bool = False
        self.weights: Optional[np.ndarray] = None
        self.experience_values: List[float] = []
        self.experience_states: List[MdpState] = []

    def add_sample(
            self,
            state: MdpState,
            value: float,
            weight: Optional[float]
    ):
        """
        Record one sample of experience. Recorded samples are held until `improve` is called, at which point they are
        used to fit the function approximation model.

        :param state: State.
        :param value: Value observed for the state.
        :param weight: Weight of the sample, or None to record no weight for it.
        """

        self.experience_states.append(state)
        self.experience_values.append(value)

        # NOTE(review): a sample without a weight does not extend `self.weights`, so mixing weighted and unweighted
        # samples leaves the weights shorter than the experience lists -- confirm that callers never mix them.
        if weight is not None:
            self.weights = (
                np.array([weight])
                if self.weights is None
                else np.append(self.weights, [weight], axis=0)
            )

        self.experience_pending = True

    def improve(
            self
    ):
        """
        Fit the function approximation model to the experience collected through calls to `add_sample`, then discard
        that experience. Does nothing if no experience is pending. No value is returned.
        """

        # nothing to do unless samples have been added since the last fit.
        if not self.experience_pending:
            return

        feature_matrix = self.extract_features(self.experience_states, True)

        targets = np.array(self.experience_values)
        if self.scale_outcomes:
            target_column = targets.reshape(-1, 1)
            targets = self.value_scaler.scale_features(target_column, True).flatten()

        # feature extractors may return a matrix with no columns if extraction was not possible. skip the fit in that
        # case, but still discard the experience below.
        has_features = feature_matrix.shape[1] > 0
        if has_features:
            self.model.fit(
                feature_matrix=feature_matrix,
                outcomes=targets,
                weights=self.weights
            )

        # reset the collected experience.
        self.experience_states.clear()
        self.experience_values.clear()
        self.weights = None
        self.experience_pending = False

    def evaluate(
            self,
            state: MdpState
    ) -> float:
        """
        Estimate the value of a state with the function approximation model.

        :param state: State.
        :return: Estimated value, or 0.0 if no features could be extracted for the state.
        """

        # extract features for the single state without refitting the feature scaler.
        features = self.extract_features([state], False)

        # feature extractors may return a matrix with no columns if extraction was not possible
        if features.shape[1] == 0:  # pragma no cover
            return 0.0

        predictions = self.model.evaluate(features)

        # when outcomes are scaled, map the model output back to the original value space.
        if self.scale_outcomes:
            prediction_column = predictions.reshape((-1, 1))
            predictions = self.value_scaler.invert_scaled_features(prediction_column).flatten()

        # one state was passed in, so exactly one value is expected back.
        assert len(predictions) == 1

        return float(predictions[0])

    def extract_features(
            self,
            states: List[MdpState],
            refit_scaler: bool
    ) -> np.ndarray:
        """
        Extract features for a list of states by delegating to the feature extractor.

        :param states: States.
        :param refit_scaler: Whether to refit the feature scaler before scaling the extracted features. Refitting is
        only appropriate where nonstationarity is desired (e.g., during training). Pass False during evaluation so that
        the scaler remains fixed.
        :return: State-feature matrix (#states, #features).
        """

        state_feature_matrix = self.feature_extractor.extract(states, refit_scaler)

        return state_feature_matrix

    def plot(
            self,
            pdf: Optional[PdfPages]
    ):
        """
        Plot the estimator by delegating to the model's plot function.

        :param pdf: PDF to plot to, or None to show directly.
        """

        model = self.model
        model.plot(True, pdf)

    def reset_for_new_run(
            self,
            state: MdpState
    ):
        """
        Reset the feature extractor for a new run.

        :param state: State that is passed to the feature extractor's reset.
        """

        extractor = self.feature_extractor
        extractor.reset_for_new_run(state)

    def __getitem__(
            self,
            state: MdpState
    ) -> ApproximateValueEstimator:
        """
        Get a value estimator for a state. A new estimator bound to this object and the state is created on each call.

        :param state: State.
        :return: Value estimator.
        """

        state_estimator = ApproximateValueEstimator(self, state)

        return state_estimator

    def __len__(
            self
    ) -> int:
        """
        Get the number of states defined by the estimator. The number of states is not actually tracked, so a constant
        is returned.

        :return: Always 1.
        """

        # placeholder count (a bit of a hack), since states are not tracked.
        placeholder_state_count = 1

        return placeholder_state_count

    def __contains__(
            self,
            state: MdpState
    ) -> bool:
        """
        Check whether a state is defined by the estimator. Every state is reported as defined.

        :param state: State (not inspected).
        :return: Always True.
        """

        is_defined = True

        return is_defined

    def __eq__(
            self,
            other: object
    ) -> bool:
        """
        Check whether the estimator equals another. Two estimators are compared by their models only.

        :param other: Other estimator.
        :return: True if the models are equal and False otherwise.
        :raises ValueError: If the other object is not an approximate state-value estimator.
        """

        if isinstance(other, ApproximateStateValueEstimator):
            return self.model == other.model

        raise ValueError(f'Expected {ApproximateStateValueEstimator}')

    def __ne__(
            self,
            other: object
    ) -> bool:
        """
        Check whether the estimator does not equal another. This is the negation of the equality check.

        :param other: Other estimator.
        :return: True if not equal and False otherwise.
        """

        are_equal = self == other

        return not are_equal

1	from argparse import ArgumentParser	×
2	from typing import List, Tuple, Optional	×
3
4	import numpy as np	×
5	from matplotlib.backends.backend_pdf import PdfPages	×
6	from numpy.random import RandomState	×
7
8	from rlai.core import MdpState, Environment	×
9	from rlai.docs import rl_text	×
10	from rlai.models.feature_extraction import StationaryFeatureScaler	×
11	from rlai.state_value import StateValueEstimator, ValueEstimator	×
12	from rlai.state_value.function_approximation.models import StateFunctionApproximationModel	×
13	from rlai.state_value.function_approximation.models.feature_extraction import StateFeatureExtractor	×
14	from rlai.utils import parse_arguments, load_class	×
15
16
17	@rl_text(chapter='Value Estimation', page=195)	×
18	class ApproximateValueEstimator(ValueEstimator):	×
19	"""
20	Approximate value estimator.
21	"""
22
23	def update(	×
24	self,
25	value: float,
26	weight: Optional[float] = None
27	):
28	"""
29	Update the value estimate.
30
31	:param value: New value.
32	:param weight: Weight.
33	"""
34
35	self.estimator.add_sample(self.state, value, weight)	×
36	self.estimator.update_count += 1	×
37
38	def get_value(	×
39	self
40	) -> float:
41	"""
42	Get current estimated value.
43
44	:return: Value.
45	"""
46
47	return self.estimator.evaluate(self.state)	×
48
49	def __init__(	×
50	self,
51	estimator: 'ApproximateStateValueEstimator',
52	state: MdpState
53	):
54	"""
55	Initialize the estimator.
56
57	:param estimator: State-action value estimator.
58	:param state: State.
59	"""
60
61	self.estimator = estimator	×
62	self.state = state	×
63
64
65	@rl_text(chapter='Value Estimation', page=195)	×
66	class ApproximateStateValueEstimator(StateValueEstimator):	×
67	"""
68	Approximate state-value estimator.
69	"""
70
71	@classmethod	×
72	def get_argument_parser(	×
73	cls
74	) -> ArgumentParser:
75	"""
76	Get argument parser.
77
78	:return: Argument parser.
79	"""
80
81	parser = ArgumentParser(	×
82	prog=f'{cls.__module__}.{cls.__name__}',
83	parents=[super().get_argument_parser()],
84	allow_abbrev=False,
85	add_help=False
86	)
87
88	parser.add_argument(	×
89	'--function-approximation-model',
90	type=str,
91	help='Fully-qualified type name of function approximation model.'
92	)
93
94	parser.add_argument(	×
95	'--feature-extractor',
96	type=str,
97	help='Fully-qualified type name of feature extractor.'
98	)
99
100	parser.add_argument(	×
101	'--scale-outcomes',
102	action='store_true',
103	help='Whether to scale (standardize) outcomes before fitting the function approximation model.'
104	)
105
106	return parser	×
107
108	@classmethod	×
109	def init_from_arguments(	×
110	cls,
111	args: List[str],
112	random_state: RandomState,
113	environment: Environment
114	) -> Tuple[StateValueEstimator, List[str]]:
115	"""
116	Initialize a state-value estimator from arguments.
117
118	:param args: Arguments.
119	:param random_state: Random state.
120	:param environment: Environment.
121	:return: 2-tuple of a state-value estimator and a list of unparsed arguments.
122	"""
123
124	parsed_args, unparsed_args = parse_arguments(cls, args)	×
125
126	# load feature extractor
127	feature_extractor_class = load_class(parsed_args.feature_extractor)	×
128	fex, unparsed_args = feature_extractor_class.init_from_arguments(	×
129	args=unparsed_args,
130	environment=environment
131	)
132	del parsed_args.feature_extractor	×
133
134	# load model
135	model_class = load_class(parsed_args.function_approximation_model)	×
136	model, unparsed_args = model_class.init_from_arguments(	×
137	args=unparsed_args,
138	random_state=random_state,
139	fit_intercept=not fex.extracts_intercept()
140	)
141	del parsed_args.function_approximation_model	×
142
143	# initialize estimator
144	estimator = cls(	×
145	model=model,
146	feature_extractor=fex,
147	**vars(parsed_args)
148	)
149
150	return estimator, unparsed_args	×
151
152	def __init__(	×
153	self,
154	model: StateFunctionApproximationModel,
155	feature_extractor: StateFeatureExtractor,
156	scale_outcomes: bool
157	):
158	"""
159	Initialize the estimator.
160
161	:param model: Model.
162	:param feature_extractor: Feature extractor.
163	:param scale_outcomes: Whether to scale state-value outcomes before fitting the estimator model.
164	"""
165
166	super().__init__()	×
167
168	self.model = model	×
169	self.feature_extractor = feature_extractor	×
170	self.scale_outcomes = scale_outcomes	×
171
172	self.experience_states: List[MdpState] = []	×
173	self.experience_values: List[float] = []	×
174	self.weights: Optional[np.ndarray] = None	×
175	self.experience_pending: bool = False	×
176	self.value_scaler = StationaryFeatureScaler()	×
177
178	def add_sample(	×
179	self,
180	state: MdpState,
181	value: float,
182	weight: Optional[float]
183	):
184	"""
185	Add a sample of experience to the estimator. The collection of samples will be used to fit the function
186	approximation model when `improve` is called.
187
188	:param state: State.
189	:param value: Value.
190	:param weight: Weight.
191	"""
192
193	self.experience_states.append(state)	×
194	self.experience_values.append(value)	×
195
196	if weight is not None:	×
197	if self.weights is None:	×
198	self.weights = np.array([weight])	×
199	else:
200	self.weights = np.append(self.weights, [weight], axis=0)	×
201
202	self.experience_pending = True	×
203
204	def improve(	×
205	self
206	):
207	"""
208	Improve an agent's policy using the current sample of experience collected through calls to `add_sample`.
209
210	:return: Number of states improved.
211	"""
212
213	# if we have pending experience, then fit the model and reset the data.
214	if self.experience_pending:	×
215
216	state_feature_matrix = self.extract_features(self.experience_states, True)	×
217
218	outcomes = np.array(self.experience_values)	×
219	if self.scale_outcomes:	×
220	outcomes = self.value_scaler.scale_features(outcomes.reshape(-1, 1), True).flatten()	×
221
222	# feature extractors may return a matrix with no columns if extraction was not possible
223	if state_feature_matrix.shape[1] > 0:	×
224	self.model.fit(	×
225	feature_matrix=state_feature_matrix,
226	outcomes=outcomes,
227	weights=self.weights
228	)
229
230	self.experience_states.clear()	×
231	self.experience_values.clear()	×
232	self.weights = None	×
233	self.experience_pending = False	×
234
235	def evaluate(	×
236	self,
237	state: MdpState
238	) -> float:
239	"""
240	Evaluate the estimator's function approximation model at a state.
241
242	:param state: State.
243	:return: Estimate.
244	"""
245
246	# extract feature matrix
247	state_feature_matrix = self.extract_features([state], False)	×
248
249	# feature extractors may return a matrix with no columns if extraction was not possible
250	if state_feature_matrix.shape[1] == 0: # pragma no cover
251	return 0.0
252
253	state_values = self.model.evaluate(state_feature_matrix)	×
254
255	# invert the state value back to the original space if we're scaling
256	if self.scale_outcomes:	×
257	state_values = self.value_scaler.invert_scaled_features(state_values.reshape((-1, 1))).flatten()	×
258
259	assert len(state_values) == 1	×
260
261	return float(state_values[0])	×
262
263	def extract_features(	×
264	self,
265	states: List[MdpState],
266	refit_scaler: bool
267	) -> np.ndarray:
268	"""
269	Extract features for states.
270
271	:param states: States.
272	:param refit_scaler: Whether to refit the feature scaler before scaling the extracted features. This is
273	only appropriate in settings where nonstationarity is desired (e.g., during training). During evaluation, the
274	scaler should remain fixed, which means this should be False.
275	:return: State-feature matrix (#states, #features).
276	"""
277
278	return self.feature_extractor.extract(states, refit_scaler)	×
279
280	def plot(	×
281	self,
282	pdf: Optional[PdfPages]
283	):
284	"""
285	Plot the current estimator.
286
287	:param pdf: PDF to plot to, or None to show directly.
288	"""
289
290	self.model.plot(True, pdf)	×
291
292	def reset_for_new_run(	×
293	self,
294	state: MdpState
295	):
296	"""
297	Reset for new run.
298	"""
299
300	self.feature_extractor.reset_for_new_run(state)	×
301
302	def __getitem__(	×
303	self,
304	state: MdpState
305	) -> ApproximateValueEstimator:
306	"""
307	Get the value estimator for a state.
308
309	:param state: State.
310	:return: Value estimator.
311	"""
312
313	return ApproximateValueEstimator(self, state)	×
314
315	def __len__(	×
316	self
317	) -> int:
318	"""
319	Get number of states defined by the estimator.
320
321	:return: Number of states.
322	"""
323
324	# a bit of a hack, as we don't actually track the number of states.
325	return 1	×
326
327	def __contains__(	×
328	self,
329	state: MdpState
330	) -> bool:
331	"""
332	Check whether a state is defined by the estimator.
333
334	:param state: State.
335	:return: True if defined and False otherwise.
336	"""
337
338	return True	×
339
340	def __eq__(	×
341	self,
342	other: object
343	) -> bool:
344	"""
345	Check whether the estimator equals another.
346
347	:param other: Other estimator.
348	:return: True if equal and False otherwise.
349	"""
350
351	if not isinstance(other, ApproximateStateValueEstimator):	×
352	raise ValueError(f'Expected {ApproximateStateValueEstimator}')	×
353
354	return self.model == other.model	×
355
356	def __ne__(	×
357	self,
358	other: object
359	) -> bool:
360	"""
361	Check whether the estimator does not equal another.
362
363	:param other: Other estimator.
364	:return: True if not equal and False otherwise.
365	"""
366
367	return not (self == other)	×

MatthewGerber / rlai / 27471184649

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous