13138848026

Committed 04 Feb 2025 02:53PM UTC coverage: 92.108% (+2.2%) from 89.877%

Build # 13138848026

Build Type

push

github

Committed by

mtf90

Commit Message

[maven-release-plugin] prepare release automatalib-0.12.0

Run Details

16609 of 18032 relevant lines covered (92.11%)

1.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.11

/serialization/fsm/src/main/java/net/automatalib/serialization/fsm/parser/AbstractFSMParser.java

/* Copyright (C) 2013-2025 TU Dortmund University
 * This file is part of AutomataLib <https://automatalib.net>.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.automatalib.serialization.fsm.parser;

import java.io.IOException;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.function.Function;

import net.automatalib.exception.FormatException;
import org.checkerframework.checker.nullness.qual.Nullable;

/**
 * This class provides methods to parse automata in FSM format.
 * <p>
 * The FSM is parsed by means of a tokenizer (a grammar is not used).
 *
 * @see <a href="http://www.win.tue.nl/vis1/home/apretori/data/fsm.html">the FSM format</a>
 */
public abstract class AbstractFSMParser<I> {

    /**
     * An enumeration for the three parts in the FSM file.
     */
    protected enum Part {
        DATA_DEFINITION,
        STATE_VECTORS,
        TRANSITIONS
    }

    // some messages for FSMParseExceptions.
    public static final String NO_SUCH_STATE = "state with number %d is undefined";
    public static final String NON_DETERMINISM_DETECTED = "non-determinism detected (previous value: %s)";
    public static final String EXPECT_CHAR = "expected char '%c' not found";
    public static final String EXPECT_NUMBER = "number expected";
    public static final String EXPECT_IDENTIFIER = "expecting identifier";
    public static final String EXPECT_STRING = "expecting string";

    /**
     * The token that separates the three parts of an FSM file.
     */
    private static final String PART_SEPARATOR = "---";

    /**
     * The function that transforms strings in the FSM file to input.
     */
    private final Function<String, I> inputParser;

    /**
     * The current line that is being parsed in the current part.
     */
    private int partLineNumber;

    /**
     * The set that contains all inputs that end up in the input alphabet (read from the file).
     */
    private final Set<I> inputs = new HashSet<>();

    /**
     * The set that contains all inputs that end up in the input alphabet (provided by the user, may be {@code null}).
     */
    protected final @Nullable Collection<? extends I> targetInputs;

    /**
     * Constructs a new AbstractFSMParser.
     *
     * @param targetInputs
     *         A collection containing the inputs which should constitute the input alphabet of the parsed automaton.
     *         If {@code null}, the inputs will be automatically gathered from the read FSM file.
     * @param inputParser the Function that parses strings in the FSM file to input.
     */
    protected AbstractFSMParser(@Nullable Collection<? extends I> targetInputs, Function<String, I> inputParser) {
        this.inputParser = inputParser;
        this.targetInputs = targetInputs;
    }

    /**
     * Gets the function that transforms strings in the FSM file to input.
     *
     * @return the function.
     */
    protected Function<String, I> getInputParser() {
        return inputParser;
    }

    /**
     * Returns the line number that is being parsed in the current part.
     *
     * @return the line number.
     */
    protected int getPartLineNumber() {
        return partLineNumber;
    }

    /**
     * Gets the StreamTokenizer, that tokenizes tokens in the FSM file.
     * <p>
     * The tokenizer's syntax table is reset and then configured as follows: ASCII letters, digits, {@code '-'},
     * {@code '_'} and the characters {@code '\u00A0'} through {@code '\u00FF'} form words; everything up to and
     * including the space character is whitespace; {@code '"'} delimits quoted strings; end-of-line is reported as a
     * token; {@code '('} and {@code ')'} are returned as single-character tokens.
     *
     * @param reader
     *         the source of the FSM file
     *
     * @return the StreamTokenizer.
     */
    protected StreamTokenizer getStreamTokenizer(Reader reader) {
        final StreamTokenizer streamTokenizer = new StreamTokenizer(reader);
        streamTokenizer.resetSyntax();
        streamTokenizer.wordChars('a', 'z');
        streamTokenizer.wordChars('A', 'Z');
        streamTokenizer.wordChars('-', '-');
        streamTokenizer.wordChars('_', '_');
        streamTokenizer.wordChars('0', '9');
        // Same upper range that StreamTokenizer treats as alphabetic by default. The bounds are written as Unicode
        // escapes so that the encoding of this source file cannot corrupt them (a range whose lower bound exceeds
        // its upper bound silently registers nothing).
        streamTokenizer.wordChars('\u00A0', '\u00FF');
        streamTokenizer.whitespaceChars(0, ' ');
        streamTokenizer.quoteChar('"');
        streamTokenizer.eolIsSignificant(true);
        streamTokenizer.ordinaryChar('(');
        streamTokenizer.ordinaryChar(')');
        return streamTokenizer;
    }

    /**
     * Gets the set that contains all inputs that end up in the input alphabet.
     *
     * @return the set of inputs.
     */
    protected Set<I> getInputs() {
        return inputs;
    }

    /**
     * Parse a data definition.
     *
     * @param streamTokenizer
     *         tokenizer containing the input
     *
     * @throws FormatException when the FSM source is invalid.
     * @throws IOException when FSM source could not be read.
     */
    protected abstract void parseDataDefinition(StreamTokenizer streamTokenizer) throws IOException, FormatException;

    /**
     * Perform some actions after all data definitions have been parsed.
     *
     * @param streamTokenizer
     *         tokenizer containing the input
     *
     * @throws FormatException when the FSM source is invalid.
     * @throws IOException when FSM source could not be read.
     */
    protected abstract void checkDataDefinitions(StreamTokenizer streamTokenizer) throws IOException, FormatException;

    /**
     * Parse a state vector.
     *
     * @param streamTokenizer
     *         tokenizer containing the input
     *
     * @throws FormatException when the FSM source is invalid.
     * @throws IOException when FSM source could not be read.
     */
    protected abstract void parseStateVector(StreamTokenizer streamTokenizer) throws IOException, FormatException;

    /**
     * Perform some actions after all state vectors have been parsed.
     *
     * @param streamTokenizer
     *         tokenizer containing the input
     *
     * @throws FormatException when the FSM source is invalid.
     * @throws IOException when FSM source could not be read.
     */
    protected abstract void checkStateVectors(StreamTokenizer streamTokenizer) throws IOException, FormatException;

    /**
     * Parse a transition.
     *
     * @param streamTokenizer
     *         tokenizer containing the input
     *
     * @throws FormatException when the FSM source is invalid.
     * @throws IOException when FSM source could not be read.
     */
    protected abstract void parseTransition(StreamTokenizer streamTokenizer) throws IOException, FormatException;

    /**
     * Perform some actions after all transitions have been parsed.
     *
     * @param streamTokenizer
     *         tokenizer containing the input
     *
     * @throws FormatException when the FSM source is invalid.
     * @throws IOException when FSM source could not be read.
     */
    protected abstract void checkTransitions(StreamTokenizer streamTokenizer) throws IOException, FormatException;

    /**
     * Parses the FSM file line-by-line.
     * At first this method expects to parse data definitions, and calls {@link #parseDataDefinition(StreamTokenizer)}
     * for each data definition. After "---" is encountered {@link #checkDataDefinitions(StreamTokenizer)} is called,
     * and this method expects to parse state vectors. The behavior is similar for state vectors and transitions.
     * For each line this method will increment {@link #partLineNumber}, and reset it when a new part in the FSM file
     * begins.
     * <p>
     * Note that {@link StreamTokenizer} allows one to push back tokens. This is used whenever we have checked the
     * type of token we are going to read.
     * <p>
     * After each line has been handled, any remaining tokens on that line are discarded. The last line of the source
     * does not need to be terminated by a line break.
     *
     * @param reader
     *         the source of the FSM file
     *
     * @throws FormatException when the FSM source is invalid.
     * @throws IOException when FSM source could not be read.
     */
    protected void parse(Reader reader) throws IOException, FormatException {
        Part part = Part.DATA_DEFINITION;
        partLineNumber = 0;

        final StreamTokenizer streamTokenizer = getStreamTokenizer(reader);

        while (streamTokenizer.nextToken() != StreamTokenizer.TT_EOF) {
            // only peeked at the token to detect end-of-file; hand it back to the part-specific logic
            streamTokenizer.pushBack();
            switch (part) {
                case DATA_DEFINITION: {
                    if (isPartSeparator(streamTokenizer)) {
                        // we entered the part with the state vectors
                        part = Part.STATE_VECTORS;
                        partLineNumber = 0;
                        checkDataDefinitions(streamTokenizer);
                    } else {
                        streamTokenizer.pushBack();
                        parseDataDefinition(streamTokenizer);
                    }
                    break;
                }
                case STATE_VECTORS: {
                    if (isPartSeparator(streamTokenizer)) {
                        // we entered the part with the transitions.
                        part = Part.TRANSITIONS;
                        partLineNumber = 0;
                        checkStateVectors(streamTokenizer);
                    } else {
                        streamTokenizer.pushBack();
                        parseStateVector(streamTokenizer);
                    }
                    break;
                }
                case TRANSITIONS: {
                    parseTransition(streamTokenizer);
                    break;
                }
                default: throw new AssertionError();
            }
            skipRestOfLine(streamTokenizer);
            partLineNumber++;
        }
        checkTransitions(streamTokenizer);
    }

    /**
     * Reads the next token and reports whether it is the "---" part separator. The token is consumed in either
     * case; callers push it back themselves when it is not a separator.
     */
    private static boolean isPartSeparator(StreamTokenizer streamTokenizer) throws IOException {
        return streamTokenizer.nextToken() == StreamTokenizer.TT_WORD
               && PART_SEPARATOR.equals(streamTokenizer.sval);
    }

    /**
     * Consumes tokens up to and including the next end-of-line token. Also stops at end-of-file: the tokenizer
     * keeps returning {@link StreamTokenizer#TT_EOF} once the source is exhausted, so waiting for an end-of-line
     * alone would never terminate on a source whose last line has no line break.
     */
    private static void skipRestOfLine(StreamTokenizer streamTokenizer) throws IOException {
        int token = streamTokenizer.nextToken();
        while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF) {
            token = streamTokenizer.nextToken();
        }
    }
}

1	/* Copyright (C) 2013-2025 TU Dortmund University
2	* This file is part of AutomataLib <https://automatalib.net>.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16	package net.automatalib.serialization.fsm.parser;
17
18	import java.io.IOException;
19	import java.io.Reader;
20	import java.io.StreamTokenizer;
21	import java.util.Collection;
22	import java.util.HashSet;
23	import java.util.Set;
24	import java.util.function.Function;
25
26	import net.automatalib.exception.FormatException;
27	import org.checkerframework.checker.nullness.qual.Nullable;
28
29	/**
30	* This class provides methods to parse automata in FSM format.
31	* <p>
32	* The FSM is parsed by means of a tokenizer (a grammar is not used).
33	*
34	* @see <a href="http://www.win.tue.nl/vis1/home/apretori/data/fsm.html">the FSM format</a>
35	*/
36	public abstract class AbstractFSMParser<I> {
37
38	/**
39	* An enumeration for the three parts in the FSM file.
40	*/
41	protected enum Part {	2✔
42	DATA_DEFINITION,	2✔
43	STATE_VECTORS,	2✔
44	TRANSITIONS	2✔
45	}
46
47	// some messages for FSMParseExceptions.
48	public static final String NO_SUCH_STATE = "state with number %d is undefined";
49	public static final String NON_DETERMINISM_DETECTED = "non-determinism detected (previous value: %s)";
50	public static final String EXPECT_CHAR = "expected char '%c' not found";
51	public static final String EXPECT_NUMBER = "number expected";
52	public static final String EXPECT_IDENTIFIER = "expecting identifier";
53	public static final String EXPECT_STRING = "expecting string";
54
55	/**
56	* The function that transforms strings in the FSM file to input.
57	*/
58	private final Function<String, I> inputParser;
59
60	/**
61	* The current line that is being parsed in the current part.
62	*/
63	private int partLineNumber;
64
65	/**
66	* The set that contains all inputs that end up in the input alphabet (read from the file).
67	*/
68	private final Set<I> inputs = new HashSet<>();	2✔
69
70	/**
71	* The set that contains all inputs that end up in the input alphabet (provided by the user, may be {@code null}).
72	*/
73	protected final @Nullable Collection<? extends I> targetInputs;
74
75	/**
76	* Constructs a new AbstractFSMParser and defines all possible tokens.
77	*
78	* @param targetInputs
79	* A collection containing the inputs which should constitute the input alphabet of the parsed automaton.
80	* If {@code null}, the inputs will be automatically gathered from the read FSM file.
81	* @param inputParser the Function that parses strings in the FSM file to input.
82	*/
83	protected AbstractFSMParser(@Nullable Collection<? extends I> targetInputs, Function<String, I> inputParser) {	2✔
84	this.inputParser = inputParser;	2✔
85	this.targetInputs = targetInputs;	2✔
86	}	2✔
87
88	/**
89	* Gets the function that transforms strings in the FSM file to input.
90	*
91	* @return the function.
92	*/
93	protected Function<String, I> getInputParser() {
94	return inputParser;	2✔
95	}
96
97	/**
98	* Returns the line number that is being parsed in the current part.
99	*
100	* @return the line number.
101	*/
102	protected int getPartLineNumber() {
103	return partLineNumber;	2✔
104	}
105
106	/**
107	* Gets the StreamTokenizer, that tokenizes tokens in the FSM file.
108	*
109	* @param reader
110	* the source of the FSM file
111	*
112	* @return the StreamTokenizer.
113	*/
114	protected StreamTokenizer getStreamTokenizer(Reader reader) {
115	final StreamTokenizer streamTokenizer = new StreamTokenizer(reader);	2✔
116	streamTokenizer.resetSyntax();	2✔
117	streamTokenizer.wordChars('a', 'z');	2✔
118	streamTokenizer.wordChars('A', 'Z');	2✔
119	streamTokenizer.wordChars('-', '-');	2✔
120	streamTokenizer.wordChars('_', '_');	2✔
121	streamTokenizer.wordChars('0', '9');	2✔
122	streamTokenizer.wordChars('á', ' ');	2✔
123	streamTokenizer.whitespaceChars(0, ' ');	2✔
124	streamTokenizer.quoteChar('"');	2✔
125	streamTokenizer.eolIsSignificant(true);	2✔
126	streamTokenizer.ordinaryChar('(');	2✔
127	streamTokenizer.ordinaryChar(')');	2✔
128	return streamTokenizer;	2✔
129	}
130
131	/**
132	* Gets the set that contains all inputs that end up in the input alphabet.
133	*
134	* @return the set of inputs.
135	*/
136	protected Set<I> getInputs() {
137	return inputs;	2✔
138	}
139
140	/**
141	* Parse a data definition.
142	*
143	* @param streamTokenizer
144	* tokenizer containing the input
145	*
146	* @throws FormatException when the FSM source is invalid.
147	* @throws IOException when FSM source could not be read.
148	*/
149	protected abstract void parseDataDefinition(StreamTokenizer streamTokenizer) throws IOException, FormatException;
150
151	/**
152	* Perform some actions after all data definitions have been parsed.
153	*
154	* @param streamTokenizer
155	* tokenizer containing the input
156	*
157	* @throws FormatException when the FSM source is invalid.
158	* @throws IOException when FSM source could not be read.
159	*/
160	protected abstract void checkDataDefinitions(StreamTokenizer streamTokenizer) throws IOException, FormatException;
161
162	/**
163	* Parse a state vector.
164	*
165	* @param streamTokenizer
166	* tokenizer containing the input
167	*
168	* @throws FormatException when the FSM source is invalid.
169	* @throws IOException when FSM source could not be read.
170	*/
171	protected abstract void parseStateVector(StreamTokenizer streamTokenizer) throws IOException, FormatException;
172
173	/**
174	* Perform some actions after all state vectors have been parsed.
175	*
176	* @param streamTokenizer
177	* tokenizer containing the input
178	*
179	* @throws FormatException when the FSM source is invalid.
180	* @throws IOException when FSM source could not be read.
181	*/
182	protected abstract void checkStateVectors(StreamTokenizer streamTokenizer) throws IOException, FormatException;
183
184	/**
185	* Parse a transition.
186	*
187	* @param streamTokenizer
188	* tokenizer containing the input
189	*
190	* @throws FormatException when the FSM source is invalid.
191	* @throws IOException when FSM source could not be read.
192	*/
193	protected abstract void parseTransition(StreamTokenizer streamTokenizer) throws IOException, FormatException;
194
195	/**
196	* Perform some actions after all transitions have been parsed.
197	*
198	* @param streamTokenizer
199	* tokenizer containing the input
200	*
201	* @throws FormatException when the FSM source is invalid.
202	* @throws IOException when FSM source could not be read.
203	*/
204	protected abstract void checkTransitions(StreamTokenizer streamTokenizer) throws IOException, FormatException;
205
206	/**
207	* Parses the FSM file line-by-line.
208	* At first this method expects to parse data definitions, and calls {@link #parseDataDefinition(StreamTokenizer)}
209	* for each data definition. After "---" is encountered {@link #checkDataDefinitions(StreamTokenizer)} is called,
210	* and this method expects to parse state vectors. The behavior is similar for state vectors and transitions.
211	* For each line this method will increment {@link #partLineNumber}, and reset it when a new part in the FSM file
212	* begins.
213	* <p>
214	* Note that {@link StreamTokenizer} allows one to push back tokens. This is used whenever we have checked the
215	* type of token we are going to read.
216	*
217	* @param reader
218	* the source of the FSM file
219	*
220	* @throws FormatException when the FSM source is invalid.
221	* @throws IOException when FSM source could not be read.
222	*/
223	protected void parse(Reader reader) throws IOException, FormatException {
224	Part part = Part.DATA_DEFINITION;	2✔
225	partLineNumber = 0;	2✔
226
227	final StreamTokenizer streamTokenizer = getStreamTokenizer(reader);	2✔
228
229	while (streamTokenizer.nextToken() != StreamTokenizer.TT_EOF) {	2✔
230	streamTokenizer.pushBack();	2✔
231	switch (part) {	2✔
232	case DATA_DEFINITION: {
233	if (streamTokenizer.nextToken() == StreamTokenizer.TT_WORD && "---".equals(streamTokenizer.sval)) {	2✔
234	// we entered the part with the state vectors
235	part = Part.STATE_VECTORS;	2✔
236	partLineNumber = 0;	2✔
237	checkDataDefinitions(streamTokenizer);	2✔
238	} else {
239	streamTokenizer.pushBack();	2✔
240	parseDataDefinition(streamTokenizer);	2✔
241	}
242	break;	2✔
243	}
244	case STATE_VECTORS: {
245	if (streamTokenizer.nextToken() == StreamTokenizer.TT_WORD && "---".equals(streamTokenizer.sval)) {	2✔
246	// we entered the part with the transitions.
247	part = Part.TRANSITIONS;	2✔
248	partLineNumber = 0;	2✔
249	checkStateVectors(streamTokenizer);	2✔
250	} else {
251	streamTokenizer.pushBack();	2✔
252	parseStateVector(streamTokenizer);	2✔
253	}
254	break;	2✔
255	}
256	case TRANSITIONS: {
257	parseTransition(streamTokenizer);	2✔
258	break;	2✔
259	}
260	default: throw new AssertionError();	×
261	}
262	while (streamTokenizer.nextToken() != StreamTokenizer.TT_EOL) {	2✔
263	// consume all tokens until EOL is reached
264	}
265	partLineNumber++;	2✔
266	}
267	checkTransitions(streamTokenizer);	2✔
268	}	2✔
269	}

LearnLib / automatalib / 13138848026

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous