
JohnSnowLabs / spark-nlp · build 11429325160
Pull Request #14439: [SPARKNLP-1067] PromptAssembler (Merge 1c191569d into 9db33328b, github, web-flow)
20 Oct 2024 08:18PM UTC · coverage: 60.052% (-0.2%) from 60.216%

0 of 50 new or added lines in 2 files covered (0.0%).
48 existing lines in 26 files now uncovered.
8985 of 14962 relevant lines covered (60.05%), 0.6 hits per line.

Source file: /src/main/scala/com/johnsnowlabs/nlp/annotators/Tokenizer.scala (80.51% covered)
/*
 * Copyright 2017-2022 John Snow Labs
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.johnsnowlabs.nlp.annotators

import com.johnsnowlabs.nlp.AnnotatorApproach
import com.johnsnowlabs.nlp.AnnotatorType.{DOCUMENT, TOKEN}
import com.johnsnowlabs.nlp.annotators.param.ExternalResourceParam
import com.johnsnowlabs.nlp.util.io.{ExternalResource, MatchStrategy, ReadAs, ResourceHelper}
import com.johnsnowlabs.nlp.util.regex.RuleFactory
import org.apache.spark.ml.PipelineModel
import org.apache.spark.ml.param.{BooleanParam, IntParam, Param, StringArrayParam}
import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable}
import org.apache.spark.sql.Dataset

import java.util.regex.Pattern
import scala.collection.mutable.ArrayBuffer

/** Tokenizes raw text in document type columns into TokenizedSentence.
  *
  * This class represents a non-fitted tokenizer. Fitting it will cause the internal RuleFactory
  * to construct the rules for tokenizing from the input configuration.
  *
  * Identifies tokens using open tokenization standards. A few rules can be customized if the
  * defaults do not fit the user's needs.
  *
  * For extended examples of usage see the
  * [[https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/text/english/document-normalizer/document_normalizer_notebook.ipynb Examples]]
  * and
  * [[https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/annotators/TokenizerTestSpec.scala Tokenizer test class]]
  *
  * ==Example==
  * {{{
  * import spark.implicits._
  * import com.johnsnowlabs.nlp.DocumentAssembler
  * import com.johnsnowlabs.nlp.annotators.Tokenizer
  * import org.apache.spark.ml.Pipeline
  *
  * val data = Seq("I'd like to say we didn't expect that. Jane's boyfriend.").toDF("text")
  * val documentAssembler = new DocumentAssembler().setInputCol("text").setOutputCol("document")
  * val tokenizer = new Tokenizer().setInputCols("document").setOutputCol("token").fit(data)
  *
  * val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer)).fit(data)
  * val result = pipeline.transform(data)
  *
  * result.selectExpr("token.result").show(false)
  * +-----------------------------------------------------------------------+
  * |result                                                                 |
  * +-----------------------------------------------------------------------+
  * |[I'd, like, to, say, we, didn't, expect, that, ., Jane's, boyfriend, .]|
  * +-----------------------------------------------------------------------+
  * }}}
  *
  * @param uid
  *   required uid for storing annotator to disk
  * @groupname anno Annotator types
  * @groupdesc anno
  *   Required input and expected output annotator types
  * @groupname Ungrouped Members
  * @groupname param Parameters.
  * @groupname setParam Parameter setters
  * @groupname getParam Parameter getters
  * @groupprio param  1
  * @groupprio anno  2
  * @groupprio Ungrouped 3
  * @groupprio setParam  4
  * @groupprio getParam  5
  * @groupdesc param
  *   A list of (hyper-)parameter keys this annotator can take. Users can set and get the
  *   parameter values through setters and getters, respectively.
  */
class Tokenizer(override val uid: String) extends AnnotatorApproach[TokenizerModel] {

  /** Output annotator type : TOKEN
    *
    * @group anno
    */
  override val outputAnnotatorType: AnnotatorType = TOKEN

  /** Input annotator type : DOCUMENT
    *
    * @group anno
    */
  override val inputAnnotatorTypes: Array[AnnotatorType] = Array[AnnotatorType](
    DOCUMENT
  ) // For now, a Tokenizer only requires a DOCUMENT annotator (e.g. from a SentenceDetector)

  def this() = this(Identifiable.randomUID("REGEX_TOKENIZER"))

  /** Annotator that identifies points of analysis in a useful manner */
  override val description: String =
    "Annotator that identifies points of analysis in a useful manner"

  /** Words that won't be affected by tokenization rules
    *
    * @group param
    */
  val exceptions: StringArrayParam =
    new StringArrayParam(this, "exceptions", "Words that won't be affected by tokenization rules")

  /** Path to file containing list of exceptions
    *
    * @group param
    */
  val exceptionsPath: ExternalResourceParam = new ExternalResourceParam(
    this,
    "exceptionsPath",
    "Path to file containing list of exceptions")

  /** Whether to care for case sensitiveness in exceptions (Default: `true`)
    *
    * @group param
    */
  val caseSensitiveExceptions: BooleanParam = new BooleanParam(
    this,
    "caseSensitiveExceptions",
    "Whether to care for case sensitiveness in exceptions")

  /** Character list used to separate from token boundaries (Default: `Array(".", ",", ";", ":",
    * "!", "?", "*", "-", "(", ")", "\"", "'")`)
    * @group param
    */
  val contextChars: StringArrayParam = new StringArrayParam(
    this,
    "contextChars",
    "Character list used to separate from token boundaries")

  /** Character list used to separate from the inside of tokens
    *
    * @group param
    */
  val splitChars: StringArrayParam = new StringArrayParam(
    this,
    "splitChars",
    "Character list used to separate from the inside of tokens")

  /** Pattern to separate from the inside of tokens. Takes priority over splitChars.
    *
    * This pattern will be applied to the tokens which were extracted with the target pattern
    * previously.
    *
    * '''Example:'''
    *
    * {{{
    * import org.apache.spark.ml.Pipeline
    *
    * import com.johnsnowlabs.nlp.annotators.Tokenizer
    *
    * import com.johnsnowlabs.nlp.DocumentAssembler
    *
    * val textDf = sqlContext.sparkContext.parallelize(Array("Tokens in this-text will#be#split on hashtags-and#dashes")).toDF("text")
    *
    * val documentAssembler = new DocumentAssembler().setInputCol("text").setOutputCol("sentences")
    *
    * val tokenizer = new Tokenizer().setInputCols("sentences").setOutputCol("tokens").setSplitPattern("-|#")
    *
    * new Pipeline().setStages(Array(documentAssembler, tokenizer)).fit(textDf).transform(textDf).select("tokens.result").show(false)
    * }}}
    *
    * This will yield: `Tokens, in, this, text, will, be, split, on, hashtags, and, dashes`
    * @group param
    */
  val splitPattern: Param[String] = new Param(
    this,
    "splitPattern",
    "Pattern to separate from the inside of tokens. takes priority over splitChars.")

  /** Pattern to grab from text as token candidates. (Default: `"\\S+"`)
    *
    * Defaults to: "\\S+" which means anything not a space will be matched and considered as a
    * token candidate. This will cause text to be split on white spaces to yield token
    * candidates.
    *
    * This rule will be added to the BREAK_PATTERN variable, which is used to yield token
    * candidates.
    *
    * {{{
    * import org.apache.spark.ml.Pipeline
    * import com.johnsnowlabs.nlp.annotators.Tokenizer
    * import com.johnsnowlabs.nlp.DocumentAssembler
    *
    * val textDf = sqlContext.sparkContext.parallelize(Array("I only consider lowercase characters and NOT UPPERCASED and only the numbers 0,1, to 7 as tokens but not 8 or 9")).toDF("text")
    * val documentAssembler = new DocumentAssembler().setInputCol("text").setOutputCol("sentences")
    * val tokenizer = new Tokenizer().setInputCols("sentences").setOutputCol("tokens").setTargetPattern("a-z-0-7")
    * new Pipeline().setStages(Array(documentAssembler, tokenizer)).fit(textDf).transform(textDf).select("tokens.result").show(false)
    * }}}
    *
    * This will yield: `only, consider, lowercase, characters, and, and, only, the, numbers, 0, 1,
    * to, 7, as, tokens, but, not, or`
    * @group param
    */
  val targetPattern: Param[String] = new Param(
    this,
    "targetPattern",
    "Pattern to grab from text as token candidates. Defaults \\S+")

  /** Regex patterns that match tokens within a single target. Groups identify different
    * sub-tokens. Multiple defaults are provided.
    *
    * Infix patterns must use regex groups. Note that each group will result in a separate token.
    *
    * '''Example:'''
    *
    * {{{
    * import org.apache.spark.ml.Pipeline
    * import com.johnsnowlabs.nlp.annotators.Tokenizer
    * import com.johnsnowlabs.nlp.DocumentAssembler
    *
    * val textDf = sqlContext.sparkContext.parallelize(Array("l'une d'un l'un, des l'extrême des l'extreme")).toDF("text")
    * val documentAssembler = new DocumentAssembler().setInputCol("text").setOutputCol("sentences")
    * val tokenizer = new Tokenizer().setInputCols("sentences").setOutputCol("tokens").setInfixPatterns(Array("([\\p{L}\\w]+'{1})([\\p{L}\\w]+)"))
    * new Pipeline().setStages(Array(documentAssembler, tokenizer)).fit(textDf).transform(textDf).select("tokens.result").show(false)
    * }}}
    *
    * This will yield: `l', une, d', un, l', un, , , des, l', extrême, des, l', extreme`
    * @group param
    */
  val infixPatterns: StringArrayParam = new StringArrayParam(
    this,
    "infixPatterns",
    "Regex patterns that match tokens within a single target. groups identify different sub-tokens. multiple defaults")

  /** Regex with groups that begins with \\A to match the target prefix. Overrides the contextChars
    * Param.
    *
    * @group param
    */
  val prefixPattern: Param[String] = new Param[String](
    this,
    "prefixPattern",
    "Regex with groups and begins with \\A to match target prefix. Overrides contextCharacters Param")

  /** Regex with groups that ends with \\z to match the target suffix. Overrides the contextChars
    * Param.
    *
    * @group param
    */
  val suffixPattern: Param[String] = new Param[String](
    this,
    "suffixPattern",
    "Regex with groups and ends with \\z to match target suffix. Overrides contextCharacters Param")
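
  // Usage sketch (illustrative, assuming a `data` DataFrame with a "document" column from a
  // DocumentAssembler): override the default affix handling so that only brackets are stripped as
  // prefixes and only sentence punctuation as suffixes. Note the constraints checked in
  // buildRuleFactory: the prefix regex must start with \A and the suffix regex must end with \z.
  //
  //   val bracketAwareTokenizer = new Tokenizer()
  //     .setInputCols("document")
  //     .setOutputCol("token")
  //     .setPrefixPattern("\\A([\\(\\[]*)")
  //     .setSuffixPattern("([\\.,;:!?\\)\\]]*)\\z")
  //     .fit(data)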

  /** Set the minimum allowed length for each token
    *
    * @group param
    */
  val minLength = new IntParam(this, "minLength", "Set the minimum allowed length for each token")

  /** Set the minimum allowed length for each token
    * @group setParam
    */
  def setMinLength(value: Int): this.type = {
    require(value >= 0, "minLength must be greater than or equal to 0")
    require(value.isValidInt, "minLength must be Int")
    set(minLength, value)
  }

  /** Get the minimum allowed length for each token
    * @group getParam
    */
  def getMinLength(value: Int): Int = $(minLength)

  /** Set the maximum allowed length for each token
    *
    * @group param
    */
  val maxLength = new IntParam(this, "maxLength", "Set the maximum allowed length for each token")

  /** Set the maximum allowed length for each token
    * @group setParam
    */
  def setMaxLength(value: Int): this.type = {
    require(
      value >= $(minLength),
      "maxLength must be greater than or equal to minLength")
    require(value.isValidInt, "maxLength must be Int")
    set(maxLength, value)
  }

  /** Get the maximum allowed length for each token
    * @group getParam
    */
  def getMaxLength(value: Int): Int = $(maxLength)
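
  // Usage sketch (illustrative, assuming the `data` DataFrame from the class example): keep only
  // tokens between 3 and 12 characters long. setMaxLength requires the value to be at least the
  // current minLength.
  //
  //   val boundedTokenizer = new Tokenizer()
  //     .setInputCols("document")
  //     .setOutputCol("token")
  //     .setMinLength(3)
  //     .setMaxLength(12)
  //     .fit(data)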

  /** Set a basic regex rule to identify token candidates in text.
    * @group setParam
    */
  def setTargetPattern(value: String): this.type = set(targetPattern, value)

  /** Regex pattern to separate from the inside of tokens. Takes priority over splitChars.
    * @group setParam
    */
  def setSplitPattern(value: String): this.type = set(splitPattern, value)

  /** Set a list of Regex patterns that match tokens within a single target. Groups identify
    * different sub-tokens. Multiple defaults are provided.
    * @group setParam
    */
  def setInfixPatterns(value: Array[String]): this.type = set(infixPatterns, value)

  /** Add an extension pattern regex with groups to the top of the rules (will target first, from
    * more specific to the more general).
    *
    * @group setParam
    */
  def addInfixPattern(value: String): this.type = set(infixPatterns, value +: $(infixPatterns))

  /** Regex to identify subtokens that come at the beginning of the token. The regex has to start
    * with \\A and must contain groups (). Each group will become a separate token within the
    * prefix. Defaults to non-letter characters, e.g. quotes or parentheses.
    *
    * @group setParam
    */
  def setPrefixPattern(value: String): this.type = set(prefixPattern, value)

  /** Regex to identify subtokens that come at the end of the token. The regex has to end with \\z
    * and must contain groups (). Each group will become a separate token within the suffix.
    * Defaults to non-letter characters, e.g. quotes or parentheses.
    *
    * @group setParam
    */
  def setSuffixPattern(value: String): this.type = set(suffixPattern, value)

  /** List of tokens to not alter at all. Allows composite tokens like two-worded tokens that the
    * user may not want to split.
    *
    * @group setParam
    */
  def setExceptions(value: Array[String]): this.type = set(exceptions, value)

  /** Add a single exception
    *
    * @group setParam
    */
  def addException(value: String): this.type =
    set(exceptions, get(exceptions).getOrElse(Array.empty[String]) :+ value)

  /** @group getParam */
  def getExceptions: Array[String] = $(exceptions)

  /** Path to txt file with list of token exceptions
    *
    * @group setParam
    */
  def setExceptionsPath(
      path: String,
      readAs: ReadAs.Format = ReadAs.TEXT,
      options: Map[String, String] = Map("format" -> "text")): this.type =
    set(exceptionsPath, ExternalResource(path, readAs, options))

  /** Whether to follow case sensitiveness for matching exceptions in text
    *
    * @group setParam
    */
  def setCaseSensitiveExceptions(value: Boolean): this.type = set(caseSensitiveExceptions, value)

  /** Whether to follow case sensitiveness for matching exceptions in text
    *
    * @group getParam
    */
  def getCaseSensitiveExceptions(value: Boolean): Boolean = $(caseSensitiveExceptions)
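
  // Usage sketch (illustrative, assuming a `data` DataFrame with a "document" column): keep
  // composite tokens such as "New York" intact and match exceptions case-insensitively. An
  // exceptions file with one entry per line could be supplied via setExceptionsPath instead.
  //
  //   val exceptionAwareTokenizer = new Tokenizer()
  //     .setInputCols("document")
  //     .setOutputCol("token")
  //     .setExceptions(Array("New York", "e-mail"))
  //     .setCaseSensitiveExceptions(false)
  //     .fit(data)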

  /** Get the regex patterns that match tokens within a single target. Groups identify different
    * sub-tokens.
    *
    * @group getParam
    */
  def getInfixPatterns: Array[String] = $(infixPatterns)

  /** Regex to identify subtokens that come at the beginning of the token. The regex has to start
    * with \\A and must contain groups (). Each group will become a separate token within the
    * prefix. Defaults to non-letter characters, e.g. quotes or parentheses.
    *
    * @group getParam
    */
  def getPrefixPattern: String = $(prefixPattern)

  /** Regex to identify subtokens that come at the end of the token. The regex has to end with \\z
    * and must contain groups (). Each group will become a separate token within the suffix.
    * Defaults to non-letter characters, e.g. quotes or parentheses.
    *
    * @group getParam
    */
  def getSuffixPattern: String = $(suffixPattern)

  /** Basic regex rule to identify a candidate for tokenization. Defaults to \\S+, which means
    * anything not a space.
    *
    * @group getParam
    */
  def getTargetPattern: String = $(targetPattern)

  /** Regex pattern to separate from the inside of tokens. Takes priority over splitChars.
    *
    * @group getParam
    */
  def getSplitPattern: String = $(splitPattern)

  /** List of 1 character strings to rip off from tokens, such as parentheses or question marks.
    * Ignored if using prefix, infix or suffix patterns.
    *
    * @group setParam
    */
  def setContextChars(v: Array[String]): this.type = {
    require(v.forall(_.length == 1), "All elements in context chars must have length == 1")
    set(contextChars, v)
  }

  /** Add a one character string to rip off from tokens, such as parentheses or question marks.
    * Ignored if using prefix, infix or suffix patterns.
    *
    * @group setParam
    */
  def addContextChars(v: String): this.type = {
    require(v.length == 1, "Context char must have length == 1")
    set(contextChars, get(contextChars).getOrElse(Array.empty[String]) :+ v)
  }

  /** List of 1 character strings to rip off from tokens, such as parentheses or question marks.
    * Ignored if using prefix, infix or suffix patterns.
    *
    * @group getParam
    */
  def getContextChars: Array[String] = {
    $(contextChars)
  }
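
  // Usage sketch (illustrative, assuming a `data` DataFrame with a "document" column): restrict
  // the characters that get split off at token boundaries to sentence punctuation only. Each
  // element must be a single character, as enforced by setContextChars.
  //
  //   val punctuationTokenizer = new Tokenizer()
  //     .setInputCols("document")
  //     .setOutputCol("token")
  //     .setContextChars(Array(".", ",", "!", "?"))
  //     .fit(data)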

  /** List of 1 character strings to split tokens inside, such as hyphens. Ignored if using infix,
    * prefix or suffix patterns.
    *
    * @group setParam
    */
  def setSplitChars(v: Array[String]): this.type = {
    require(
      v.forall(x => x.length == 1 || (x.length == 2 && x.substring(0, 1) == "\\")),
      "All elements in split chars must have length == 1 or be an escaped character of length 2 starting with \\")
    set(splitChars, v)
  }

  /** One character string to split tokens inside, such as hyphens. Ignored if using infix, prefix
    * or suffix patterns.
    *
    * @group setParam
    */
  def addSplitChars(v: String): this.type = {
    require(
      v.length == 1 || (v.length == 2 && v.substring(0, 1) == "\\"),
      "Split char must have length == 1 or be an escaped character of length 2 starting with \\")
    set(splitChars, get(splitChars).getOrElse(Array.empty[String]) :+ v)
  }

  /** List of 1 character strings to split tokens inside, such as hyphens. Ignored if using infix,
    * prefix or suffix patterns.
    *
    * @group getParam
    */
  def getSplitChars: Array[String] = {
    $(splitChars)
  }
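
  // Usage sketch (illustrative, assuming a `data` DataFrame with a "document" column): additionally
  // split tokens on hyphens and forward slashes. Each element must be a single character or a
  // two-character escaped sequence starting with a backslash (e.g. "\\t"), as enforced above.
  //
  //   val hyphenSplittingTokenizer = new Tokenizer()
  //     .setInputCols("document")
  //     .setOutputCol("token")
  //     .setSplitChars(Array("-", "/"))
  //     .fit(data)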

  setDefault(
    inputCols -> Array(DOCUMENT),
    outputCol -> "token",
    targetPattern -> "\\S+",
    contextChars -> Array(".", ",", ";", ":", "!", "?", "*", "-", "(", ")", "\"", "'"),
    caseSensitiveExceptions -> true,
    minLength -> 0)

  /** Builds the rule factory, which combines all defined parameters into the regexes that are
    * applied to tokens
    */
  def buildRuleFactory: RuleFactory = {
    val rules = ArrayBuffer.empty[String]

    lazy val quotedContext = Pattern.quote($(contextChars).mkString(""))

    val processedPrefix = get(prefixPattern).getOrElse(s"\\A([$quotedContext]*)")
    require(
      processedPrefix.startsWith("\\A"),
      "prefixPattern must begin with \\A to ensure it is the beginning of the string")

    val processedSuffix = get(suffixPattern).getOrElse(s"([$quotedContext]*)\\z")
    require(
      processedSuffix.endsWith("\\z"),
      "suffixPattern must end with \\z to ensure it is the end of the string")

    val processedInfixes =
      get(infixPatterns).getOrElse(Array(s"([^$quotedContext](?:.*[^$quotedContext])*)"))

    require(
      processedInfixes.forall(ip => ip.contains("(") && ip.contains(")")),
      "infix patterns must use regex group. Notice each group will result in separate token")
    processedInfixes.foreach(infix => {
      val ruleBuilder = new StringBuilder
      ruleBuilder.append(processedPrefix)
      ruleBuilder.append(infix)
      ruleBuilder.append(processedSuffix)
      rules.append(ruleBuilder.toString)
    })
    rules.foldLeft(new RuleFactory(MatchStrategy.MATCH_FIRST))((factory, rule) =>
      factory.addRule(rule.r, rule))
  }
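
  // With the defaults (no prefix, suffix, or infix overrides), each assembled rule has the shape
  // prefix + infix + suffix, roughly:
  //
  //   \A([<ctx>]*)([^<ctx>](?:.*[^<ctx>])*)([<ctx>]*)\z
  //
  // where <ctx> stands for the Pattern.quote-d contextChars. Group 1 captures leading context
  // characters, group 2 the token body, and group 3 trailing context characters.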

  /** Clears out rules and constructs a new rule for every combination of rules provided. The
    * strategy is to catch one token per regex group. Users may add their own groups if they need
    * targets to be tokenized separately from the rest.
    */
  override def train(
      dataset: Dataset[_],
      recursivePipeline: Option[PipelineModel]): TokenizerModel = {

    /** Clears out rules and constructs a new rule for every combination of rules provided */
    /** The strategy is to catch one token per regex group */
    /** Users may add their own groups if they need targets to be tokenized separately from the rest */
    val ruleFactory = buildRuleFactory

    val processedExceptions = get(exceptionsPath)
      .map(er => ResourceHelper.parseLines(er))
      .getOrElse(Array.empty[String]) ++ get(exceptions).getOrElse(Array.empty[String])

    val raw = new TokenizerModel()
      .setCaseSensitiveExceptions($(caseSensitiveExceptions))
      .setTargetPattern($(targetPattern))
      .setRules(ruleFactory)
      .setMinLength($(minLength))

    if (isDefined(maxLength))
      raw.setMaxLength($(maxLength))

    if (processedExceptions.nonEmpty)
      raw.setExceptions(processedExceptions)

    if (isSet(splitPattern)) raw.setSplitPattern($(splitPattern))

    if (isSet(splitChars)) raw.setSplitChars($(splitChars))

    raw

  }

/** This is the companion object of [[Tokenizer]]. Please refer to that class for the
  * documentation.
  */
object Tokenizer extends DefaultParamsReadable[Tokenizer]