• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JohnSnowLabs / spark-nlp / 18685790193

21 Oct 2025 01:39PM UTC coverage: 55.216%. First build
18685790193

Pull #14676

github

web-flow
Merge 427de3761 into b827818c7
Pull Request #14676: Spark NLP 6.2.0 Release

147 of 185 new or added lines in 7 files covered. (79.46%)

11924 of 21595 relevant lines covered (55.22%)

0.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.36
/src/main/scala/com/johnsnowlabs/nlp/annotators/er/EntityRulerModel.scala
1
/*
2
 * Copyright 2017-2022 John Snow Labs
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *    http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16

17
package com.johnsnowlabs.nlp.annotators.er
18

19
import com.johnsnowlabs.nlp.AnnotatorType.{CHUNK, DOCUMENT, TOKEN}
20
import com.johnsnowlabs.nlp.annotators.common._
21
import com.johnsnowlabs.nlp.annotators.er.EntityRulerModel.{
22
  AUTO_MODES,
23
  ENTITY_PRESETS,
24
  describeAutoMode,
25
  getPatternByName
26
}
27
import com.johnsnowlabs.nlp.serialization.StructFeature
28
import com.johnsnowlabs.nlp.{Annotation, AnnotatorModel, HasPretrained, HasSimpleAnnotate}
29
import com.johnsnowlabs.storage.Database.{ENTITY_REGEX_PATTERNS, Name}
30
import com.johnsnowlabs.storage._
31
import org.apache.spark.broadcast.Broadcast
32
import org.apache.spark.ml.PipelineModel
33
import org.apache.spark.ml.param.{BooleanParam, Param, ParamMap, StringArrayParam}
34
import org.apache.spark.ml.util.Identifiable
35
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
36
import org.slf4j.{Logger, LoggerFactory}
37

38
import java.util.Locale
39
import scala.util.Try
40

41
/** Instantiated model of the [[EntityRulerApproach]]. For usage and examples see the
  * documentation of the main class.
  *
  * Entities are produced from two sources that are concatenated in [[annotate]]: regex patterns
  * (from RocksDB storage, from the `entityRulerFeatures` structure, or from the built-in presets
  * of the companion object) and keyword matches from an optional Aho-Corasick automaton.
  *
  * @param uid
  *   internally required UID to make it writable
  * @groupname anno Annotator types
  * @groupdesc anno
  *   Required input and expected output annotator types
  * @groupname Ungrouped Members
  * @groupname param Parameters
  * @groupname setParam Parameter setters
  * @groupname getParam Parameter getters
  * @groupprio param  1
  * @groupprio anno  2
  * @groupprio Ungrouped 3
  * @groupprio setParam  4
  * @groupprio getParam  5
  * @groupdesc param
  *   A list of (hyper-)parameter keys this annotator can take. Users can set and get the
  *   parameter values through setters and getters, respectively.
  */
class EntityRulerModel(override val uid: String)
    extends AnnotatorModel[EntityRulerModel]
    with HasSimpleAnnotate[EntityRulerModel]
    with HasStorageModel {

  def this() = this(Identifiable.randomUID("ENTITY_RULER_MODEL"))

  // `lazy` matters: a @transient field is not restored by Java deserialization, so a plain
  // `val` would be null on a deserialized copy of this model (e.g. one shipped inside a task
  // closure) and `logger.warn` would throw. A transient lazy val is re-created on first use.
  @transient
  private lazy val logger: Logger = LoggerFactory.getLogger("EntityRulerModel")

  // Keeping this parameter for backward compatibility
  private[er] val enablePatternRegex =
    new BooleanParam(this, "enablePatternRegex", "Enables regex pattern match")

  private[er] val useStorage =
    new BooleanParam(this, "useStorage", "Whether to use RocksDB storage to serialize patterns")

  private[er] val regexEntities =
    new StringArrayParam(this, "regexEntities", "entities defined in regex patterns")

  private[er] val entityRulerFeatures: StructFeature[EntityRulerFeatures] =
    new StructFeature[EntityRulerFeatures](
      this,
      "Structure to store data when RocksDB is not used")

  private[er] val sentenceMatch = new BooleanParam(
    this,
    "sentenceMatch",
    "Whether to find match at sentence level (regex only). True: sentence level. False: token level")

  private[er] val ahoCorasickAutomaton: StructFeature[Option[AhoCorasickAutomaton]] =
    new StructFeature[Option[AhoCorasickAutomaton]](this, "AhoCorasickAutomaton")

  private[er] val extractEntities = new StringArrayParam(
    this,
    "extractEntities",
    "List of entity labels to extract (empty = extract all entities)")

  /** AutoMode defines logical bundles of regex patterns to activate (e.g., "network_entities",
    * "email_entities"). When it names a known bundle, the bundle's preset patterns are used for
    * regex matching instead of the patterns looked up for the active entities.
    */
  private[er] val autoMode: Param[String] = new Param[String](
    this,
    "autoMode",
    "Predefined bundle of entity patterns to use (e.g. 'network_entities', 'email_entities', 'all_entities').")

  private[er] def setRegexEntities(value: Array[String]): this.type = set(regexEntities, value)

  private[er] def setEntityRulerFeatures(value: EntityRulerFeatures): this.type =
    set(entityRulerFeatures, value)

  private[er] def setUseStorage(value: Boolean): this.type = set(useStorage, value)

  private[er] def setSentenceMatch(value: Boolean): this.type = set(sentenceMatch, value)

  private[er] def setAhoCorasickAutomaton(value: Option[AhoCorasickAutomaton]): this.type =
    set(ahoCorasickAutomaton, value)

  def setExtractEntities(value: Array[String]): this.type = set(extractEntities, value)

  def setAutoMode(value: String): this.type = set(autoMode, value)

  /** Broadcast copy of the automaton; filled at most once by [[setAutomatonModelIfNotSet]]. */
  private var automatonModel: Option[Broadcast[AhoCorasickAutomaton]] = None

  /** Upper-cased `autoMode` value when it names a known bundle, otherwise `None`.
    *
    * The lookup is case-insensitive because the bundle keys are upper-case while the parameter
    * description advertises lower-case names.
    */
  private def resolvedAutoModeKey: Option[String] =
    if (isDefined(autoMode))
      Option($(autoMode)).map(_.toUpperCase(Locale.ROOT)).filter(AUTO_MODES.contains)
    else None

  /** Whether `autoMode` currently names a known bundle of preset patterns.
    *
    * This is evaluated on every call. It used to be a `val` computed while the instance was
    * being constructed, i.e. before any setter or default could have been applied, so it was
    * always `false` and the auto-mode branch of the matchers could never run.
    */
  def hasAutoMode: Boolean = resolvedAutoModeKey.isDefined

  /** Broadcasts `automaton` through the given session unless a broadcast already exists or no
    * automaton is supplied.
    *
    * @param spark
    *   session whose Spark context performs the broadcast
    * @param automaton
    *   automaton to broadcast, if any
    * @return
    *   this instance
    */
  def setAutomatonModelIfNotSet(
      spark: SparkSession,
      automaton: Option[AhoCorasickAutomaton]): this.type = {
    if (automatonModel.isEmpty) {
      automaton.foreach { value =>
        automatonModel = Some(spark.sparkContext.broadcast(value))
      }
    }
    this
  }

  /** Returns the automaton from the broadcast when present, otherwise from the
    * `ahoCorasickAutomaton` feature when that feature is set, otherwise `None`.
    */
  def getAutomatonModelIfNotSet: Option[AhoCorasickAutomaton] =
    automatonModel.map(_.value).orElse {
      // Only dereference the feature once it is known to be set.
      if (get(ahoCorasickAutomaton).isDefined) $$(ahoCorasickAutomaton) else None
    }

  setDefault(
    useStorage -> false,
    sentenceMatch -> false,
    caseSensitive -> true,
    enablePatternRegex -> false,
    extractEntities -> Array(),
    regexEntities -> Array(),
    autoMode -> "")

  /** Annotator reference id. Used to identify elements in metadata or to refer to this annotator
    * type
    */
  val inputAnnotatorTypes: Array[String] = Array(DOCUMENT)
  override val optionalInputAnnotatorTypes: Array[String] = Array(TOKEN)
  val outputAnnotatorType: AnnotatorType = CHUNK

  /** Validates the input schema before delegating to the parent transformation.
    *
    * @throws IllegalArgumentException
    *   when `regexEntities` is non-empty and no column of the dataset carries the TOKEN annotator
    *   type in its metadata
    */
  override def _transform(
      dataset: Dataset[_],
      recursivePipeline: Option[PipelineModel]): DataFrame = {
    if ($(regexEntities).nonEmpty) {
      val hasTokenColumn = dataset.schema.fields.exists { field =>
        field.metadata.contains("annotatorType") &&
        field.metadata.getString("annotatorType") == TOKEN
      }
      if (!hasTokenColumn) {
        throw new IllegalArgumentException(
          s"Missing $TOKEN annotator. Regex patterns requires it in your pipeline")
      }
    }
    super._transform(dataset, recursivePipeline)
  }

  /** Entity names selected by `autoMode`: the members of the named bundle (empty when the name
    * is unknown), or `regexEntities` when `autoMode` is empty.
    */
  private def getActiveEntitiesFromAutoMode: Array[String] = {
    val requestedMode =
      if (isDefined(autoMode)) Option($(autoMode)).filter(_.nonEmpty) else None

    requestedMode match {
      case Some(mode) =>
        AUTO_MODES.getOrElse(mode.toUpperCase(Locale.ROOT), Seq.empty).toArray
      case None =>
        // Fallback for legacy regex-based pipelines
        $(regexEntities)
    }
  }

  /** Makes sure the automaton, when one is available, is broadcast before annotation starts. */
  override def beforeAnnotate(dataset: Dataset[_]): Dataset[_] = {
    getAutomatonModelIfNotSet.foreach { automaton =>
      this.setAutomatonModelIfNotSet(dataset.sparkSession, Some(automaton))
    }
    dataset
  }

  /** takes a document and annotations and produces new annotations of this annotator's annotation
    * type
    *
    * @param annotations
    *   Annotations that correspond to inputAnnotationCols generated by previous annotators if any
    * @return
    *   any number of annotations processed for every input annotation. Not necessary one to one
    *   relationship. Regex matches come first, followed by keyword (automaton) matches.
    */
  def annotate(annotations: Seq[Annotation]): Seq[Annotation] = {
    val sentences = SentenceSplit.unpack(annotations)
    val annotatedEntitiesByRegex = computeAnnotatedEntitiesByRegex(annotations, sentences)

    val annotatedEntitiesByKeywords: Seq[Annotation] = getAutomatonModelIfNotSet match {
      case Some(automaton) =>
        sentences.flatMap(sentence => automaton.searchPatternsInText(sentence))
      case None => Seq()
    }

    annotatedEntitiesByRegex ++ annotatedEntitiesByKeywords
  }

  /** Runs regex matching at sentence or token level, depending on `sentenceMatch`.
    *
    * The entities to use are `regexEntities` restricted to `extractEntities` when the latter is
    * non-empty (compared on the part of the entity before the first comma); otherwise the
    * result of [[getActiveEntitiesFromAutoMode]]. Nothing is matched when that set is empty.
    */
  private def computeAnnotatedEntitiesByRegex(
      annotations: Seq[Annotation],
      sentences: Seq[Sentence]): Seq[Annotation] = {
    val entitiesToUse: Array[String] =
      if ($(extractEntities).nonEmpty) {
        val requestedLabels = $(extractEntities)
        // headOption instead of (0): "," splits into an empty array.
        $(regexEntities).filter(_.split(",").headOption.exists(requestedLabels.contains))
      } else getActiveEntitiesFromAutoMode

    if (entitiesToUse.isEmpty) Seq()
    else {
      val regexPatternsReader: Option[RegexPatternsReader] =
        if ($(useStorage))
          Some(getReader(Database.ENTITY_REGEX_PATTERNS).asInstanceOf[RegexPatternsReader])
        else None

      if ($(sentenceMatch)) {
        annotateEntitiesFromRegexPatternsBySentence(sentences, regexPatternsReader, entitiesToUse)
      } else {
        annotateEntitiesFromRegexPatterns(
          TokenizedWithSentence.unpack(annotations),
          regexPatternsReader,
          entitiesToUse)
      }
    }
  }

  /** Emits one CHUNK annotation per token that matches a selected pattern. */
  private def annotateEntitiesFromRegexPatterns(
      tokenizedWithSentences: Seq[TokenizedSentence],
      regexPatternsReader: Option[RegexPatternsReader],
      activeEntities: Array[String]): Seq[Annotation] =
    tokenizedWithSentences.flatMap { tokenizedSentence =>
      val sentenceMetadata = Map("sentence" -> tokenizedSentence.sentenceIndex.toString)
      tokenizedSentence.indexedTokens.flatMap { indexedToken =>
        getMatchedEntity(indexedToken.token, regexPatternsReader, activeEntities).map { entity =>
          Annotation(
            CHUNK,
            indexedToken.begin,
            indexedToken.end,
            indexedToken.token,
            getEntityMetadata(Some(entity)) ++ sentenceMetadata)
        }
      }
    }

  /** Resolves the `(entity, regex)` pairs to match against, shared by both matching modes.
    *
    * Order of precedence:
    *   1. a known `autoMode` bundle: the preset pattern of each bundle member;
    *   1. non-empty `activeEntities`: for each entity, the patterns from the storage reader when
    *      one is given, else from `entityRulerFeatures` when that feature is set, else the
    *      built-in preset with that name; entities without patterns contribute nothing;
    *   1. otherwise every built-in preset.
    */
  private def selectEntityPatterns(
      regexPatternsReader: Option[RegexPatternsReader],
      activeEntities: Array[String]): Seq[(String, String)] =
    resolvedAutoModeKey match {
      case Some(modeKey) =>
        // The upper-cased key is passed on, so the exact-match lookup finds the bundle.
        describeAutoMode(modeKey).flatMap(name => getPatternByName(name).map(name -> _))

      case None if Option(activeEntities).exists(_.nonEmpty) =>
        activeEntities.toSeq.flatMap { regexEntity =>
          // load from reader if available, else from local model storage
          val regexPatterns: Option[Seq[String]] = regexPatternsReader match {
            case Some(reader) => reader.lookup(regexEntity)
            case None =>
              // fallback if entityRulerFeatures is not set
              if (get(entityRulerFeatures).isDefined)
                $$(entityRulerFeatures).regexPatterns.get(regexEntity)
              else
                ENTITY_PRESETS.get(regexEntity).map(Seq(_))
          }
          regexPatterns.getOrElse(Seq.empty).map(pattern => regexEntity -> pattern)
        }

      case None => ENTITY_PRESETS.toSeq
    }

  /** Finds the entity whose pattern matches somewhere in `token`.
    *
    * @return
    *   the first matching entity in selection order; a warning is logged when several match
    */
  private def getMatchedEntity(
      token: String,
      regexPatternsReader: Option[RegexPatternsReader],
      activeEntities: Array[String]): Option[String] = {

    val matchesByEntity = selectEntityPatterns(regexPatternsReader, activeEntities).flatMap {
      case (entityName, regexPattern) =>
        regexPattern.r.findFirstIn(token).map(_ => entityName)
    }

    if (matchesByEntity.size > 1)
      logger.warn(
        s"[EntityRulerModel] Multiple entities matched token '$token': ${matchesByEntity.mkString(", ")}. " +
          s"Returning first entity '${matchesByEntity.head}'.")

    matchesByEntity.headOption
  }

  /** Extracts all regex-matched entities within a sentence, supporting both autoMode and manual
    * patterns.
    *
    * @return
    *   matched text with document-level begin/end offsets (end inclusive) paired with the entity
    *   label, sorted by begin offset
    */
  private def getMatchedEntityBySentence(
      sentence: Sentence,
      regexPatternsReader: Option[RegexPatternsReader],
      activeEntities: Array[String]): Array[(IndexedToken, String)] = {

    val matchesByEntity = selectEntityPatterns(regexPatternsReader, activeEntities)
      .flatMap { case (regexEntity, regexPattern) =>
        val allMatches = regexPattern.r
          .findAllMatchIn(sentence.content)
          .map { matched =>
            // Shift sentence-relative offsets to document offsets; `end` becomes inclusive.
            val begin = matched.start + sentence.start
            val end = matched.end + sentence.start - 1
            (matched.matched, begin, end, regexEntity)
          }
          .toList

        // Merge overlapping intervals for same pattern, then keep only the matches whose
        // interval is still present after merging.
        val mergedIntervals =
          EntityRulerUtil.mergeIntervals(allMatches.map { case (_, b, e, _) => List(b, e) })

        allMatches.filter { case (_, b, e, _) => mergedIntervals.contains(List(b, e)) }
      }
      .sortBy(_._2) // sort by begin position

    matchesByEntity.map { case (matchedText, begin, end, entityLabel) =>
      (IndexedToken(matchedText, begin, end), entityLabel)
    }.toArray
  }

  /** Emits one CHUNK annotation per sentence-level regex match. */
  private def annotateEntitiesFromRegexPatternsBySentence(
      sentences: Seq[Sentence],
      patternsReader: Option[RegexPatternsReader],
      activeEntities: Array[String]): Seq[Annotation] =
    sentences.flatMap { sentence =>
      val sentenceMetadata = Map("sentence" -> sentence.index.toString)
      getMatchedEntityBySentence(sentence, patternsReader, activeEntities).map {
        case (indexedToken, label) =>
          Annotation(
            CHUNK,
            indexedToken.begin,
            indexedToken.end,
            indexedToken.token,
            getEntityMetadata(Some(label)) ++ sentenceMetadata)
      }
    }

  /** Splits a comma-separated label into annotation metadata.
    *
    * The first field becomes `"entity"`; every later field is stored under `"id"`, so the last
    * one wins when there are several.
    *
    * @param labelData
    *   label such as `"PERSON,42"`
    * @return
    *   the metadata map, or an empty map when `labelData` is `None`
    */
  private def getEntityMetadata(labelData: Option[String]): Map[String, String] =
    labelData.fold(Map.empty[String, String]) { label =>
      label
        .split(",")
        .zipWithIndex
        .map { case (field, index) => (if (index == 0) "entity" else "id") -> field }
        .toMap
    }

  /** Creates a copy of this model with `extra` applied.
    *
    * `defaultCopy` already carries over explicitly set params and defaults, `autoMode` included.
    * The previous implementation additionally called `setAutoMode` on `this`, which modified the
    * source model and left the copy unchanged.
    */
  override def copy(extra: ParamMap): EntityRulerModel = defaultCopy(extra)

  /** Reads storage only when the model was saved with `useStorage` enabled. */
  override def deserializeStorage(path: String, spark: SparkSession): Unit = {
    if ($(useStorage)) {
      super.deserializeStorage(path: String, spark: SparkSession)
    }
  }

  /** Writes storage only when `useStorage` is enabled. */
  override def onWrite(path: String, spark: SparkSession): Unit = {
    if ($(useStorage)) {
      super.onWrite(path, spark)
    }
  }

  protected val databases: Array[Name] = EntityRulerModel.databases

  /** Every database of this model is read through a [[RegexPatternsReader]]. */
  protected def createReader(database: Name, connection: RocksDBConnection): StorageReader[_] = {
    new RegexPatternsReader(connection)
  }
}
457

458
/** Reader mix-in for [[EntityRulerModel]]: declares the storage database the model uses and
  * re-declares the `pretrained` overloads with an [[EntityRulerModel]] return type.
  *
  * There is no default pretrained model name. Every `pretrained` overload forwards its arguments
  * unchanged to the inherited implementation.
  */
trait ReadablePretrainedEntityRuler
    extends StorageReadable[EntityRulerModel]
    with HasPretrained[EntityRulerModel] {

  /** Regex patterns are the only database kept in storage. */
  override val databases: Array[Name] = Array[Name](ENTITY_REGEX_PATTERNS)

  /** No model name is assumed when none is given. */
  override val defaultModelName: Option[String] = Option.empty[String]

  override def pretrained(): EntityRulerModel = {
    super.pretrained()
  }

  override def pretrained(name: String): EntityRulerModel = {
    super.pretrained(name)
  }

  override def pretrained(name: String, lang: String): EntityRulerModel = {
    super.pretrained(name, lang)
  }

  override def pretrained(name: String, lang: String, remoteLoc: String): EntityRulerModel = {
    super.pretrained(name, lang, remoteLoc)
  }

}
477

478
/** Companion of [[EntityRulerModel]]: names of the built-in regex presets, names of the
  * auto-mode bundles that group them, and lookups over both.
  */
object EntityRulerModel extends ReadablePretrainedEntityRuler {

  // Names of the built-in regex presets.
  val EMAIL_DATETIMETZ_PATTERN = "EMAIL_DATETIMETZ_PATTERN"
  val EMAIL_ADDRESS_PATTERN = "EMAIL_ADDRESS_PATTERN"
  val IPV4_PATTERN = "IPV4_PATTERN"
  val IPV6_PATTERN = "IPV6_PATTERN"
  // NOTE(review): no regex is registered under this name in ENTITY_PRESETS - confirm whether
  // that is intentional.
  val IP_ADDRESS_PATTERN = "IP_ADDRESS_PATTERN"
  val IP_ADDRESS_NAME_PATTERN = "IP_ADDRESS_NAME_PATTERN"
  val MAPI_ID_PATTERN = "MAPI_ID_PATTERN"
  val US_PHONE_NUMBERS_PATTERN = "US_PHONE_NUMBERS_PATTERN"
  val IMAGE_URL_PATTERN = "IMAGE_URL_PATTERN"

  // Names of the auto-mode bundles (keys of AUTO_MODES).
  val NETWORK_ENTITIES = "NETWORK_ENTITIES"
  val EMAIL_ENTITIES = "EMAIL_ENTITIES"
  val COMMUNICATION_ENTITIES = "COMMUNICATION_ENTITIES"
  val CONTACT_ENTITIES = "CONTACT_ENTITIES"
  val MEDIA_ENTITIES = "MEDIA_ENTITIES"
  val ALL_ENTITIES = "ALL_ENTITIES"

  /** Preset name -> regex source. */
  private lazy val ENTITY_PRESETS: Map[String, String] = Map(
    EMAIL_DATETIMETZ_PATTERN -> "[A-Za-z]{3},\\s\\d{1,2}\\s[A-Za-z]{3}\\s\\d{4}\\s\\d{2}:\\d{2}:\\d{2}\\s[+-]\\d{4}",
    EMAIL_ADDRESS_PATTERN -> "(?i)[a-z0-9\\.\\-+_]+@[a-z0-9\\.\\-+_]+\\.[a-z]+",
    IPV4_PATTERN -> "(?:25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]?\\d)(?:\\.(?:25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]?\\d)){3}",
    IPV6_PATTERN -> "[a-z0-9]{4}::[a-z0-9]{4}:[a-z0-9]{4}:[a-z0-9]{4}:[a-z0-9]{4}%?[0-9]*",
    IP_ADDRESS_NAME_PATTERN -> "[a-zA-Z0-9-]*\\.[a-zA-Z]*\\.[a-zA-Z]*",
    MAPI_ID_PATTERN -> "[0-9]*\\.[0-9]*\\.[0-9]*\\.[0-9]*",
    US_PHONE_NUMBERS_PATTERN -> "(?:\\+?(\\d{1,3}))?[-. (]*(\\d{3})?[-. )]*(\\d{3})[-. ]*(\\d{4})(?: *x(\\d+))?\\s*$",
    IMAGE_URL_PATTERN -> "(?i)https?://(?:[a-z0-9$_@.&+!*\\(\\),%-])+(?:/[a-z0-9$_@.&+!*\\(\\),%-]*)*\\.(?:jpg|jpeg|png|gif|bmp|heic)")

  /** Bundle name (upper-case) -> names of the presets it activates. */
  private lazy val AUTO_MODES: Map[String, Seq[String]] = Map(
    NETWORK_ENTITIES -> Seq(IPV4_PATTERN, IPV6_PATTERN, IP_ADDRESS_NAME_PATTERN),
    EMAIL_ENTITIES -> Seq(EMAIL_ADDRESS_PATTERN, EMAIL_DATETIMETZ_PATTERN, MAPI_ID_PATTERN),
    COMMUNICATION_ENTITIES -> Seq(EMAIL_ADDRESS_PATTERN, US_PHONE_NUMBERS_PATTERN),
    CONTACT_ENTITIES -> Seq(
      EMAIL_ADDRESS_PATTERN,
      US_PHONE_NUMBERS_PATTERN,
      IPV4_PATTERN,
      IPV6_PATTERN,
      IP_ADDRESS_NAME_PATTERN),
    MEDIA_ENTITIES -> Seq(IMAGE_URL_PATTERN),
    ALL_ENTITIES -> ENTITY_PRESETS.keys.toSeq)

  /** Regex source registered for a preset.
    *
    * @param name
    *   preset name, matched exactly (e.g. `"IPV4_PATTERN"`)
    * @return
    *   the regex source, or `None` for an unknown name
    */
  def getPatternByName(name: String): Option[String] = ENTITY_PRESETS.get(name)

  /** Names of the presets that an auto-mode bundle activates.
    *
    * The bundle name is matched case-insensitively, so `"network_entities"` and
    * `"NETWORK_ENTITIES"` are equivalent.
    *
    * @param mode
    *   bundle name
    * @return
    *   the preset names of the bundle; empty for an unknown or `null` name
    */
  def describeAutoMode(mode: String): Seq[String] =
    Option(mode)
      .map(_.toUpperCase(Locale.ROOT))
      .flatMap(AUTO_MODES.get)
      .getOrElse(Seq.empty)

}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc