• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JohnSnowLabs / spark-nlp / 4413868535

pending completion
4413868535

push

github

GitHub
SPARKNLP-746: Handle empty validation sets (#13615)

8597 of 12936 relevant lines covered (66.46%)

0.66 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main/scala/com/johnsnowlabs/nlp/annotators/common/EmbeddingsWithSentence.scala
1
/*
2
 * Copyright 2017-2022 John Snow Labs
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *    http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16

17
package com.johnsnowlabs.nlp.annotators.common
18

19
import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}
20

21
/** Helper that converts between flat word-embedding [[Annotation]]s and
  * sentence-grouped [[TokenizedSentence]]s.
  */
object EmbeddingsWithSentence extends Annotated[TokenizedSentence] {

  /** The annotator type this helper packs/unpacks: word embeddings. */
  override def annotatorType: String = AnnotatorType.WORD_EMBEDDINGS

  /** Regroups flat word-embedding annotations into tokenized sentences.
    *
    * Tokens are assigned to a sentence when their character span lies fully
    * within that sentence's boundaries. Sentences left with no tokens are
    * dropped from the result (their original index is still consumed by
    * `zipWithIndex`, so surviving sentences keep their original position).
    *
    * @param annotations
    *   mixed annotations; only entries whose annotator type is WORD_EMBEDDINGS
    *   are treated as tokens, while sentence boundaries are recovered through
    *   [[SentenceSplit.unpack]]
    * @return
    *   one [[TokenizedSentence]] per non-empty sentence
    */
  override def unpack(annotations: Seq[Annotation]): Seq[TokenizedSentence] = {
    val tokens = annotations
      .filter(_.annotatorType == annotatorType)
      .toArray

    val sentences = SentenceSplit.unpack(annotations)

    /** // Evaluate whether to enable this validation to check proper usage of DOCUMENT and
      * SENTENCE within entire pipelines require(tokens.map(_.metadata.getOrElse("sentence",
      * "0").toInt).distinct.length == sentences.length, "Inconsistencies found in pipeline.
      * Tokens in sentences does not match with sentence count")
      */
    sentences
      .map(sentence => {
        // Keep only tokens whose span is contained in this sentence.
        // NOTE: was the non-short-circuit `&`; `&&` is the idiomatic logical
        // operator and short-circuits on the first false operand.
        val sentenceTokens = tokens
          .filter(token => token.begin >= sentence.start && token.end <= sentence.end)
          .map(token => IndexedToken(token.result, token.begin, token.end))
        sentenceTokens
      })
      .zipWithIndex
      .map { case (indexedTokens, index) => TokenizedSentence(indexedTokens, index) }
      .filter(_.indexedTokens.nonEmpty)

  }

  /** Flattens tokenized sentences back into one WORD_EMBEDDINGS annotation per
    * token, recording the originating sentence index in the annotation
    * metadata under the key "sentence".
    *
    * @param sentences
    *   sentences to flatten
    * @return
    *   one annotation per token, in sentence order
    */
  override def pack(sentences: Seq[TokenizedSentence]): Seq[Annotation] = {
    sentences.flatMap { sentence =>
      sentence.indexedTokens.map { token =>
        Annotation(
          annotatorType,
          token.begin,
          token.end,
          token.token,
          Map("sentence" -> sentence.sentenceIndex.toString))
      }
    }
  }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc