• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JohnSnowLabs / spark-nlp / 4413868535

pending completion
4413868535

push

github

GitHub
SPARKNLP-746: Handle empty validation sets (#13615)

8597 of 12936 relevant lines covered (66.46%)

0.66 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main/scala/com/johnsnowlabs/nlp/annotators/common/DependencyParsed.scala
1
/*
2
 * Copyright 2017-2022 John Snow Labs
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *    http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16

17
package com.johnsnowlabs.nlp.annotators.common
18

19
import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}
20

21
/** A single sentence after dependency parsing.
  *
  * @param tokens
  *   the words of the sentence, each carrying the head value assigned to it
  */
case class DependencyParsedSentence(
    tokens: Array[WordWithDependency]
)
22

23
/** A word together with the head value assigned to it by dependency parsing.
  *
  * @param word
  *   text of the word
  * @param begin
  *   begin position of the word, as carried by the token it was built from
  * @param end
  *   end position of the word, as carried by the token it was built from
  * @param head
  *   head value of the word; its exact convention is determined by whoever builds the instance
  */
case class WordWithDependency(
    word: String,
    begin: Int,
    end: Int,
    head: Int
)
24

25
/** Converts between dependency annotations and [[DependencyParsedSentence]] values. */
object DependencyParsed extends Annotated[DependencyParsedSentence] {

  /** Annotator type of the annotations this object reads in `unpack` and emits in `pack`. */
  override def annotatorType: String = AnnotatorType.DEPENDENCY

  /** Rebuilds dependency-parsed sentences from a mixed sequence of annotations.
    *
    * Sentences and their tokens are obtained through `TokenizedWithSentence.unpack`. The
    * annotations whose type equals [[annotatorType]] are sorted by `begin` and consumed
    * positionally: the first sentence takes as many of them as it has tokens, the next sentence
    * continues from there, and so on.
    *
    * NOTE(review): the `"head"` metadata value is stored as-is in [[WordWithDependency.head]];
    * the shift applied by [[pack]] is not undone here. Confirm callers expect this.
    *
    * @param annotations
    *   annotations holding both the tokenized sentences and the dependency annotations
    * @return
    *   one [[DependencyParsedSentence]] per unpacked sentence, tokens ordered by `begin`
    */
  override def unpack(annotations: Seq[Annotation]): Seq[DependencyParsedSentence] = {
    val tokenizedSentences = TokenizedWithSentence.unpack(annotations)
    val dependencyAnnotations =
      annotations.filter(_.annotatorType == annotatorType).sortBy(_.begin)

    // Number of dependency annotations already assigned to earlier sentences.
    // Presumably the sentences arrive in text order, so this positional pairing lines up -
    // TODO confirm against TokenizedWithSentence.unpack.
    var consumed = 0
    for (tokenized <- tokenizedSentences) yield {
      val orderedTokens = tokenized.indexedTokens.sortBy(_.begin)
      val sentenceEnd = consumed + orderedTokens.length

      val heads = (consumed until sentenceEnd).map { position =>
        dependencyAnnotations(position).metadata("head").toInt
      }
      consumed = sentenceEnd

      DependencyParsedSentence(orderedTokens.zip(heads).map { case (indexed, head) =>
        WordWithDependency(indexed.token, indexed.begin, indexed.end, head)
      })
    }
  }

  /** Turns dependency-parsed sentences into one annotation per word.
    *
    * Each annotation spans the word's `begin`/`end`, carries the head word's text as its result
    * and records in its metadata: `"head"` (0 when the word's head equals the number of tokens
    * in the sentence, otherwise the head plus one), `"head.begin"` and `"head.end"` (span of the
    * head word as resolved by [[getHeadData]]) and `"sentence"` (position of the sentence in
    * `items`).
    *
    * @param items
    *   sentences to convert
    * @return
    *   the annotations of all sentences, in sentence order and then token order
    */
  override def pack(items: Seq[DependencyParsedSentence]): Seq[Annotation] =
    items.zipWithIndex.flatMap { case (parsed, sentenceIndex) =>
      val tokenCount = parsed.tokens.length
      parsed.tokens.map { word =>
        val headWord = getHeadData(word.head, parsed)
        // A head equal to the token count is reported as 0; any other head is shifted by one.
        val reportedHead = if (word.head != tokenCount) word.head + 1 else 0
        val metadata = Map(
          "head" -> reportedHead.toString,
          "head.begin" -> headWord.begin.toString,
          "head.end" -> headWord.end.toString,
          "sentence" -> sentenceIndex.toString)

        Annotation(annotatorType, word.begin, word.end, headWord.word, metadata)
      }
    }

  /** Looks up the word that a head index points to within a sentence.
    *
    * @param head
    *   index into `sentence.tokens`
    * @param sentence
    *   sentence whose tokens are searched
    * @return
    *   the token at `head`, or a placeholder `WordWithDependency("ROOT", -1, -1, -1)` when
    *   `head` is not a valid index of `sentence.tokens`
    */
  def getHeadData(head: Int, sentence: DependencyParsedSentence): WordWithDependency = {
    val candidates = sentence.tokens
    if (head >= 0 && head < candidates.length) candidates(head)
    else WordWithDependency("ROOT", -1, -1, -1)
  }

}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc