4413868535

Build Type

push

github

Committed by GitHub

Commit Message

SPARKNLP-746: Handle empty validation sets (#13615)

Run Details

8597 of 12936 relevant lines covered (66.46%)

0.66 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/src/main/scala/com/johnsnowlabs/nlp/annotators/common/DependencyParsed.scala

/*
 * Copyright 2017-2022 John Snow Labs
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.johnsnowlabs.nlp.annotators.common

import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}

/** The tokens of a single sentence, each carrying the head assigned to it.
  *
  * @param tokens
  *   the sentence's words; `DependencyParsed.unpack` fills this array in ascending order of the
  *   tokens' `begin` value
  */
case class DependencyParsedSentence(tokens: Array[WordWithDependency])

/** A single word together with the head it depends on.
  *
  * @param word
  *   the token text
  * @param begin
  *   start position of the token (presumably a character offset in the document; confirm against
  *   the tokenizer)
  * @param end
  *   end position of the token (same unit as `begin`)
  * @param head
  *   the head of this word. NOTE(review): the two directions of `DependencyParsed` treat this value
  *   differently. `pack` looks the head up by position in the sentence's `tokens` array, falls back
  *   to a "ROOT" placeholder when that position is out of range, and writes a head equal to the
  *   token count as head id 0. `unpack` stores the integer found in the annotation's "head"
  *   metadata as-is, without converting it back.
  */
case class WordWithDependency(word: String, begin: Int, end: Int, head: Int)

/** Converts between dependency annotations and [[DependencyParsedSentence]] values. */
object DependencyParsed extends Annotated[DependencyParsedSentence] {

  /** The annotator type handled by this object: `AnnotatorType.DEPENDENCY`. */
  override def annotatorType: String = AnnotatorType.DEPENDENCY

  /** Rebuilds one [[DependencyParsedSentence]] per tokenized sentence found in `annotations`.
    *
    * Dependency annotations (those whose type equals [[annotatorType]]) are sorted by `begin` and
    * matched to tokens purely by position: the first sentence consumes as many annotations as it
    * has tokens, the next sentence continues from there, and so on. Within a sentence the tokens
    * are sorted by `begin` as well.
    *
    * The `head` stored in each [[WordWithDependency]] is the integer parsed from the annotation's
    * "head" metadata entry, unchanged.
    *
    * @param annotations
    *   annotations containing both the tokenized sentences (as understood by
    *   `TokenizedWithSentence.unpack`) and the dependency annotations
    * @return
    *   the sentences in the order returned by `TokenizedWithSentence.unpack`
    * @throws IndexOutOfBoundsException
    *   if there are fewer dependency annotations than tokens
    * @throws NumberFormatException
    *   if a "head" metadata value is not an integer
    */
  override def unpack(annotations: Seq[Annotation]): Seq[DependencyParsedSentence] = {
    val sentences = TokenizedWithSentence.unpack(annotations)

    // Materialised as an indexed sequence so the positional lookups below are constant time even
    // when the caller hands in a linear Seq such as a List.
    val depAnnotations = annotations
      .filter(_.annotatorType == annotatorType)
      .sortBy(_.begin)
      .toIndexedSeq

    // offsets(k) is the position in depAnnotations of the first token of sentence k, i.e. the
    // number of tokens in all preceding sentences.
    val offsets = sentences.scanLeft(0) { (consumed, sentence) =>
      consumed + sentence.indexedTokens.length
    }

    sentences.zip(offsets).map { case (sentence, offset) =>
      val sorted = sentence.indexedTokens.sortBy(_.begin)
      val words = sorted.zipWithIndex.map { case (token, position) =>
        val head = depAnnotations(offset + position).metadata("head").toInt
        WordWithDependency(token.token, token.begin, token.end, head)
      }

      DependencyParsedSentence(words)
    }
  }

  /** Emits one dependency annotation per token of every sentence.
    *
    * Each annotation spans the token (`begin`/`end`), carries the head word as its result and
    * stores the following metadata:
    *   - "head": 1-based position of the head within the sentence, or 0 when the head is ROOT
    *   - "head.begin" / "head.end": span of the head word (-1 for ROOT)
    *   - "sentence": index of the sentence within `items`
    *
    * A token's `head` is interpreted as a 0-based index into the sentence's tokens. Any value
    * outside that range (including the conventional `tokens.length`) denotes ROOT.
    *
    * @param items
    *   the parsed sentences to serialise
    * @return
    *   the annotations of all sentences, in sentence order and then token order
    */
  override def pack(items: Seq[DependencyParsedSentence]): Seq[Annotation] = {
    items.zipWithIndex.flatMap { case (sentence, index) =>
      sentence.tokens.map { token =>
        val headData = getHeadData(token.head, sentence)
        // Keep the head id consistent with getHeadData: only an in-range head refers to a real
        // token (reported 1-based); everything else resolved to ROOT above and is reported as 0.
        val realHead =
          if (token.head >= 0 && token.head < sentence.tokens.length) token.head + 1 else 0
        Annotation(
          annotatorType,
          token.begin,
          token.end,
          headData.word,
          Map(
            "head" -> realHead.toString,
            "head.begin" -> headData.begin.toString,
            "head.end" -> headData.end.toString,
            "sentence" -> index.toString))
      }
    }
  }

  /** Looks up the head word at position `head` within `sentence`.
    *
    * @param head
    *   0-based index into `sentence.tokens`
    * @param sentence
    *   the sentence the index refers to
    * @return
    *   the token at that index, or a placeholder `WordWithDependency("ROOT", -1, -1, -1)` when the
    *   index is outside the sentence
    */
  def getHeadData(head: Int, sentence: DependencyParsedSentence): WordWithDependency = {
    val root: WordWithDependency = WordWithDependency("ROOT", -1, -1, -1)
    // lift turns an out-of-range index into None instead of throwing.
    sentence.tokens.lift(head).getOrElse(root)
  }

}

1	/*
2	* Copyright 2017-2022 John Snow Labs
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16
17	package com.johnsnowlabs.nlp.annotators.common
18
19	import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}
20
21	case class DependencyParsedSentence(tokens: Array[WordWithDependency])
22
23	case class WordWithDependency(word: String, begin: Int, end: Int, head: Int)
24
25	object DependencyParsed extends Annotated[DependencyParsedSentence] {
26
27	override def annotatorType: String = AnnotatorType.DEPENDENCY	×
28
29	override def unpack(annotations: Seq[Annotation]): Seq[DependencyParsedSentence] = {
30	val sentences = TokenizedWithSentence.unpack(annotations)	×
31	val depAnnotations = annotations
32	.filter(a => a.annotatorType == annotatorType)	×
33	.sortBy(a => a.begin)	×
34
35	var last = 0	×
36	sentences.map { sentence =>	×
37	val sorted = sentence.indexedTokens.sortBy(t => t.begin)	×
38	val dependencies = (last until (last + sorted.length)).map { i =>	×
39	depAnnotations(i).metadata("head").toInt	×
40	}
41
42	last += sorted.length	×
43
44	val words = sorted.zip(dependencies).map { case (token, dependency) =>	×
45	WordWithDependency(token.token, token.begin, token.end, dependency)	×
46	}
47
48	DependencyParsedSentence(words)	×
49	}
50	}
51
52	override def pack(items: Seq[DependencyParsedSentence]): Seq[Annotation] = {
53	items.zipWithIndex.flatMap { case (sentence, index) =>	×
54	sentence.tokens.map { token =>	×
55	val headData = getHeadData(token.head, sentence)	×
56	val realHead = if (token.head == sentence.tokens.length) 0 else token.head + 1	×
57	Annotation(	×
58	annotatorType,	×
59	token.begin,	×
60	token.end,	×
61	headData.word,	×
62	Map(	×
63	"head" -> realHead.toString,	×
64	"head.begin" -> headData.begin.toString,	×
65	"head.end" -> headData.end.toString,	×
66	"sentence" -> index.toString))	×
67	}
68	}
69	}
70
71	def getHeadData(head: Int, sentence: DependencyParsedSentence): WordWithDependency = {
72	val root: WordWithDependency = WordWithDependency("ROOT", -1, -1, -1)	×
73	sentence.tokens.lift(head).getOrElse(root)	×
74	}
75
76	}

JohnSnowLabs / spark-nlp / 4413868535

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous