• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JohnSnowLabs / spark-nlp / 8572297544

05 Apr 2024 03:40PM UTC coverage: 62.641% (+0.04%) from 62.599%
8572297544

push

github

web-flow
[SPARKNLP-1031] Solves Dependency Parsers training issue (#14225)

1 of 1 new or added line in 1 file covered. (100.0%)

28 existing lines in 22 files now uncovered.

8962 of 14307 relevant lines covered (62.64%)

0.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

52.94
/src/main/scala/com/johnsnowlabs/nlp/annotators/sda/vivekn/ViveknSentimentUtils.scala
1
/*
2
 * Copyright 2017-2022 John Snow Labs
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *    http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16

17
package com.johnsnowlabs.nlp.annotators.sda.vivekn
18

19
import com.johnsnowlabs.nlp.util.io.ExternalResource
20
import com.johnsnowlabs.nlp.util.io.ResourceHelper.SourceStream
21

22
import java.io.FileNotFoundException
23
import scala.collection.mutable.{ListBuffer, Map => MMap}
24

25
trait ViveknSentimentUtils {
26

27
  /** Detects negations and transforms the words that follow them into `not_` form.
    *
    * Every word is lower-cased and, while a negation is active, prefixed with `not_`. Besides
    * the (possibly negated) unigram, the bigram formed with the previous emitted word and the
    * trigram formed with the two previous emitted words are also produced.
    *
    * The negation state is toggled by the words "not", "cannot", "no" and by any word ending in
    * "n't"; it is switched off again by any word containing one of the characters `?.,!:;`.
    *
    * @param words
    *   tokens of a single text, in reading order
    * @return
    *   the distinct unigrams, bigrams and trigrams (space separated) found in `words`
    */
  def negateSequence(words: Array[String]): Set[String] = {
    val negations = Seq("not", "cannot", "no")
    // Each character is a delimiter on its own: a word containing ANY of them ends the
    // negation scope. Keeping them as one multi-character string would require a word to
    // contain the whole sequence "?.,!:;", so the scope would practically never be reset.
    val delims: Set[Char] = "?.,!:;".toSet
    val result = ListBuffer.empty[String]
    var negation = false
    var prev: Option[String] = None
    var pprev: Option[String] = None
    words.foreach { word =>
      val processed = word.toLowerCase
      val negated = if (negation) "not_" + processed else processed
      result += negated
      prev.foreach { previous =>
        val bigram = previous + " " + negated
        result += bigram
        pprev.foreach(beforePrevious => result += (beforePrevious + " " + bigram))
        // Only shift the window once a previous word exists
        pprev = prev
      }
      prev = Some(negated)
      // The negation word itself is emitted un-negated; it affects the words that follow it
      if (negations.contains(processed) || processed.endsWith("n't")) negation = !negation
      // Punctuation closes the negation scope, even when it is attached to the word itself
      if (word.exists(delims)) negation = false
    }
    result.toSet
  }
53

54
  /** Counts the features of every line of an external resource.
    *
    * Each line is tokenized with the regex stored under the `tokenPattern` option of `er`, the
    * tokens are turned into features by `f`, and for every feature `w` the count of `w` is
    * incremented in `left` while the count of `"not_" + w` is incremented in `right`.
    *
    * @param er
    *   resource to read; its `options` must contain a `tokenPattern` entry
    * @param prune
    *   when greater than 0, entries counted only once are dropped from both results. The value
    *   acts as a switch only: the threshold is fixed at 1.
    * @param f
    *   feature extractor applied to the tokens of each line
    * @param left
    *   accumulator for the plain feature counts, updated in place
    * @param right
    *   accumulator for the `not_`-prefixed feature counts, updated in place
    * @return
    *   the pair `(left, right)`, filtered when pruning is requested
    * @throws java.io.FileNotFoundException
    *   if either accumulator is still empty after the resource has been read
    */
  def ViveknWordCount(
      er: ExternalResource,
      prune: Int,
      f: List[String] => Set[String],
      left: MMap[String, Long] = MMap.empty[String, Long].withDefaultValue(0),
      right: MMap[String, Long] = MMap.empty[String, Long].withDefaultValue(0))
      : (MMap[String, Long], MMap[String, Long]) = {
    val regex = er.options("tokenPattern").r
    val prefix = "not_"
    val sourceStream = SourceStream(er.path)
    // Close the stream even when tokenizing, `f` or the underlying read fails
    try {
      sourceStream.content.foreach(c =>
        c.foreach(line => {
          val words = regex.findAllMatchIn(line).map(_.matched).toList
          f(words).foreach(w => {
            left(w) += 1
            right(prefix + w) += 1
          })
        }))
    } finally {
      sourceStream.close()
    }
    if (left.isEmpty || right.isEmpty)
      throw new FileNotFoundException(
        "Word count dictionary for vivekn sentiment does not exist or is empty")
    if (prune > 0)
      (left.filter { case (_, v) => v > 1 }, right.filter { case (_, v) => v > 1 })
    else
      (left, right)
  }
82
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc