• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JohnSnowLabs / spark-nlp / 4413868535

pending completion
4413868535

push

github

GitHub
SPARKNLP-746: Handle empty validation sets (#13615)

8597 of 12936 relevant lines covered (66.46%)

0.66 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main/scala/com/johnsnowlabs/nlp/annotators/audio/feature_extractor/Preprocessor.scala
1
/*
2
 * Copyright 2017-2022 John Snow Labs
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *    http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16

17
package com.johnsnowlabs.nlp.annotators.audio.feature_extractor
18

19
import com.johnsnowlabs.util.JsonParser
20
import org.json4s.jackson.JsonMethods
21
import org.json4s.{JNothing, JValue}
22

23
/** Immutable holder for the settings of an audio feature-extractor preprocessor.
  *
  * The field names are deliberately snake_case: they match, one to one, the keys of the
  * `preprocessor_config.json` document that the companion object's `loadPreprocessorConfig`
  * deserializes into this class, so they must not be renamed.
  *
  * @param do_normalize
  *   value of the `do_normalize` key; defaults to `true` when not supplied
  * @param feature_size
  *   value of the `feature_size` key
  * @param padding_side
  *   value of the `padding_side` key (the sample config in the companion uses `"right"`)
  * @param padding_value
  *   value of the `padding_value` key
  * @param return_attention_mask
  *   value of the `return_attention_mask` key
  * @param sampling_rate
  *   value of the `sampling_rate` key (the sample config in the companion uses `16000`)
  */
private[johnsnowlabs] case class Preprocessor(
    do_normalize: Boolean = true,
    feature_size: Int,
    padding_side: String,
    padding_value: Float,
    return_attention_mask: Boolean,
    sampling_rate: Int)
30

31
/** Companion of [[Preprocessor]]: explicit factory plus helpers that validate and load the
  * content of a `preprocessor_config.json` file.
  */
private[johnsnowlabs] object Preprocessor {

  /** Keys that [[schemaCheckWav2Vec2]] requires, in the order they are checked. */
  private val requiredWav2Vec2Keys: Seq[String] = Seq(
    "do_normalize",
    "feature_size",
    "padding_side",
    "padding_value",
    "return_attention_mask",
    "sampling_rate")

  /** Builds a [[Preprocessor]] from its individual settings.
    *
    * Declared explicitly so that any future pre-construction manipulation of the arguments has a
    * single place to live; at present every argument is forwarded unchanged.
    */
  def apply(
      do_normalize: Boolean = true,
      feature_size: Int,
      padding_side: String,
      padding_value: Float,
      return_attention_mask: Boolean,
      sampling_rate: Int): Preprocessor = {

    // `new` is required here: a plain `Preprocessor(...)` would call this very method again.
    new Preprocessor(
      do_normalize,
      feature_size,
      padding_side,
      padding_value,
      return_attention_mask,
      sampling_rate)
  }

  /** Adds a key-presence test to json4s values. */
  private implicit class JValueExtended(value: JValue) {

    /** @return `true` unless looking up `childString` with `\` yields `JNothing` */
    def has(childString: String): Boolean =
      (value \ childString) != JNothing
  }

  /** Checks that a JSON document contains every key needed to build a [[Preprocessor]].
    *
    * Only the presence of the keys is verified, not the type of their values.
    *
    * @param jsonStr
    *   raw JSON text; whatever `JsonMethods.parse` throws for unparsable input propagates
    * @return
    *   `true` when all required keys are present, `false` otherwise
    */
  def schemaCheckWav2Vec2(jsonStr: String): Boolean = {
    val json = JsonMethods.parse(jsonStr)
    // `forall` stops at the first missing key, like the `&&` chain it replaces.
    requiredWav2Vec2Keys.forall(key => json.has(key))
  }

  /** Validates and deserializes the content of a `preprocessor_config.json` file.
    *
    * @param preprocessorConfigJsonContent
    *   raw JSON text of the configuration file
    * @return
    *   the parsed [[Preprocessor]]
    * @throws IllegalArgumentException
    *   if [[schemaCheckWav2Vec2]] reports that a required key is missing
    */
  def loadPreprocessorConfig(preprocessorConfigJsonContent: String): Preprocessor = {

    val preprocessorJsonErrorMsg =
      s"""The schema of preprocessor_config.json file is incorrect. It should look like this:         
         |{
         |  "do_normalize": true,
         |  "feature_size": 1,
         |  "padding_side": "right",
         |  "padding_value": 0.0,
         |  "return_attention_mask": false,
         |  "sampling_rate": 16000
         |}
         |""".stripMargin

    require(schemaCheckWav2Vec2(preprocessorConfigJsonContent), preprocessorJsonErrorMsg)

    try {
      JsonParser.parseObject[Preprocessor](preprocessorConfigJsonContent)
    } catch {
      case e: Exception =>
        // Print the expected schema next to the parser's message, then let the original
        // failure propagate. Re-running the same parse just to fail again is wasted work.
        println(s"${preprocessorJsonErrorMsg} \n error: ${e.getMessage}")
        throw e
    }
  }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc