13883000244

Committed 16 Mar 2025 11:44AM UTC coverage: 59.034% (-1.0%) from 60.072%

Build # 13883000244

Build Type

Pull #14444

github

Committed by

web-flow

Commit Message

Merge 6d717703b into 05000ab4a

Pull Request #14444: Sparknlp 1060 implement phi 3.5 vision

Run Details

0 of 292 new or added lines in 5 files covered. (0.0%)

20 existing lines in 14 files now uncovered.

9413 of 15945 relevant lines covered (59.03%)

0.59 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.22

/src/main/scala/com/johnsnowlabs/nlp/annotators/sda/pragmatic/SentimentDetectorModel.scala

/*
 * Copyright 2017-2022 John Snow Labs
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.johnsnowlabs.nlp.annotators.sda.pragmatic

import com.johnsnowlabs.nlp.annotators.common.TokenizedWithSentence
import com.johnsnowlabs.nlp.serialization.MapFeature
import com.johnsnowlabs.nlp.{
  Annotation,
  AnnotatorModel,
  HasSimpleAnnotate,
  ParamsAndFeaturesReadable
}
import org.apache.spark.ml.param.{BooleanParam, DoubleParam}
import org.apache.spark.ml.util.Identifiable

/** Rule based sentiment detector, which calculates a score based on predefined keywords.
  *
  * This is the instantiated model of the [[SentimentDetector]]. For training your own model,
  * please see the documentation of that class.
  *
  * A dictionary of predefined sentiment keywords must be provided with `setDictionary`, where
  * each line is a word delimited to its class (either `positive` or `negative`). The dictionary
  * can be set either in the form of a delimited text file or directly as an
  * [[com.johnsnowlabs.nlp.util.io.ExternalResource ExternalResource]].
  *
  * By default, the sentiment score will be assigned labels `"positive"` if the score is `>= 0`,
  * else `"negative"`. To retrieve the raw sentiment scores, `enableScore` needs to be set to
  * `true`.
  *
  * For extended examples of usage, see the
  * [[https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/training/english/dictionary-sentiment/sentiment.ipynb Examples]]
  * and the
  * [[https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/annotators/sda/pragmatic/PragmaticSentimentTestSpec.scala SentimentTestSpec]].
  *
  * @see
  *   [[com.johnsnowlabs.nlp.annotators.sda.vivekn.ViveknSentimentApproach ViveknSentimentApproach]]
  *   for an alternative approach to sentiment extraction
  * @param uid
  *   internal uid needed for saving annotator to disk
  * @groupname anno Annotator types
  * @groupdesc anno
  *   Required input and expected output annotator types
  * @groupname Ungrouped Members
  * @groupname param Parameters
  * @groupname setParam Parameter setters
  * @groupname getParam Parameter getters
  * @groupname Ungrouped Members
  * @groupprio param  1
  * @groupprio anno  2
  * @groupprio Ungrouped 3
  * @groupprio setParam  4
  * @groupprio getParam  5
  * @groupdesc param
  *   A list of (hyper-)parameter keys this annotator can take. Users can set and get the
  *   parameter values through setters and getters, respectively.
  */
class SentimentDetectorModel(override val uid: String)
    extends AnnotatorModel[SentimentDetectorModel]
    with HasSimpleAnnotate[SentimentDetectorModel] {

  import com.johnsnowlabs.nlp.AnnotatorType._

  /** Sentiment dictionary: each key is a keyword, each value the class it is assigned to.
    *
    * @group param
    */
  val sentimentDict: MapFeature[String, String] =
    new MapFeature[String, String](this, "sentimentDict")

  /** Scorer built from the dictionary and the five multipliers.
    *
    * It is a `lazy val`, so the params are read on first access only: values changed through the
    * setters after the first call to [[annotate]] are not picked up by this instance.
    *
    * @group param
    */
  lazy val model: PragmaticScorer = new PragmaticScorer(
    $$(sentimentDict),
    $(positiveMultiplier),
    $(negativeMultiplier),
    $(incrementMultiplier),
    $(decrementMultiplier),
    $(reverseMultiplier))

  /** Output annotation type : SENTIMENT
    *
    * @group anno
    */
  override val outputAnnotatorType: AnnotatorType = SENTIMENT

  /** Input annotation type : TOKEN, DOCUMENT
    *
    * @group anno
    */
  override val inputAnnotatorTypes: Array[AnnotatorType] = Array(TOKEN, DOCUMENT)

  /** Creates the model with a random uid prefixed with `SENTIMENT`. */
  def this() = this(Identifiable.randomUID("SENTIMENT"))

  /** Multiplier for positive sentiments (Default: `1.0`)
    *
    * @group param
    */
  val positiveMultiplier: DoubleParam = new DoubleParam(
    this,
    "positiveMultiplier",
    "Multiplier for positive sentiments. Defaults 1.0")

  /** Multiplier for negative sentiments (Default: `-1.0`)
    *
    * @group param
    */
  val negativeMultiplier: DoubleParam = new DoubleParam(
    this,
    "negativeMultiplier",
    "Multiplier for negative sentiments. Defaults -1.0")

  /** Multiplier for increment sentiments (Default: `2.0`)
    *
    * @group param
    */
  val incrementMultiplier: DoubleParam = new DoubleParam(
    this,
    "incrementMultiplier",
    "Multiplier for increment sentiments. Defaults 2.0")

  /** Multiplier for decrement sentiments (Default: `-2.0`)
    *
    * @group param
    */
  val decrementMultiplier: DoubleParam = new DoubleParam(
    this,
    "decrementMultiplier",
    "Multiplier for decrement sentiments. Defaults -2.0")

  /** Multiplier for revert sentiments (Default: `-1.0`)
    *
    * @group param
    */
  val reverseMultiplier: DoubleParam =
    new DoubleParam(this, "reverseMultiplier", "Multiplier for revert sentiments. Defaults -1.0")

  /** If true, score will show as a string type containing a double value, else will output string
    * `"positive"` or `"negative"` (Default: `false`)
    *
    * @group param
    */
  val enableScore: BooleanParam = new BooleanParam(
    this,
    "enableScore",
    "if true, score will show as a string type containing a double value, else will output string \"positive\" or \"negative\". Defaults false")

  // Register the documented defaults, so that a model whose params were never set explicitly
  // can still resolve every `$(...)` read in `model` and `annotate` instead of throwing.
  // This call must stay below all Param declarations above: the params have to be initialised
  // before they are registered.
  setDefault(
    positiveMultiplier -> 1.0,
    negativeMultiplier -> -1.0,
    incrementMultiplier -> 2.0,
    decrementMultiplier -> -2.0,
    reverseMultiplier -> -1.0,
    enableScore -> false)

  /** Multiplier for positive sentiments (Default: `1.0`)
    *
    * @group setParam
    */
  def setPositiveMultiplier(v: Double): this.type = set(positiveMultiplier, v)

  /** Misspelled name of [[setPositiveMultiplier]], kept so that existing callers keep working.
    *
    * @group setParam
    */
  def setPositiveMultipler(v: Double): this.type = setPositiveMultiplier(v)

  /** Multiplier for negative sentiments (Default: `-1.0`)
    *
    * @group setParam
    */
  def setNegativeMultiplier(v: Double): this.type = set(negativeMultiplier, v)

  /** Misspelled name of [[setNegativeMultiplier]], kept so that existing callers keep working.
    *
    * @group setParam
    */
  def setNegativeMultipler(v: Double): this.type = setNegativeMultiplier(v)

  /** Multiplier for increment sentiments (Default: `2.0`)
    *
    * @group setParam
    */
  def setIncrementMultiplier(v: Double): this.type = set(incrementMultiplier, v)

  /** Misspelled name of [[setIncrementMultiplier]], kept so that existing callers keep working.
    *
    * @group setParam
    */
  def setIncrementMultipler(v: Double): this.type = setIncrementMultiplier(v)

  /** Multiplier for decrement sentiments (Default: `-2.0`)
    *
    * @group setParam
    */
  def setDecrementMultiplier(v: Double): this.type = set(decrementMultiplier, v)

  /** Misspelled name of [[setDecrementMultiplier]], kept so that existing callers keep working.
    *
    * @group setParam
    */
  def setDecrementMultipler(v: Double): this.type = setDecrementMultiplier(v)

  /** Multiplier for revert sentiments (Default: `-1.0`)
    *
    * @group setParam
    */
  def setReverseMultiplier(v: Double): this.type = set(reverseMultiplier, v)

  /** Misspelled name of [[setReverseMultiplier]], kept so that existing callers keep working.
    *
    * @group setParam
    */
  def setReverseMultipler(v: Double): this.type = setReverseMultiplier(v)

  /** If true, score will show as a string type containing a double value, else will output string
    * `"positive"` or `"negative"` (Default: `false`)
    *
    * @group setParam
    */
  def setEnableScore(v: Boolean): this.type = set(enableScore, v)

  /** Sets the sentiment dictionary directly from an in-memory map.
    *
    * @param value
    *   keyword to sentiment class entries stored in [[sentimentDict]]
    * @group setParam
    */
  def setSentimentDict(value: Map[String, String]): this.type = set(sentimentDict, value)

  /** Scores all tokenized sentences found in the input together and wraps the outcome in a single
    * SENTIMENT annotation.
    *
    * The annotation's result is the raw score rendered as a string when `enableScore` is true;
    * otherwise it is `"positive"` for a score `>= 0` and `"negative"` for anything below. Begin
    * and end are hardcoded to 0 because the score is not tied to a character span.
    *
    * @param annotations
    *   Annotations that correspond to inputAnnotationCols generated by previous annotators if any
    * @return
    *   a one-element sequence holding the sentiment annotation, with empty metadata
    */
  override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = {
    val tokenizedSentences = TokenizedWithSentence.unpack(annotations)
    val score = model.score(tokenizedSentences.toArray)

    // A score of exactly 0 is labelled "positive", as stated in the class documentation.
    val result =
      if ($(enableScore)) score.toString
      else if (score >= 0) "positive"
      else "negative"

    Seq(Annotation(outputAnnotatorType, 0, 0, result, Map.empty[String, String]))
  }

}

/** Companion object of [[SentimentDetectorModel]].
  *
  * It declares no members of its own; everything it offers is inherited from
  * [[com.johnsnowlabs.nlp.ParamsAndFeaturesReadable ParamsAndFeaturesReadable]]. Presumably that
  * trait supplies the reader used to load a saved model from disk -- confirm against the trait.
  */
object SentimentDetectorModel extends ParamsAndFeaturesReadable[SentimentDetectorModel]

1	/*
2	* Copyright 2017-2022 John Snow Labs
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16
17	package com.johnsnowlabs.nlp.annotators.sda.pragmatic
18
19	import com.johnsnowlabs.nlp.annotators.common.TokenizedWithSentence
20	import com.johnsnowlabs.nlp.serialization.MapFeature
21	import com.johnsnowlabs.nlp.{
22	Annotation,
23	AnnotatorModel,
24	HasSimpleAnnotate,
25	ParamsAndFeaturesReadable
26	}
27	import org.apache.spark.ml.param.{BooleanParam, DoubleParam}
28	import org.apache.spark.ml.util.Identifiable
29
30	/** Rule based sentiment detector, which calculates a score based on predefined keywords.
31	*
32	* This is the instantiated model of the [[SentimentDetector]]. For training your own model,
33	* please see the documentation of that class.
34	*
35	* A dictionary of predefined sentiment keywords must be provided with `setDictionary`, where
36	* each line is a word delimited to its class (either `positive` or `negative`). The dictionary
37	* can be set in either in the form of a delimited text file or directly as an
38	* [[com.johnsnowlabs.nlp.util.io.ExternalResource ExternalResource]].
39	*
40	* By default, the sentiment score will be assigned labels `"positive"` if the score is `>= 0`,
41	* else `"negative"`. To retrieve the raw sentiment scores, `enableScore` needs to be set to
42	* `true`.
43	*
44	* For extended examples of usage, see the
45	* [[https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/training/english/dictionary-sentiment/sentiment.ipynb Examples]]
46	* and the
47	* [[https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/annotators/sda/pragmatic/PragmaticSentimentTestSpec.scala SentimentTestSpec]].
48	*
49	* @see
50	* [[com.johnsnowlabs.nlp.annotators.sda.vivekn.ViveknSentimentApproach ViveknSentimentApproach]]
51	* for an alternative approach to sentiment extraction
52	* @param uid
53	* internal uid needed for saving annotator to disk
54	* @groupname anno Annotator types
55	* @groupdesc anno
56	* Required input and expected output annotator types
57	* @groupname Ungrouped Members
58	* @groupname param Parameters
59	* @groupname setParam Parameter setters
60	* @groupname getParam Parameter getters
61	* @groupname Ungrouped Members
62	* @groupprio param 1
63	* @groupprio anno 2
64	* @groupprio Ungrouped 3
65	* @groupprio setParam 4
66	* @groupprio getParam 5
67	* @groupdesc param
68	* A list of (hyper-)parameter keys this annotator can take. Users can set and get the
69	* parameter values through setters and getters, respectively.
70	*/
71	class SentimentDetectorModel(override val uid: String)
72	extends AnnotatorModel[SentimentDetectorModel]
73	with HasSimpleAnnotate[SentimentDetectorModel] {
74
75	import com.johnsnowlabs.nlp.AnnotatorType._
76
77	/** Sentiment dict
78	*
79	* @group param
80	*/
81	val sentimentDict = new MapFeature[String, String](this, "sentimentDict")	1✔
82
83	/** @group param */
84	lazy val model: PragmaticScorer = new PragmaticScorer(
85	$$(sentimentDict),
86	$(positiveMultiplier),
87	$(negativeMultiplier),
88	$(incrementMultiplier),
89	$(decrementMultiplier),
90	$(reverseMultiplier))
91
92	/** Output annotation type : SENTIMENT
93	*
94	* @group anno
95	*/
96	override val outputAnnotatorType: AnnotatorType = SENTIMENT	1✔
97
98	/** Input annotation type : TOKEN, DOCUMENT
99	*
100	* @group anno
101	*/
102	override val inputAnnotatorTypes: Array[AnnotatorType] = Array(TOKEN, DOCUMENT)	1✔
103
104	def this() = this(Identifiable.randomUID("SENTIMENT"))	1✔
105
106	/** Multiplier for positive sentiments (Default: `1.0`)
107	*
108	* @group param
109	*/
110	val positiveMultiplier = new DoubleParam(	1✔
111	this,
112	"positiveMultiplier",	1✔
113	"Multiplier for positive sentiments. Defaults 1.0")	1✔
114
115	/** Multiplier for negative sentiments (Default: `-1.0`)
116	*
117	* @group param
118	*/
119	val negativeMultiplier = new DoubleParam(	1✔
120	this,
121	"negativeMultiplier",	1✔
122	"Multiplier for negative sentiments. Defaults -1.0")	1✔
123
124	/** Multiplier for increment sentiments (Default: `2.0`)
125	*
126	* @group param
127	*/
128	val incrementMultiplier = new DoubleParam(	1✔
129	this,
130	"incrementMultiplier",	1✔
131	"Multiplier for increment sentiments. Defaults 2.0")	1✔
132
133	/** Multiplier for decrement sentiments (Default: `-2.0`)
134	*
135	* @group param
136	*/
137	val decrementMultiplier = new DoubleParam(	1✔
138	this,
139	"decrementMultiplier",	1✔
140	"Multiplier for decrement sentiments. Defaults -2.0")	1✔
141
142	/** Multiplier for revert sentiments (Default: `-1.0`)
143	*
144	* @group param
145	*/
146	val reverseMultiplier =
147	new DoubleParam(this, "reverseMultiplier", "Multiplier for revert sentiments. Defaults -1.0")	1✔
148
149	/** if true, score will show as a string type containing a double value, else will output string
150	* `"positive"` or `"negative"` (Default: `false`)
151	*
152	* @group param
153	*/
154	val enableScore = new BooleanParam(	1✔
155	this,
156	"enableScore",	1✔
157	"if true, score will show as a string type containing a double value, else will output string \"positive\" or \"negative\". Defaults false")	1✔
158
159	/** Multiplier for positive sentiments (Default: `1.0`)
160	*
161	* @group setParam
162	*/
163	def setPositiveMultipler(v: Double): this.type = set(positiveMultiplier, v)	1✔
164
165	/** Multiplier for negative sentiments (Default: `-1.0`)
166	*
167	* @group setParam
168	*/
169	def setNegativeMultipler(v: Double): this.type = set(negativeMultiplier, v)	1✔
170
171	/** Multiplier for increment sentiments (Default: `2.0`)
172	*
173	* @group setParam
174	*/
175	def setIncrementMultipler(v: Double): this.type = set(incrementMultiplier, v)	1✔
176
177	/** Multiplier for decrement sentiments (Default: `-2.0`)
178	*
179	* @group setParam
180	*/
181	def setDecrementMultipler(v: Double): this.type = set(decrementMultiplier, v)	1✔
182
183	/** Multiplier for revert sentiments (Default: `-1.0`)
184	*
185	* @group setParam
186	*/
187	def setReverseMultipler(v: Double): this.type = set(reverseMultiplier, v)	1✔
188
189	/** If true, score will show as a string type containing a double value, else will output string
190	* `"positive"` or `"negative"` (Default: `false`)
191	*
192	* @group setParam
193	*/
194	def setEnableScore(v: Boolean): this.type = set(enableScore, v)	1✔
195
196	/** Path to file with list of inputs and their content, with such delimiter, readAs LINE_BY_LINE
197	* or as SPARK_DATASET. If latter is set, options is passed to spark reader.
198	*
199	* @group setParam
200	*/
201	def setSentimentDict(value: Map[String, String]): this.type = set(sentimentDict, value)	1✔
202
203	/** Tokens are needed to identify each word in a sentence boundary POS tags are optionally
204	* submitted to the model in case they are needed Lemmas are another optional annotator for
205	* some models Bounds of sentiment are hardcoded to 0 as they render useless
206	*
207	* @param annotations
208	* Annotations that correspond to inputAnnotationCols generated by previous annotators if any
209	* @return
210	* any number of annotations processed for every input annotation. Not necessary one to one
211	* relationship
212	*/
213	override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = {
214	val tokenizedSentences = TokenizedWithSentence.unpack(annotations)	1✔
215
216	val score = model.score(tokenizedSentences.toArray)	1✔
217
218	Seq(	1✔
219	Annotation(	1✔
220	outputAnnotatorType,	1✔
221	0,	1✔
222	0, {	1✔
UNCOV 223	if ($(enableScore)) score.toString else if (score >= 0) "positive" else "negative"	×
224	},
225	Map.empty[String, String]))	1✔
226	}
227
228	}
229
230	object SentimentDetectorModel extends ParamsAndFeaturesReadable[SentimentDetectorModel]

JohnSnowLabs / spark-nlp / 13883000244

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous