10675586955

Committed 03 Sep 2024 02:30AM UTC coverage: 61.821% (-0.06%) from 61.884%

Build # 10675586955

Build Type

Pull #14379

github

Committed by

web-flow

Commit Message

Merge 1f222af49 into 9285df8c6

Pull Request #14379: SPARKNLP Introducing LLAMA 3

Run Details

0 of 27 new or added lines in 3 files covered. (0.0%)

15 existing lines in 11 files now uncovered.

8982 of 14529 relevant lines covered (61.82%)

0.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/src/main/scala/com/johnsnowlabs/nlp/HasGeneratorProperties.scala

package com.johnsnowlabs.nlp

import org.apache.spark.ml.param._

/** Parameters to configure beam search text generation. */
/** Shared Spark ML params for annotators that generate text: input/output length limits,
  * sampling controls, beam search settings and stop tokens.
  *
  * This trait only declares the params with their setters and getters. It never calls
  * `setDefault`, so every "Default" quoted in the member docs below has to be supplied by the
  * class that mixes this trait in.
  */
trait HasGeneratorProperties {
  this: ParamsAndFeaturesWritable =>

  /** Transformer task, e.g. `"summarize:"` (Default: `""`).
    *
    * @group param
    */
  val task: Param[String] =
    new Param[String](this, "task", "Set transformer task, e.g. 'summarize'")

  /** Sets the task, but only when no task has been set explicitly yet; a later call with a
    * different value leaves the first one in place.
    *
    * @group setParam
    */
  def setTask(value: String): this.type =
    if (get(task).isDefined) this else set(task, value)

  /** The explicitly set task, if any.
    *
    * @group getParam
    */
  def getTask: Option[String] = get(task)

  /** Maximum length of the input sequence (Default: `0`).
    *
    * @group param
    */
  val maxInputLength: IntParam =
    new IntParam(this, "maxInputLength", "Maximum length of the input sequence")

  /** @group setParam */
  def setMaxInputLength(value: Int): this.type = set(maxInputLength, value)

  /** Minimum length of the sequence to be generated (Default: `0`).
    *
    * @group param
    */
  val minOutputLength: IntParam =
    new IntParam(this, "minOutputLength", "Minimum length of the sequence to be generated")

  /** @group setParam */
  def setMinOutputLength(value: Int): this.type = set(minOutputLength, value)

  /** @group getParam */
  def getMinOutputLength: Int = $(minOutputLength)

  /** Maximum length of the sequence to be generated (Default: `20`).
    *
    * @group param
    */
  val maxOutputLength: IntParam =
    new IntParam(this, "maxOutputLength", "Maximum length of the sequence to be generated")

  /** @group setParam */
  def setMaxOutputLength(value: Int): this.type = set(maxOutputLength, value)

  /** @group getParam */
  def getMaxOutputLength: Int = $(maxOutputLength)

  /** Whether to sample the next token; greedy decoding is used otherwise (Default: `false`).
    *
    * @group param
    */
  val doSample: BooleanParam =
    new BooleanParam(this, "doSample", "Whether or not to use sampling; use greedy decoding otherwise")

  /** @group setParam */
  def setDoSample(value: Boolean): this.type = set(doSample, value)

  /** @group getParam */
  def getDoSample: Boolean = $(doSample)

  /** The value used to modulate the next token probabilities (Default: `1.0`).
    *
    * @group param
    */
  val temperature: DoubleParam =
    new DoubleParam(this, "temperature", "The value used to module the next token probabilities")

  /** @group setParam */
  def setTemperature(value: Double): this.type = set(temperature, value)

  /** @group getParam */
  def getTemperature: Double = $(temperature)

  /** The number of highest probability vocabulary tokens to keep for top-k-filtering (Default:
    * `50`).
    *
    * @group param
    */
  val topK: IntParam =
    new IntParam(this, "topK", "The number of highest probability vocabulary tokens to keep for top-k-filtering")

  /** @group setParam */
  def setTopK(value: Int): this.type = set(topK, value)

  /** @group getParam */
  def getTopK: Int = $(topK)

  /** If set to a value < `1.0`, only the most probable tokens with probabilities that add up to
    * `topP` or higher are kept for generation (Default: `1.0`).
    *
    * @group param
    */
  val topP: DoubleParam =
    new DoubleParam(this, "topP", "If set to float < 1, only the most probable tokens with probabilities that add up to ``top_p`` or higher are kept for generation")

  /** @group setParam */
  def setTopP(value: Double): this.type = set(topP, value)

  /** @group getParam */
  def getTopP: Double = $(topP)

  /** Repetition penalty (Default: `1.0`); `1.0` means no penalty. See
    * [[https://arxiv.org/pdf/1909.05858.pdf this paper]] for more details.
    *
    * @group param
    */
  val repetitionPenalty: DoubleParam =
    new DoubleParam(this, "repetitionPenalty", "The parameter for repetition penalty. 1.0 means no penalty.")

  /** @group setParam */
  def setRepetitionPenalty(value: Double): this.type = set(repetitionPenalty, value)

  /** @group getParam */
  def getRepetitionPenalty: Double = $(repetitionPenalty)

  /** If set to an int > `0`, all ngrams of that size can only occur once (Default: `0`).
    *
    * @group param
    */
  val noRepeatNgramSize: IntParam =
    new IntParam(this, "noRepeatNgramSize", "If set to int > 0, all ngrams of that size can only occur once")

  /** @group setParam */
  def setNoRepeatNgramSize(value: Int): this.type = set(noRepeatNgramSize, value)

  /** @group getParam */
  def getNoRepeatNgramSize: Int = $(noRepeatNgramSize)

  /** Optional random seed for the model, held as a `Long`.
    *
    * This is a plain mutable field, not a Spark `Param`.
    * NOTE(review): as a plain field it is presumably not covered by param persistence or
    * copying — confirm against the implementing annotators.
    *
    * @group param
    */
  var randomSeed: Option[Long] = None

  /** Stores the seed only when none has been stored yet; once a seed is present, later calls
    * leave it unchanged.
    *
    * @group setParam
    */
  def setRandomSeed(value: Long): this.type = {
    if (randomSeed.isEmpty) randomSeed = Some(value)
    this
  }

  /** @group getParam */
  def getRandomSeed: Option[Long] = randomSeed

  /** Beam size for the beam search algorithm (Default: `4`).
    *
    * @group param
    */
  val beamSize: IntParam =
    new IntParam(this, "beamSize", "Number of beams for beam search.")

  /** @group setParam */
  def setBeamSize(beamNum: Int): this.type = set(beamSize, beamNum)

  /** @group getParam */
  def getBeamSize: Int = $(this.beamSize)

  /** The number of sequences to return from the beam search.
    *
    * @group param
    */
  val nReturnSequences: IntParam =
    new IntParam(this, "nReturnSequences", "The number of sequences to return from the beam search.")

  /** @group setParam */
  def setNReturnSequences(beamNum: Int): this.type = set(nReturnSequences, beamNum)

  /** @group getParam */
  def getNReturnSequences: Int = $(this.nReturnSequences)

  /** Token ids that terminate the generation when produced.
    *
    * @group param
    */
  val stopTokenIds: IntArrayParam =
    new IntArrayParam(this, "stopTokenIds", "Stop tokens to terminate the generation")

  /** @group setParam */
  def setStopTokenIds(value: Array[Int]): this.type = set(stopTokenIds, value)

  /** @group getParam */
  def getStopTokenIds: Array[Int] = $(this.stopTokenIds)
}

1	package com.johnsnowlabs.nlp
2
3	import org.apache.spark.ml.param._
4
5	/** Parameters to configure beam search text generation. */
6	trait HasGeneratorProperties {
7	this: ParamsAndFeaturesWritable =>
8
9	/** Set transformer task, e.g. `"summarize:"` (Default: `""`).
10	*
11	* @group param
12	*/
13	val task = new Param[String](this, "task", "Set transformer task, e.g. 'summarize'")	×
14
15	/** @group setParam */
16	def setTask(value: String): this.type = {
17	if (get(task).isEmpty)	×
18	set(task, value)	×
19	this
20	}
21
22	/** @group getParam */
23	def getTask: Option[String] = get(this.task)	×
24
25	/** max length of the input sequence (Default: `0`)
26	*
27	* @group param
28	*/
29	val maxInputLength =
30	new IntParam(this, "maxInputLength", "Maximum length of the input sequence")	×
31
32	def setMaxInputLength(value: Int): this.type = {
33	set(maxInputLength, value)	×
34	this
35	}
36
37	/** Minimum length of the sequence to be generated (Default: `0`)
38	*
39	* @group param
40	*/
41	val minOutputLength =
42	new IntParam(this, "minOutputLength", "Minimum length of the sequence to be generated")	×
43
44	/** @group setParam */
45	def setMinOutputLength(value: Int): this.type = {
46	set(minOutputLength, value)	×
47	this
48	}
49
50	/** @group getParam */
51	def getMinOutputLength: Int = $(this.minOutputLength)	×
52
53	/** Maximum length of the sequence to be generated (Default: `20`)
54	*
55	* @group param
56	*/
57	val maxOutputLength =
58	new IntParam(this, "maxOutputLength", "Maximum length of the sequence to be generated")	×
59
60	/** @group setParam */
61	def setMaxOutputLength(value: Int): this.type = {
62	set(maxOutputLength, value)	×
63	this
64	}
65
66	/** @group getParam */
67	def getMaxOutputLength: Int = $(this.maxOutputLength)	×
68
69	/** Whether or not to use sampling, use greedy decoding otherwise (Default: `false`)
70	*
71	* @group param
72	*/
73	val doSample = new BooleanParam(	×
74	this,
75	"doSample",	×
76	"Whether or not to use sampling; use greedy decoding otherwise")	×
77
78	/** @group setParam */
79	def setDoSample(value: Boolean): this.type = {
80	set(doSample, value)	×
81	this
82	}
83
84	/** @group getParam */
85	def getDoSample: Boolean = $(this.doSample)	×
86
87	/** The value used to module the next token probabilities (Default: `1.0`)
88	*
89	* @group param
90	*/
91	val temperature =
92	new DoubleParam(this, "temperature", "The value used to module the next token probabilities")	×
93
94	/** @group setParam */
95	def setTemperature(value: Double): this.type = {
96	set(temperature, value)	×
97	this
98	}
99
100	/** @group getParam */
101	def getTemperature: Double = $(this.temperature)	×
102
103	/** The number of highest probability vocabulary tokens to keep for top-k-filtering (Default:
104	* `50`)
105	*
106	* @group param
107	*/
108	val topK = new IntParam(	×
109	this,
110	"topK",	×
111	"The number of highest probability vocabulary tokens to keep for top-k-filtering")	×
112
113	/** @group setParam */
114	def setTopK(value: Int): this.type = {
115	set(topK, value)	×
116	this
117	}
118
119	/** @group getParam */
120	def getTopK: Int = $(this.topK)	×
121
122	/** If set to float < `1.0`, only the most probable tokens with probabilities that add up to
123	* `topP` or higher are kept for generation (Default: `1.0`)
124	*
125	* @group param
126	*/
127	val topP = new DoubleParam(	×
128	this,
129	"topP",	×
130	"If set to float < 1, only the most probable tokens with probabilities that add up to ``top_p`` or higher are kept for generation")	×
131
132	/** @group setParam */
133	def setTopP(value: Double): this.type = {
134	set(topP, value)	×
135	this
136	}
137
138	/** @group getParam */
139	def getTopP: Double = $(this.topP)	×
140
141	/** The parameter for repetition penalty (Default: `1.0`). `1.0` means no penalty. See
142	* [[https://arxiv.org/pdf/1909.05858.pdf this paper]] for more details.
143	*
144	* @group param
145	*/
146	val repetitionPenalty = new DoubleParam(	×
147	this,
148	"repetitionPenalty",	×
149	"The parameter for repetition penalty. 1.0 means no penalty.")	×
150
151	/** @group setParam */
152	def setRepetitionPenalty(value: Double): this.type = {
153	set(repetitionPenalty, value)	×
154	this
155	}
156
157	/** @group getParam */
158	def getRepetitionPenalty: Double = $(this.repetitionPenalty)	×
159
160	/** If set to int > `0`, all ngrams of that size can only occur once (Default: `0`)
161	*
162	* @group param
163	*/
164	val noRepeatNgramSize = new IntParam(	×
165	this,
166	"noRepeatNgramSize",	×
167	"If set to int > 0, all ngrams of that size can only occur once")	×
168
169	/** @group setParam */
170	def setNoRepeatNgramSize(value: Int): this.type = {
171	set(noRepeatNgramSize, value)	×
172	this
173	}
174
175	/** @group getParam */
176	def getNoRepeatNgramSize: Int = $(this.noRepeatNgramSize)	×
177
178	/** Optional Random seed for the model. Needs to be of type `Int`.
179	*
180	* @group param
181	*/
182	var randomSeed: Option[Long] = None	×
183
184	/** @group setParam */
185	def setRandomSeed(value: Long): this.type = {
186	if (randomSeed.isEmpty) {	×
187	this.randomSeed = Some(value)	×
188	}
189	this
190	}
191
192	/** @group getParam */
193	def getRandomSeed: Option[Long] = this.randomSeed	×
194
195	/** Beam size for the beam search algorithm (Default: `4`)
196	*
197	* @group param
198	*/
199	val beamSize = new IntParam(this, "beamSize", "Number of beams for beam search.")	×
200
201	/** @group setParam */
202	def setBeamSize(beamNum: Int): this.type = {
203	set(beamSize, beamNum)	×
204	}
205
206	/** @group getParam */
207	def getBeamSize: Int = $(beamSize)	×
208
209	/** The number of sequences to return from the beam search.
210	*
211	* @group param
212	*/
213	val nReturnSequences = new IntParam(	×
214	this,
215	"nReturnSequences",	×
216	"The number of sequences to return from the beam search.")	×
217
218	/** @group setParam */
219	def setNReturnSequences(beamNum: Int): this.type = {
220	set(nReturnSequences, beamNum)	×
221	}
222
223	/** @group getParam */
224	def getNReturnSequences: Int = $(nReturnSequences)	×
225
226	/** Stop tokens to terminate the generation
227	*
228	* @group param
229	*/
230	val stopTokenIds =
NEW 231	new IntArrayParam(this, "stopTokenIds", "Stop tokens to terminate the generation")	×
232
233	/** @group setParam */
234	def setStopTokenIds(value: Array[Int]): this.type = {
235	set(stopTokenIds, value)	×
236	}
237
238	/** @group getParam */
239	def getStopTokenIds: Array[Int] = $(stopTokenIds)	×
240	}

JohnSnowLabs / spark-nlp / 10675586955

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous