• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JohnSnowLabs / spark-nlp / 13883000244

16 Mar 2025 11:44AM UTC coverage: 59.034% (-1.0%) from 60.072%
13883000244

Pull #14444

github

web-flow
Merge 6d717703b into 05000ab4a
Pull Request #14444: Sparknlp 1060 implement phi 3.5 vision

0 of 292 new or added lines in 5 files covered. (0.0%)

20 existing lines in 14 files now uncovered.

9413 of 15945 relevant lines covered (59.03%)

0.59 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/util/transform/Phi3vUtils.scala
1
package com.johnsnowlabs.nlp.annotators.cv.util.transform
2
import java.awt.image.BufferedImage
3
import java.awt.{Color, Graphics2D}
4
import scala.collection.mutable.ListBuffer
5
import scala.collection.mutable.ArrayBuffer
6

7
import ImageResizeUtils.resizeBufferedImage
8

9
private[johnsnowlabs] object Phi3vUtils {
10
  // padding image
11

12
  /** Pads an image vertically with white so that its height becomes a multiple of 336.
    *
    * The width is left unchanged. The extra rows are split between top and bottom; when the
    * amount is odd, the top receives the smaller half.
    *
    * @param image
    *   image to pad
    * @return
    *   a new TYPE_INT_RGB image of size (width, ceil(height / 336) * 336)
    */
  def padding_336(image: BufferedImage): BufferedImage = {
    val srcWidth = image.getWidth
    val srcHeight = image.getHeight

    // Smallest multiple of 336 that is >= the source height
    val paddedHeight = Math.ceil(srcHeight.toDouble / 336).toInt * 336
    // Integer division: the top offset is the floor of half the extra rows
    val offsetY = (paddedHeight - srcHeight) / 2

    val canvas = new BufferedImage(srcWidth, paddedHeight, BufferedImage.TYPE_INT_RGB)
    val graphics: Graphics2D = canvas.createGraphics()

    // Paint the white background first, then the source image on top of it
    graphics.setColor(Color.WHITE)
    graphics.fillRect(0, 0, srcWidth, paddedHeight)
    graphics.drawImage(image, 0, offsetY, null)
    graphics.dispose()

    canvas
  }
47

48
  /** Transposes an image, i.e. mirrors it across its main diagonal.
    *
    * The pixel at (x, y) of the input ends up at (y, x) of the output, so the output is
    * `img.getHeight` wide and `img.getWidth` tall, and transposing twice restores the
    * original pixel layout.
    *
    * The previous implementation rotated the image by 90 degrees clockwise
    * (`rotate(Pi / 2)` followed by `translate(0, -height)`), which is not a transposition:
    * applying it twice produced a 180 degree rotation rather than the original orientation.
    *
    * @param img
    *   image to transpose
    * @return
    *   a new image with swapped axes. It has the same image type as `img`, except that
    *   TYPE_CUSTOM inputs produce a TYPE_INT_ARGB result.
    */
  def transposeImage(img: BufferedImage): BufferedImage = {
    val srcWidth = img.getWidth
    val srcHeight = img.getHeight

    // BufferedImage cannot be constructed with TYPE_CUSTOM (0); fall back to ARGB
    val imageType =
      if (img.getType == BufferedImage.TYPE_CUSTOM) BufferedImage.TYPE_INT_ARGB else img.getType
    val transposedImage = new BufferedImage(srcHeight, srcWidth, imageType)

    // Row-major ARGB pixels of the source (scan line length = srcWidth)
    val srcPixels = img.getRGB(0, 0, srcWidth, srcHeight, null, 0, srcWidth)
    val dstPixels = new Array[Int](srcPixels.length)

    // Source (x, y) -> destination (y, x); the destination scan line length is srcHeight
    for (y <- 0 until srcHeight; x <- 0 until srcWidth) {
      dstPixels(x * srcHeight + y) = srcPixels(y * srcWidth + x)
    }

    transposedImage.setRGB(0, 0, srcHeight, srcWidth, dstPixels, 0, srcHeight)
    transposedImage
  }
59

60
  /** Computes the size an image has after its height is padded up to the next multiple of
    * `padding_unit`. The width is never padded.
    *
    * The previous implementation evaluated `Math.ceil(height / padding_unit)` with integer
    * division, which truncates before `ceil` runs and therefore rounded the height down
    * (e.g. 400 became 336 instead of 672).
    *
    * @param width
    *   image width in pixels
    * @param height
    *   image height in pixels
    * @param padding_unit
    *   the height is rounded up to a multiple of this value; must be positive
    * @return
    *   (padded width, padded height)
    */
  def calc_padded_size(width: Int, height: Int, padding_unit: Int = 336): (Int, Int) = {
    require(padding_unit > 0, s"padding_unit must be positive, got $padding_unit")

    // Divide as Double so that ceil really rounds up; Int / Int would truncate first
    val targetHeight = Math.ceil(height.toDouble / padding_unit).toInt * padding_unit

    // Top and bottom padding always add up to (targetHeight - height); left/right padding is 0
    val topPadding = (targetHeight - height) / 2
    val bottomPadding = targetHeight - height - topPadding

    (width, height + topPadding + bottomPadding)
  }
70

71
  /** Resizes an image to a "high definition" layout and pads its shorter side.
    *
    * Steps:
    *   1. If the image is taller than wide it is passed through `transposeImage` so that the
    *      remaining steps work on a landscape image.
    *   1. The largest integer `scale` with `scale * ceil(scale / ratio) <= hdNum` is chosen,
    *      where `ratio = width / height`.
    *   1. The image is resized to width `scale * 336`, keeping the aspect ratio.
    *   1. The height is padded via `padding_336`.
    *   1. If step 1 was applied, `transposeImage` is applied again to the result.
    *
    * @param img
    *   input image
    * @param hdNum
    *   upper bound used when choosing the scale (see step 2)
    * @return
    *   the resized and padded image
    */
  def HDTransform(img: BufferedImage, hdNum: Int = 16): BufferedImage = {
    // Portrait inputs are processed in transposed form
    val wasTransposed = img.getWidth < img.getHeight
    val oriented = if (wasTransposed) transposeImage(img) else img

    val aspect = oriented.getWidth.toDouble / oriented.getHeight.toDouble

    // First scale that violates the bound, minus one, is the largest scale that satisfies it
    val firstTooLarge =
      Iterator.from(1).dropWhile(s => s * math.ceil(s / aspect) <= hdNum).next()
    val scale = firstTooLarge - 1

    val newWidth = scale * 336
    val newHeight = (newWidth / aspect).toInt

    // NOTE(review): the third argument (2) is passed through to resizeBufferedImage as-is;
    // its meaning is defined by ImageResizeUtils - confirm there.
    val resized = resizeBufferedImage(newWidth, newHeight, 2)(oriented)

    // Pad the height up to a multiple of 336
    val padded = padding_336(resized)

    if (wasTransposed) transposeImage(padded) else padded
  }
111

112
  // Function to extract a subimage and reset position information
113
  /** Copies a rectangular region of an image into a freshly allocated image.
    *
    * Unlike `BufferedImage.getSubimage`, the result does not share pixel data with the
    * source and its origin is (0, 0).
    *
    * @param image
    *   source image
    * @param x
    *   left edge of the region in the source
    * @param y
    *   top edge of the region in the source
    * @param width
    *   region width
    * @param height
    *   region height
    * @return
    *   a new image of size (width, height) with the same image type as `image`
    */
  def getNewSubimage(
      image: BufferedImage,
      x: Int,
      y: Int,
      width: Int,
      height: Int): BufferedImage = {
    val region = new BufferedImage(width, height, image.getType)
    val graphics: Graphics2D = region.createGraphics()

    // Destination rectangle: the whole new image. Source rectangle: the requested window.
    val (srcRight, srcBottom) = (x + width, y + height)
    graphics.drawImage(image, 0, 0, width, height, x, y, srcRight, srcBottom, null)

    graphics.dispose()
    region
  }
134

135
  // Function to calculate the shapes (height and width of the image)
136
  /** Collects the shape of every image as a two-element array `[height, width]`.
    *
    * @param images
    *   images to measure
    * @return
    *   one `Array(height, width)` per input image, in input order
    */
  def calculateShapes(images: List[BufferedImage]): Array[Array[Int]] = {
    val shapes = for (image <- images) yield Array(image.getHeight, image.getWidth)
    shapes.toArray
  }
139

140
  // Function to calculate the number of image tokens
141
//  def calculateImageTokens(shapes: List[(Int, Int)]): List[Int] = {
142
//    shapes.map { case (h, w) =>
143
//      ((h / 336) * (w / 336) + 1) * 144 + 1 + ((h / 336 + 1) * 12)
144
//    }
145
//  }
146

147
  /** Computes the number of image tokens for each `[height, width]` shape.
    *
    * With `rows = h / 336` and `cols = w / 336` (integer division) the count is
    * `(rows * cols + 1) * 144 + 1 + (rows + 1) * 12`.
    *
    * @param shapes
    *   one two-element array `[height, width]` per image; any other length raises a
    *   `MatchError`
    * @return
    *   the token count for every shape, in input order
    */
  def calculateImageTokens(shapes: Array[Array[Int]]): List[Int] = {
    shapes.toList.map { case Array(h, w) =>
      val rows = h / 336
      val cols = w / 336
      (rows * cols + 1) * 144 + 1 + (rows + 1) * 12
    }
  }
152

153
  // Function to reshape the images (assuming each image is already HD transformed)
154
//  def reshapeImages(
155
//      images: List[BufferedImage],
156
//      shapes: List[(Int, Int)]): List[List[BufferedImage]] = {
157
//    images.zip(shapes).map { case (img, (h, w)) =>
158
//      val numH = h / 336
159
//      val numW = w / 336
160
//      val reshapedImages = new ListBuffer[BufferedImage]
161
//
162
//      // Splitting the image into 336x336 crops
163
//      for (i <- 0 until numH; j <- 0 until numW) {
164
//        val crop = getNewSubimage(img, j * 336, i * 336, 336, 336)
165
//        reshapedImages += crop
166
//      }
167
//      reshapedImages.toList
168
//    }
169
//  }
170

171
  /** Cuts every image into 336x336 tiles.
    *
    * The number of tiles per image is taken from the matching entry of `shapes`
    * (`h / 336` rows and `w / 336` columns), not from the image itself. Tiles are produced
    * in row-major order: left to right within a row, rows from top to bottom.
    *
    * @param images
    *   images to split
    * @param shapes
    *   one `[height, width]` array per image; images and shapes are paired by position
    * @return
    *   for every image the list of its tiles
    */
  def reshapeImages(
      images: List[BufferedImage],
      shapes: Array[Array[Int]]): List[List[BufferedImage]] = {
    images.zip(shapes).map { case (img, Array(h, w)) =>
      val rows = h / 336
      val cols = w / 336

      val tiles =
        for {
          row <- 0 until rows
          col <- 0 until cols
        } yield getNewSubimage(img, col * 336, row * 336, 336, 336)

      tiles.toList
    }
  }
187

188
  // Function to concatenate global and local images (manually)
189
  /** Lays out the global image and the local crops side by side in a single row.
    *
    * The result is `336 * (localImages.size + 1)` pixels wide and 336 pixels tall. The
    * global image is drawn at x = 0 and the i-th local image at x = (i + 1) * 336. Images
    * are drawn at their own size; areas that no image covers keep the default (black)
    * content of a new TYPE_INT_RGB image.
    *
    * @param globalImage
    *   image placed in the first slot
    * @param localImages
    *   images placed in the following slots, in order
    * @return
    *   the combined TYPE_INT_RGB image
    */
  def concatenateImages(
      globalImage: BufferedImage,
      localImages: List[BufferedImage]): BufferedImage = {
    val tile = 336
    val canvasWidth = tile * localImages.size + tile
    val canvas = new BufferedImage(canvasWidth, tile, BufferedImage.TYPE_INT_RGB)
    val graphics: Graphics2D = canvas.createGraphics()

    // Slot 0 holds the global image, slots 1..n hold the local crops
    (globalImage :: localImages).zipWithIndex.foreach { case (image, slot) =>
      graphics.drawImage(image, slot * tile, 0, null)
    }

    graphics.dispose()
    canvas
  }
208

209
  // Function to pad the images to a specified number of crops (maxNumCrops)
210
  /** Places an image on a white canvas that is `336 * maxNumCrops` pixels wide.
    *
    * The canvas has the same height as the input and the input is drawn at the top-left
    * corner, so any extra width on the right is white.
    *
    * @param image
    *   image to pad
    * @param maxNumCrops
    *   number of 336-pixel-wide slots the result should span
    * @return
    *   a new TYPE_INT_RGB image of size (336 * maxNumCrops, image height)
    */
  def padToMaxNumCrops(image: BufferedImage, maxNumCrops: Int): BufferedImage = {
    val canvasWidth = 336 * maxNumCrops
    val canvasHeight = image.getHeight

    val canvas = new BufferedImage(canvasWidth, canvasHeight, BufferedImage.TYPE_INT_RGB)
    val graphics: Graphics2D = canvas.createGraphics()

    // White background first, then the source image at the origin
    graphics.setColor(Color.WHITE)
    graphics.fillRect(0, 0, canvasWidth, canvasHeight)
    graphics.drawImage(image, 0, 0, null)
    graphics.dispose()

    canvas
  }
229

230
  // Main function that processes the HD transformed images
231
  /** Turns HD-transformed images into the row-of-tiles layout, together with their shapes
    * and token counts.
    *
    * For every input image:
    *   - a 336x336 "global" version is produced with `resizeBufferedImage`,
    *   - its `[height, width]` shape and image token count are computed,
    *   - it is split into 336x336 tiles,
    *   - global image and tiles are concatenated into one row and padded with white to
    *     `numCrops + 1` slots.
    *
    * @param hdImages
    *   images whose sides are expected to be multiples of 336 (assumption - this method
    *   does not check it)
    * @param numCrops
    *   maximum number of local crops; the output row has one extra slot for the global image
    * @return
    *   (padded row images, shapes as `[height, width]`, image token counts)
    */
  def processHdImages(
      hdImages: List[BufferedImage],
      numCrops: Int): (List[BufferedImage], Array[Array[Int]], List[Int]) = {
    // NOTE(review): the third argument (3) is forwarded to resizeBufferedImage unchanged;
    // its meaning is defined by ImageResizeUtils - confirm there.
    val toGlobal = resizeBufferedImage(336, 336, 3)
    val globalImages = hdImages.map(toGlobal)

    val shapes = calculateShapes(hdImages)
    val numImgTokens = calculateImageTokens(shapes)
    val localCrops = reshapeImages(hdImages, shapes)

    // Global image followed by its local crops, then padded to a fixed number of slots
    val paddedImages = globalImages.zip(localCrops).map { case (global, locals) =>
      padToMaxNumCrops(concatenateImages(global, locals), numCrops + 1)
    }

    (paddedImages, shapes, numImgTokens)
  }
257

258
  // Function to normalize pixel values of an image crop
259
  /** Normalizes a channel-first crop of integer pixel values.
    *
    * Every value is scaled by 1 / 255, shifted by the channel mean and divided by the
    * channel standard deviation: `(value / 255.0 - mean(c)) / std(c)`.
    *
    * @param imgCrop
    *   pixel values indexed as (channel)(row)(column); must be non-empty, and the size of
    *   the first channel / first row determines the output height / width
    * @param mean
    *   per-channel mean, indexed by channel
    * @param std
    *   per-channel standard deviation, indexed by channel
    * @return
    *   normalized values with the same (channel)(row)(column) layout
    */
  def normalizeImageCrop(
      imgCrop: Array[Array[Array[Int]]],
      mean: Array[Double],
      std: Array[Double]): Array[Array[Array[Float]]] = {
    val numChannels = imgCrop.length
    val numRows = imgCrop(0).length
    val numCols = imgCrop(0)(0).length

    Array.tabulate(numChannels, numRows, numCols) { (c, row, col) =>
      // The subtraction happens in Double, the division in Float
      (imgCrop(c)(row)(col) / 255.0 - mean(c)).toFloat / std(c).toFloat
    }
  }
281

282
  // Helper function to convert a BufferedImage crop to a 3D array (3, 336, 336) for RGB channels
283
  /** Converts an image into a channel-first integer array of its red, green and blue values.
    *
    * @param imgCrop
    *   image to read
    * @return
    *   array indexed as (channel)(row)(column) with channel 0 = red, 1 = green, 2 = blue and
    *   values in 0..255
    */
  def imageCropToArray(imgCrop: BufferedImage): Array[Array[Array[Int]]] = {
    val numRows = imgCrop.getHeight
    val numCols = imgCrop.getWidth
    val planes = Array.ofDim[Int](3, numRows, numCols)

    for (row <- 0 until numRows; col <- 0 until numCols) {
      // getRGB returns a packed 0xAARRGGBB value; unpack the three colour bytes
      val argb = imgCrop.getRGB(col, row)
      planes(0)(row)(col) = (argb >> 16) & 0xff
      planes(1)(row)(col) = (argb >> 8) & 0xff
      planes(2)(row)(col) = argb & 0xff
    }

    planes
  }
300

301
  // Function to split an image into 336x336 crops, convert to a 3D array, and normalize if required
302
  /** Splits an image into square crops and converts each crop into a channel-first float
    * array.
    *
    * The image is cut into `height / cropSize` rows and `width / cropSize` columns (integer
    * division, so any remainder on the right/bottom is ignored). Crops are emitted in
    * row-major order. Each crop is either normalized with `normalizeImageCrop` or, when
    * `normalize` is false, only scaled to the range 0..1 by dividing by 255.
    *
    * @param image
    *   image to split
    * @param cropSize
    *   side length of a crop in pixels
    * @param normalize
    *   whether to apply mean/std normalization
    * @param mean
    *   per-channel mean used when `normalize` is true
    * @param std
    *   per-channel standard deviation used when `normalize` is true
    * @return
    *   (crops indexed as (crop)(channel)(row)(column), number of rows * number of columns)
    */
  def splitImageToCrops(
      image: BufferedImage,
      cropSize: Int = 336,
      normalize: Boolean = false,
      mean: Array[Double] = Array(0.48145466, 0.4578275, 0.40821073),
      std: Array[Double] = Array(0.26862954, 0.26130258, 0.27577711))
      : (Array[Array[Array[Array[Float]]]], Int) = {
    val rows = image.getHeight / cropSize
    val cols = image.getWidth / cropSize

    val crops =
      for {
        row <- 0 until rows
        col <- 0 until cols
      } yield {
        val tile = image.getSubimage(col * cropSize, row * cropSize, cropSize, cropSize)
        val pixels = imageCropToArray(tile)

        if (normalize) normalizeImageCrop(pixels, mean, std)
        else pixels.map(_.map(_.map(_.toFloat / 255.0.toFloat))) // plain 0..1 scaling
      }

    (crops.toArray, rows * cols)
  }
341

342
  // Function to convert processedImages (BufferedImages) into a 5D array (b, h//336 * w//336, 3, 336, 336)
343
  /** Converts a batch of images into a 5D float array by splitting each one into crops.
    *
    * Every image is passed to `splitImageToCrops` with its default crop size; only the crop
    * array of each result is kept.
    *
    * @param processedImages
    *   images to convert
    * @param normalize
    *   forwarded to `splitImageToCrops`
    * @param mean
    *   forwarded to `splitImageToCrops`
    * @param std
    *   forwarded to `splitImageToCrops`
    * @return
    *   array indexed as (image)(crop)(channel)(row)(column)
    */
  def processedImagesTo5DArray(
      processedImages: List[BufferedImage],
      normalize: Boolean = false,
      mean: Array[Double] = Array(0.48145466, 0.4578275, 0.40821073),
      std: Array[Double] = Array(0.26862954, 0.26130258, 0.27577711))
      : (Array[Array[Array[Array[Array[Float]]]]]) = {
    processedImages.map { img =>
      // The crop count returned alongside the crops is not needed here
      val (crops, _) = splitImageToCrops(img, normalize = normalize, mean = mean, std = std)
      crops
    }.toArray
  }
362
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc