13883000244

Committed 16 Mar 2025 11:44AM UTC coverage: 59.034% (-1.0%) from 60.072%

Build # 13883000244

Build Type

Pull #14444

github

Committed by

web-flow

Commit Message

Merge 6d717703b into 05000ab4a

Pull Request Pull Request #14444: Sparknlp 1060 implement phi 3.5 vision

Run Details

0 of 292 new or added lines in 5 files covered. (0.0%)

20 existing lines in 14 files now uncovered.

9413 of 15945 relevant lines covered (59.03%)

0.59 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/util/transform/Phi3vUtils.scala

1	package com.johnsnowlabs.nlp.annotators.cv.util.transform
2	import java.awt.image.BufferedImage
3	import java.awt.{Color, Graphics2D}
4	import scala.collection.mutable.ListBuffer
5	import scala.collection.mutable.ArrayBuffer
6
7	import ImageResizeUtils.resizeBufferedImage
8
9	private[johnsnowlabs] object Phi3vUtils {
/** Pads an image vertically with white so that its height becomes a multiple of 336.
  *
  * The width is left untouched. The missing rows are split between the top and the bottom edge;
  * when the number of missing rows is odd, the extra row goes to the bottom.
  *
  * @param image
  *   image to pad
  * @return
  *   a new `TYPE_INT_RGB` image with the same width and a height of `ceil(height / 336) * 336`
  */
def padding_336(image: BufferedImage): BufferedImage = {
  val tile = 336
  val srcWidth = image.getWidth
  val srcHeight = image.getHeight

  // Smallest multiple of the tile size that is >= the source height.
  val paddedHeight = Math.ceil(srcHeight.toDouble / tile).toInt * tile
  // Integer division rounds down, so an odd leftover row ends up below the image.
  val offsetY = (paddedHeight - srcHeight) / 2

  val canvas = new BufferedImage(srcWidth, paddedHeight, BufferedImage.TYPE_INT_RGB)
  val gfx: Graphics2D = canvas.createGraphics()

  // White background first, then the source drawn on top of it, flush with the left edge.
  gfx.setColor(Color.WHITE)
  gfx.fillRect(0, 0, srcWidth, paddedHeight)
  gfx.drawImage(image, 0, offsetY, null)
  gfx.dispose()

  canvas
}
47
/** Transposes an image, i.e. mirrors it across its main diagonal.
  *
  * The pixel at `(x, y)` of the input ends up at `(y, x)` of the output, so the result has the
  * input's height as its width and the input's width as its height. A true transpose (rather
  * than a 90 degree rotation) is used on purpose: applying it twice restores the original
  * orientation, which callers that transpose, process and transpose back rely on.
  *
  * @param img
  *   image to transpose
  * @return
  *   a new image of size `img.getHeight x img.getWidth`; it has the same image type as `img`,
  *   except that `TYPE_CUSTOM` inputs produce a `TYPE_INT_ARGB` result
  */
def transposeImage(img: BufferedImage): BufferedImage = {
  val srcWidth = img.getWidth
  val srcHeight = img.getHeight

  // BufferedImage cannot be constructed with TYPE_CUSTOM (0); fall back to ARGB in that case.
  val imageType =
    if (img.getType == BufferedImage.TYPE_CUSTOM) BufferedImage.TYPE_INT_ARGB else img.getType
  val transposedImage = new BufferedImage(srcHeight, srcWidth, imageType)

  // Row-major ARGB pixels of the source: index = y * srcWidth + x.
  val srcPixels = img.getRGB(0, 0, srcWidth, srcHeight, null, 0, srcWidth)
  val dstPixels = new Array[Int](srcPixels.length)

  var y = 0
  while (y < srcHeight) {
    var x = 0
    while (x < srcWidth) {
      // Destination rows are srcHeight pixels wide; source (x, y) lands on destination (y, x).
      dstPixels(x * srcHeight + y) = srcPixels(y * srcWidth + x)
      x += 1
    }
    y += 1
  }

  transposedImage.setRGB(0, 0, srcHeight, srcWidth, dstPixels, 0, srcHeight)
  transposedImage
}
59
/** Computes the size an image has after its height is padded up to a multiple of
  * `padding_unit`.
  *
  * Padding is only ever added above and below the image, so the width is returned unchanged.
  *
  * @param width
  *   current image width
  * @param height
  *   current image height
  * @param padding_unit
  *   the height is rounded up to the next multiple of this value
  * @return
  *   `(paddedWidth, paddedHeight)`
  */
def calc_padded_size(width: Int, height: Int, padding_unit: Int = 336): (Int, Int) = {
  // Integer ceiling division. Dividing the two Ints first and calling Math.ceil afterwards
  // would truncate before rounding and yield a height smaller than the input.
  val unitCount = (height + padding_unit - 1) / padding_unit
  val paddedHeight = unitCount * padding_unit

  // Top and bottom padding always add up to (paddedHeight - height); left/right padding is 0.
  (width, paddedHeight)
}
70
/** Rescales an image so that it can be cut into 336-pixel tiles, keeping its aspect ratio.
  *
  * Steps, in order:
  *   1. If the image is taller than wide it is passed through `transposeImage`, so the rest of
  *      the computation always sees width >= height.
  *   1. The largest `scale` is chosen such that `s * ceil(s / ratio) <= hdNum` holds for every
  *      `s` from 1 up to `scale`.
  *   1. The image is resized to a width of `scale * 336` and a height derived from the ratio.
  *   1. `padding_336` pads the height up to a multiple of 336.
  *   1. If step 1 transposed the image, `transposeImage` is applied once more.
  *
  * @param img
  *   input image
  * @param hdNum
  *   upper bound used when selecting the scale (see step 2)
  * @return
  *   the resized and padded image
  */
def HDTransform(img: BufferedImage, hdNum: Int = 16): BufferedImage = {
  // Portrait inputs are flipped here and flipped back at the very end.
  val wasTransposed = img.getWidth < img.getHeight
  val landscape = if (wasTransposed) transposeImage(img) else img

  val aspect = landscape.getWidth.toDouble / landscape.getHeight.toDouble

  // Count how many consecutive candidates 1, 2, 3, ... satisfy the bound; that count is the
  // largest admissible scale.
  val scale = Iterator.from(1).takeWhile(s => s * math.ceil(s / aspect) <= hdNum).size

  val targetWidth = scale * 336
  val targetHeight = (targetWidth / aspect).toInt

  // The third argument (2) is forwarded to resizeBufferedImage unchanged.
  val resized = resizeBufferedImage(targetWidth, targetHeight, 2)(landscape)

  // Pads the height (not the width) up to the next multiple of 336.
  val padded = padding_336(resized)

  if (wasTransposed) transposeImage(padded) else padded
}
111
/** Copies a rectangular region of an image into a freshly allocated image.
  *
  * The result is a new image, not a view onto `image`: its origin is `(0, 0)` and its pixels are
  * drawn from the requested region.
  *
  * @param image
  *   source image
  * @param x
  *   left edge of the region in `image`
  * @param y
  *   top edge of the region in `image`
  * @param width
  *   width of the region
  * @param height
  *   height of the region
  * @return
  *   a new `width x height` image with the same image type as `image`
  */
def getNewSubimage(
    image: BufferedImage,
    x: Int,
    y: Int,
    width: Int,
    height: Int): BufferedImage = {
  val region = new BufferedImage(width, height, image.getType)
  val gfx: Graphics2D = region.createGraphics()

  // Corners of the source rectangle; the destination rectangle is the whole new image.
  val srcRight = x + width
  val srcBottom = y + height
  gfx.drawImage(image, 0, 0, width, height, x, y, srcRight, srcBottom, null)
  gfx.dispose()

  region
}
134
/** Collects the dimensions of every image as `Array(height, width)`.
  *
  * @param images
  *   images to measure
  * @return
  *   one two-element array per image, in input order; height comes first, width second
  */
def calculateShapes(images: List[BufferedImage]): Array[Array[Int]] = {
  val shapes = for (image <- images) yield Array(image.getHeight, image.getWidth)
  shapes.toArray
}
139
140	// Function to calculate the number of image tokens
141	// def calculateImageTokens(shapes: List[(Int, Int)]): List[Int] = {
142	// shapes.map { case (h, w) =>
143	// ((h / 336) * (w / 336) + 1) * 144 + 1 + ((h / 336 + 1) * 12)
144	// }
145	// }
146
/** Computes the number of image tokens for each `(height, width)` shape.
  *
  * With `rows = h / 336` and `cols = w / 336` (integer division) the count is
  * `(rows * cols + 1) * 144 + 1 + (rows + 1) * 12`.
  *
  * @param shapes
  *   one `Array(height, width)` per image; an entry of any other length raises a `MatchError`
  * @return
  *   the token count for every shape, in input order
  */
def calculateImageTokens(shapes: Array[Array[Int]]): List[Int] = {
  shapes.toList.map { shape =>
    // Destructuring with a val pattern keeps the MatchError for malformed entries.
    val Array(h, w) = shape
    val rows = h / 336
    val cols = w / 336
    val tileTerm = (rows * cols + 1) * 144
    val rowTerm = (rows + 1) * 12
    tileTerm + 1 + rowTerm
  }
}
152
153	// Function to reshape the images (assuming each image is already HD transformed)
154	// def reshapeImages(
155	// images: List[BufferedImage],
156	// shapes: List[(Int, Int)]): List[List[BufferedImage]] = {
157	// images.zip(shapes).map { case (img, (h, w)) =>
158	// val numH = h / 336
159	// val numW = w / 336
160	// val reshapedImages = new ListBuffer[BufferedImage]
161	//
162	// // Splitting the image into 336x336 crops
163	// for (i <- 0 until numH; j <- 0 until numW) {
164	// val crop = getNewSubimage(img, j * 336, i * 336, 336, 336)
165	// reshapedImages += crop
166	// }
167	// reshapedImages.toList
168	// }
169	// }
170
/** Cuts every image into 336x336 tiles.
  *
  * Images and shapes are paired by position. For each pair, `h / 336` rows and `w / 336` columns
  * of tiles are extracted with `getNewSubimage`, row by row from the top-left corner.
  *
  * @param images
  *   images to split
  * @param shapes
  *   one `Array(height, width)` per image; an entry of any other length raises a `MatchError`
  * @return
  *   for each image, its tiles in row-major order
  */
def reshapeImages(
    images: List[BufferedImage],
    shapes: Array[Array[Int]]): List[List[BufferedImage]] = {
  val tile = 336
  images.zip(shapes).map { case (img, Array(h, w)) =>
    val rows = h / tile
    val cols = w / tile
    val tiles = for {
      row <- 0 until rows
      col <- 0 until cols
    } yield getNewSubimage(img, col * tile, row * tile, tile, tile)
    tiles.toList
  }
}
187
/** Lays out the global image and all local images side by side in a single row.
  *
  * The result is 336 pixels high and has one 336-pixel-wide slot per image: slot 0 holds the
  * global image, slots 1..n hold the local images in list order. Every image is drawn at its
  * natural size with its top-left corner at the start of its slot; the canvas is not filled
  * beforehand.
  *
  * @param globalImage
  *   image drawn into the first slot
  * @param localImages
  *   images drawn into the following slots
  * @return
  *   a new `TYPE_INT_RGB` image of size `336 * (localImages.size + 1) x 336`
  */
def concatenateImages(
    globalImage: BufferedImage,
    localImages: List[BufferedImage]): BufferedImage = {
  val tile = 336
  val canvas =
    new BufferedImage(tile * localImages.size + tile, tile, BufferedImage.TYPE_INT_RGB)
  val gfx: Graphics2D = canvas.createGraphics()

  // Prepending the global image gives it slot 0 and shifts each local image one slot right.
  (globalImage :: localImages).zipWithIndex.foreach { case (tileImage, slot) =>
    gfx.drawImage(tileImage, slot * tile, 0, null)
  }

  gfx.dispose()
  canvas
}
208
/** Places an image on a white canvas that is `336 * maxNumCrops` pixels wide.
  *
  * The canvas keeps the height of the input. The input is drawn at the top-left corner, and
  * everything to its right stays white.
  *
  * @param image
  *   image to widen
  * @param maxNumCrops
  *   number of 336-pixel-wide slots the result should span
  * @return
  *   a new `TYPE_INT_RGB` image of size `336 * maxNumCrops x image.getHeight`
  */
def padToMaxNumCrops(image: BufferedImage, maxNumCrops: Int): BufferedImage = {
  val canvasWidth = 336 * maxNumCrops
  val canvasHeight = image.getHeight

  val canvas = new BufferedImage(canvasWidth, canvasHeight, BufferedImage.TYPE_INT_RGB)
  val gfx: Graphics2D = canvas.createGraphics()

  // White background first, then the original image on top of it.
  gfx.setColor(Color.WHITE)
  gfx.fillRect(0, 0, canvasWidth, canvasHeight)
  gfx.drawImage(image, 0, 0, null)
  gfx.dispose()

  canvas
}
229
/** Turns HD-transformed images into padded rows of 336x336 tiles.
  *
  * For every input image:
  *   - a global view is produced with `resizeBufferedImage(336, 336, 3)`,
  *   - the image is cut into 336x336 tiles (`reshapeImages`),
  *   - the global view and the tiles are laid out side by side (`concatenateImages`),
  *   - the row is widened to `numCrops + 1` slots with white padding (`padToMaxNumCrops`).
  *
  * @param hdImages
  *   images to process; presumably the output of `HDTransform` (not checked here)
  * @param numCrops
  *   number of tile slots in each output row, not counting the slot of the global view
  * @return
  *   the padded rows, the `Array(height, width)` shape of every input image, and the number of
  *   image tokens per input image
  */
def processHdImages(
    hdImages: List[BufferedImage],
    numCrops: Int): (List[BufferedImage], Array[Array[Int]], List[Int]) = {
  val shapes = calculateShapes(hdImages)
  val numImgTokens = calculateImageTokens(shapes)

  // One list of 336x336 tiles per input image.
  val tilesPerImage = reshapeImages(hdImages, shapes)

  // Global 336x336 view of every input; the third argument (3) is forwarded unchanged.
  val globalViews = hdImages.map(hd => resizeBufferedImage(336, 336, 3)(hd))

  val paddedImages = globalViews.zip(tilesPerImage).map { case (globalView, tiles) =>
    // The extra slot in (numCrops + 1) is the one taken by the global view.
    padToMaxNumCrops(concatenateImages(globalView, tiles), numCrops + 1)
  }

  (paddedImages, shapes, numImgTokens)
}
257
/** Scales pixel values to `[0, 1]` and standardises them per channel.
  *
  * Every value `v` of channel `c` becomes `(v / 255.0 - mean(c)) / std(c)`. The subtraction is
  * done in double precision, the division in single precision.
  *
  * @param imgCrop
  *   pixel values indexed as `(channel)(y)(x)`; height and width are taken from the first
  *   channel and first row, so the array must be non-empty and rectangular
  * @param mean
  *   per-channel mean, indexed by channel
  * @param std
  *   per-channel standard deviation, indexed by channel
  * @return
  *   a new array with the same `(channel)(y)(x)` layout holding the normalised values
  */
def normalizeImageCrop(
    imgCrop: Array[Array[Array[Int]]],
    mean: Array[Double],
    std: Array[Double]): Array[Array[Array[Float]]] = {
  val channels = imgCrop.length
  val height = imgCrop(0).length
  val width = imgCrop(0)(0).length

  Array.tabulate(channels, height, width) { (c, y, x) =>
    (imgCrop(c)(y)(x) / 255.0 - mean(c)).toFloat / std(c).toFloat
  }
}
281
/** Splits an image into its red, green and blue planes.
  *
  * @param imgCrop
  *   image to convert
  * @return
  *   an array indexed as `(channel)(y)(x)` with channel 0 = red, 1 = green, 2 = blue; every
  *   value is in `0..255`
  */
def imageCropToArray(imgCrop: BufferedImage): Array[Array[Array[Int]]] = {
  val rows = imgCrop.getHeight
  val cols = imgCrop.getWidth
  val planes = Array.ofDim[Int](3, rows, cols)

  var y = 0
  while (y < rows) {
    var x = 0
    while (x < cols) {
      // getRGB packs the pixel as 0xAARRGGBB; the alpha byte is ignored.
      val argb = imgCrop.getRGB(x, y)
      planes(0)(y)(x) = (argb >> 16) & 0xFF
      planes(1)(y)(x) = (argb >> 8) & 0xFF
      planes(2)(y)(x) = argb & 0xFF
      x += 1
    }
    y += 1
  }

  planes
}
300
/** Cuts an image into square tiles and converts every tile to a float array.
  *
  * `height / cropSize` rows and `width / cropSize` columns of tiles are taken (integer
  * division, so any remainder on the right or bottom edge is ignored). Tiles are visited row by
  * row. Each tile is converted with `imageCropToArray` and then either
  *   - normalised with `normalizeImageCrop` when `normalize` is true, or
  *   - merely scaled to `[0, 1]` by dividing by 255 otherwise.
  *
  * @param image
  *   image to split
  * @param cropSize
  *   edge length of a tile in pixels
  * @param normalize
  *   whether to apply mean/std normalisation
  * @param mean
  *   per-channel mean used when `normalize` is true
  * @param std
  *   per-channel standard deviation used when `normalize` is true
  * @return
  *   the tiles indexed as `(tile)(channel)(y)(x)`, and the number of tiles
  */
def splitImageToCrops(
    image: BufferedImage,
    cropSize: Int = 336,
    normalize: Boolean = false,
    mean: Array[Double] = Array(0.48145466, 0.4578275, 0.40821073),
    std: Array[Double] = Array(0.26862954, 0.26130258, 0.27577711))
    : (Array[Array[Array[Array[Float]]]], Int) = {
  val rows = image.getHeight / cropSize
  val cols = image.getWidth / cropSize

  val crops = for {
    row <- 0 until rows
    col <- 0 until cols
  } yield {
    val tile = image.getSubimage(col * cropSize, row * cropSize, cropSize, cropSize)
    val channels = imageCropToArray(tile)

    if (normalize) normalizeImageCrop(channels, mean, std)
    // Without normalisation the Int values are only converted to Float and scaled to [0, 1].
    else channels.map(_.map(_.map(_.toFloat / 255.0.toFloat)))
  }

  (crops.toArray, rows * cols)
}
341
/** Converts a batch of images into a 5D float array.
  *
  * Every image is split with `splitImageToCrops` using its default tile size; the resulting
  * per-image tile arrays are collected in input order.
  *
  * @param processedImages
  *   images to convert
  * @param normalize
  *   forwarded to `splitImageToCrops`
  * @param mean
  *   forwarded to `splitImageToCrops`
  * @param std
  *   forwarded to `splitImageToCrops`
  * @return
  *   an array indexed as `(image)(tile)(channel)(y)(x)`
  */
def processedImagesTo5DArray(
    processedImages: List[BufferedImage],
    normalize: Boolean = false,
    mean: Array[Double] = Array(0.48145466, 0.4578275, 0.40821073),
    std: Array[Double] = Array(0.26862954, 0.26130258, 0.27577711))
    : Array[Array[Array[Array[Array[Float]]]]] = {
  val batch = processedImages.map { img =>
    // Only the tiles are needed; the tile count returned alongside them is dropped.
    val (tiles, _) = splitImageToCrops(img, normalize = normalize, mean = mean, std = std)
    tiles
  }
  batch.toArray
}
362	}

JohnSnowLabs / spark-nlp / 13883000244

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous