• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JohnSnowLabs / spark-nlp / 8054290840

26 Feb 2024 07:11PM UTC coverage: 62.707%. First build
8054290840

Pull #14164

github

web-flow
Merge e9fdbe61f into e805c4308
Pull Request #14164: release/530-release-candidate

8964 of 14295 relevant lines covered (62.71%)

0.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main/scala/com/johnsnowlabs/client/gcp/GCPGateway.scala
1
/*
2
 * Copyright 2017-2022 John Snow Labs
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *    http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
package com.johnsnowlabs.client.gcp
17

18
import com.google.cloud.storage.{BlobId, BlobInfo, Storage, StorageOptions}
19
import com.johnsnowlabs.client.CloudStorage
20
import com.johnsnowlabs.nlp.util.io.ResourceHelper
21
import com.johnsnowlabs.util.{ConfigHelper, ConfigLoader}
22
import org.apache.hadoop.fs.{FileSystem, Path}
23
import org.sparkproject.guava.collect.Iterables
24

25
import java.io.{File, InputStream}
26
import scala.collection.JavaConverters._
27

28
/** [[CloudStorage]] implementation backed by the Google Cloud Storage Java client.
  *
  * @param projectId
  *   GCP project id used to build the Storage client. Defaults to the value read through
  *   `ConfigLoader.getConfigStringValue(ConfigHelper.gcpProjectId)`.
  */
class GCPGateway(projectId: String = ConfigLoader.getConfigStringValue(ConfigHelper.gcpProjectId))
    extends CloudStorage {

  /** Storage client shared by the upload and download methods, created on first access.
    *
    * Accessing it throws `UnsupportedOperationException` when `projectId` is null or empty.
    */
  private lazy val storageClient: Storage = {
    if (projectId == null || projectId.isEmpty) {
      throw new UnsupportedOperationException(
        "projectId argument is mandatory to create GCP Storage client.")
    }

    StorageOptions.newBuilder.setProjectId(projectId).build.getService
  }

  /** Checks whether at least one entry exists in the bucket under the given prefix.
    *
    * @param bucketName
    *   name of the bucket to inspect
    * @param filePath
    *   prefix used to filter the listing
    * @return
    *   true if the listing yields at least one entry, false otherwise
    */
  override def doesBucketPathExist(bucketName: String, filePath: String): Boolean = {
    // NOTE(review): this method builds its own client instead of reusing storageClient, so the
    // null/empty projectId check is not applied here. Left unchanged to avoid altering behavior
    // for existing callers; confirm whether that is intentional.
    val storage = StorageOptions.newBuilder.setProjectId(projectId).build.getService
    val blobs = storage.list(
      bucketName,
      Storage.BlobListOption.prefix(filePath),
      Storage.BlobListOption.currentDirectory)

    // Only existence matters: stop at the first entry instead of counting every listed blob.
    !Iterables.isEmpty(blobs.iterateAll())
  }

  /** Uploads the content of an already opened stream to a blob.
    *
    * The stream is not closed here; it stays under the caller's control.
    *
    * @param bucketName
    *   destination bucket
    * @param destinationPath
    *   blob name inside the bucket
    * @param inputStream
    *   content to upload
    */
  override def copyFileToBucket(
      bucketName: String,
      destinationPath: String,
      inputStream: InputStream): Unit = {
    val blobId = BlobId.of(bucketName, destinationPath)
    val blobInfo = BlobInfo.newBuilder(blobId).build
    storageClient.createFrom(blobInfo, inputStream)
  }

  /** Opens `sourceFilePath` through the Hadoop file system of the active Spark session and
    * uploads its content to a blob.
    *
    * @param bucketName
    *   destination bucket
    * @param filePath
    *   blob name inside the bucket
    * @param sourceFilePath
    *   path of the file to read
    */
  override def copyInputStreamToBucket(
      bucketName: String,
      filePath: String,
      sourceFilePath: String): Unit = {
    val fileSystem = FileSystem.get(ResourceHelper.spark.sparkContext.hadoopConfiguration)
    val inputStream = fileSystem.open(new Path(sourceFilePath))
    try {
      val blobInfo = BlobInfo.newBuilder(bucketName, filePath).build()
      storageClient.createFrom(blobInfo, inputStream)
    } finally {
      // The stream is opened by this method, so it must be released here even if the upload fails.
      inputStream.close()
    }
  }

  /** Downloads every blob whose name starts with `filePath` into `directoryPath`, recreating the
    * blob names as relative paths below that directory.
    *
    * @param bucketName
    *   bucket to read from
    * @param filePath
    *   prefix selecting the blobs to download
    * @param directoryPath
    *   local directory that receives the files
    * @param isIndex
    *   not used by this implementation
    * @throws Exception
    *   wrapping any exception raised while listing or downloading
    */
  override def downloadFilesFromBucketToDirectory(
      bucketName: String,
      filePath: String,
      directoryPath: String,
      isIndex: Boolean = false): Unit = {
    try {
      // iterateAll walks every result page; getValues would only expose the current page and
      // silently skip the remaining blobs of a large listing.
      val blobs = storageClient
        .list(bucketName, Storage.BlobListOption.prefix(filePath))
        .iterateAll()
        .asScala

      blobs.foreach { blob =>
        val blobName = blob.getName
        val file = new File(s"$directoryPath/$blobName")

        if (blobName.endsWith("/")) {
          // A name ending in "/" is materialized as a directory instead of being downloaded.
          file.mkdirs()
        } else {
          file.getParentFile.mkdirs()
          blob.downloadTo(file.toPath)
        }
      }
    } catch {
      case e: Exception =>
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new Exception(
          "Error when downloading files from GCP Storage directory: " + e.getMessage,
          e)
    }
  }

}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc