• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / aspace_helpers / a7e7a88d-8602-4fef-947a-2ffb925ac3c8

02 Oct 2025 06:49PM UTC coverage: 76.617% (+1.1%) from 75.532%
a7e7a88d-8602-4fef-947a-2ffb925ac3c8

Pull #766

circleci

sandbergja
CSV.read takes a filename, not a filehandle
Pull Request #766: Refactor barcode validation into its own class

53 of 54 new or added lines in 5 files covered. (98.15%)

2 existing lines in 1 file now uncovered.

308 of 402 relevant lines covered (76.62%)

318.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.07
/reports/aspace2alma/get_MARCxml.rb
1
require 'archivesspace/client'
1✔
2
require 'active_support/all'
1✔
3
require 'net/sftp'
1✔
4
require 'nokogiri'
1✔
5
require 'csv'
1✔
6
require_relative '../../helper_methods.rb'
1✔
7
require_relative 'resource'
1✔
8
require_relative 'top_container'
1✔
9
require_relative 'item_record_constructor'
1✔
10
require_relative 'barcode_validation'
1✔
11

12
# Log errors to a file: from here on everything written to $stderr goes to
# log_err.txt in the current working directory ("w" truncates any earlier log).
# NOTE: this runs as soon as the file is loaded, not only when it is executed.
$stderr.reopen("log_err.txt", "w")
# Keep values synced so they are written immediately instead of sitting in the buffer.
$stderr.sync = true
16

17
# Configure sendoff to Alma: upload a local file to the /alma/aspace/ directory
# on the SFTP server, keeping only the file's basename on the remote side.
#
# Connection settings are read from the SFTP_HOST, SFTP_USERNAME and
# SFTP_PASSWORD environment variables (nil is passed when one is unset).
#
# @param filename [String] path of the local file to upload
def alma_sftp(filename)
  host = ENV.fetch('SFTP_HOST', nil)
  username = ENV.fetch('SFTP_USERNAME', nil)
  password = ENV.fetch('SFTP_PASSWORD', nil)

  Net::SFTP.start(host, username, { password: password }) do |sftp|
    sftp.upload!(filename, File.join('/alma/aspace/', File.basename(filename)))
  end
end
23

24
# Get the Alma barcode report (or any other file) from SFTP.
#
# Downloads /alma/aspace/<basename of remote_filename> from the SFTP server and
# writes it to the local path given by remote_filename. Despite its name, the
# argument therefore doubles as the local destination path.
#
# Connection settings are read from the SFTP_HOST, SFTP_USERNAME and
# SFTP_PASSWORD environment variables (nil is passed when one is unset).
#
# @param remote_filename [String] local destination path; its basename selects
#   the remote file under /alma/aspace/
def get_file_from_sftp(remote_filename)
  host = ENV.fetch('SFTP_HOST', nil)
  username = ENV.fetch('SFTP_USERNAME', nil)
  password = ENV.fetch('SFTP_PASSWORD', nil)

  Net::SFTP.start(host, username, { password: password }) do |sftp|
    remote_path = File.join('/alma/aspace/', File.basename(remote_filename))
    sftp.download!(remote_path, remote_filename)
  end
end
30

31
# Rename old files so we never send an outdated file by accident.
#
# Asks the SFTP server for the status of original_path and renames it to
# new_path only when that status request succeeds; otherwise nothing happens.
#
# Connection settings are read from the SFTP_HOST, SFTP_USERNAME and
# SFTP_PASSWORD environment variables (nil is passed when one is unset).
#
# @param original_path [String] current path of the file on the SFTP server
# @param new_path [String] path the file should be renamed to
def rename_file(original_path, new_path)
  host = ENV.fetch('SFTP_HOST', nil)
  username = ENV.fetch('SFTP_USERNAME', nil)
  password = ENV.fetch('SFTP_PASSWORD', nil)

  Net::SFTP.start(host, username, { password: password }) do |sftp|
    sftp.stat(original_path) do |response|
      sftp.rename!(original_path, new_path) if response.ok?
    end
  end
end
39

40
# Remove files in preparation for renaming.
#
# Asks the SFTP server for the status of path and removes the file only when
# that status request succeeds; otherwise nothing happens.
#
# Connection settings are read from the SFTP_HOST, SFTP_USERNAME and
# SFTP_PASSWORD environment variables (nil is passed when one is unset).
#
# @param path [String] path of the file on the SFTP server
def remove_file(path)
  host = ENV.fetch('SFTP_HOST', nil)
  username = ENV.fetch('SFTP_USERNAME', nil)
  password = ENV.fetch('SFTP_PASSWORD', nil)

  Net::SFTP.start(host, username, { password: password }) do |sftp|
    sftp.stat(path) do |response|
      sftp.remove!(path) if response.ok?
    end
  end
end
48

49
# Builds a minute-resolution UTC timestamp string such as "202510021849"
# (year, month, day, hour, minute with no separators).
#
# @param now [Time] the moment to format; defaults to the current time, so
#   existing zero-argument callers behave as before
# @return [String] the timestamp formatted as %Y%m%d%H%M in UTC
def datestamp(now = Time.now)
  # getutc returns a converted copy, leaving the caller's Time object untouched
  now.getutc.strftime('%Y%m%d%H%M')
end
52

53
# Main entry point: exports every resource as MARCXML into MARC_out.xml and
# uploads the result to the SFTP server for Alma.
#
# Steps, in order:
#   1. open log_out.txt (progress log) and log in via aspace_login
#   2. on the SFTP server, remove MARC_out_old.xml and rename the current
#      MARC_out.xml to MARC_out_old.xml
#   3. write a <collection> wrapper to the local MARC_out.xml, calling
#      process_resource once per resource URI
#   4. upload MARC_out.xml with alma_sftp and log the finish time
#
# Both files are closed even when a step raises, so buffered log lines are
# written out and no file handle is leaked.
#
# @return [nil]
def fetch_and_process_records
  # open a quasi log to receive progress output
  log_out = File.open('log_out.txt', 'w')
  aspace_login
  # log when the process started
  log_out.puts "Process started fetching records at #{Time.now}"
  filename = 'MARC_out.xml'

  # rename MARC file:
  # in case the export fails, this ensures that
  # Alma will not find a stale file to import
  remove_file('/alma/aspace/MARC_out_old.xml')
  rename_file("/alma/aspace/#{filename}", '/alma/aspace/MARC_out_old.xml')

  barcode_duplicate_check = AlmaReportDuplicateCheck.new

  # get collection records from ASpace
  resources = get_resource_uris_for_all_repos

  file = File.open(filename, 'w')
  begin
    file << '<collection xmlns="http://www.loc.gov/MARC21/slim" xmlns:marc="http://www.loc.gov/MARC21/slim" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">'

    resources.each do |resource_uri|
      process_resource(resource_uri, file, log_out, barcode_duplicate_check)
    end

    file << '</collection>'
  ensure
    # the file must be closed (and therefore flushed) before it is uploaded
    file.close
  end

  # send to alma
  alma_sftp(filename)

  # log when the process finished.
  log_out.puts "Process finished at #{Time.now}"
ensure
  # log_out is nil only if opening the log itself failed
  log_out&.close
end
87

88
# Fetches the MARCXML for one resource, reshapes it for Alma and appends the
# resulting <record> to the open output file.
#
# The MARC document is modified in place (control fields added, 040/856
# rewritten, 6xx/7xx subject strings split into subfields, several note fields
# removed, 982 location fields and item records added) before it is written.
# Connection errors are retried up to three times with a growing delay; after
# that the failure is logged and the method returns without raising.
#
# NOTE(review): field contents are interpolated into XML strings without
# escaping; content containing "&" or "<" presumably yields a malformed
# fragment — confirm how Nokogiri handles that here.
#
# @param resource [String] resource identifier handed to Resource.new and to
#   ItemRecordConstructor#construct_item_records (presumably an ASpace URI)
# @param file [IO] open output file that receives the record via <<
# @param log_out [IO] progress log; receives puts and flush
# @param barcode_duplicate_check [Object] passed through to ItemRecordConstructor
def process_resource(resource, file, log_out, barcode_duplicate_check)
  # `retry` re-runs the method body without resetting locals, so ||= keeps the count
  retries ||= 0

  my_resource = Resource.new(resource, @client, file, log_out)
  # uri = my_resource.marc_uri
  # marc_record = @client.get(uri)
  doc = my_resource.marc_xml

  # set up variables (these may return a sequence)
  # NB: everything is looked up here, before the document is modified below
  ##################
  tag008 = my_resource.tag008

  tags040 = my_resource.tags040
  tag041 = my_resource.tag041
  tag099_a = my_resource.tag099_a
  tag245_g = my_resource.tag245_g
  tag351 = my_resource.tag351
  tags500 = my_resource.tags500
  tags500_a = my_resource.tags500_a
  tags520 = my_resource.tags520
  tags524 = my_resource.tags524
  tags535 = my_resource.tags535
  tags540 = my_resource.tags540
  tags541 = my_resource.tags541
  tags544 = my_resource.tags544
  tags561 = my_resource.tags561
  tags583 = my_resource.tags583
  tags852 = my_resource.tags852
  tag856 = my_resource.tag856
  tags6_7xx = my_resource.tags6_7xx
  # not used below; the call is retained in case the accessor has side
  # effects — TODO confirm and remove
  _subfields = my_resource.subfields

  # do stuff
  ##################

  # addresses github #128
  # recursively remove truly empty elements (blank text and empty attributes)
  my_resource.remove_empty_elements(doc)

  # addresses github #159
  my_resource.remove_linebreaks(doc)

  # addresses github #129
  tag008.previous = "<controlfield tag='001'>#{tag099_a.content}</controlfield>"

  # addresses github #130
  # inserted directly before 008, i.e. after the 001 added above
  tag008.previous = "<controlfield tag='003'>PULFA</controlfield>"

  # addresses github #144
  # swap quotes so interpolation is possible
  tag008.next = "<datafield ind1=' ' ind2=' ' tag='035'>
    <subfield code='a'>(PULFA)#{tag099_a.content}</subfield>
    </datafield>"

  # addresses github #131
  tags040.each do |tag040|
    tag040.replace('<datafield ind1=" " ind2=" " tag="040">
        <subfield code="a">NjP</subfield>
        <subfield code="b">eng</subfield>
        <subfield code="e">dacs</subfield>
        <subfield code="c">NjP</subfield>
      </datafield>')
  end

  # addresses github #134
  # characters 7-10 and 11-14 of the 008 content supply the two dates
  tag041.next = "<datafield ind1=' ' ind2=' ' tag='046'>
        <subfield code='a'>i</subfield>
        <subfield code='c'>#{my_resource.tag008.content[7..10]}</subfield>
        <subfield code='e'>#{my_resource.tag008.content[11..14]}</subfield>
      </datafield>"

  # addresses github # (issue number not recorded)
  tag245_g.content = "(mostly #{tag245_g.content})" unless tag245_g.nil?

  # addresses github #168
  # superseded by github #379
  #  tags520 = tags520.map.with_index { |tag520, index| tag520.remove if index > 0}

  # addresses github #380 - limit scopenotes to 8000 characters
  # (9999b field size limit in Alma v. 40,000+ character notes in ASpace)
  tags520.each do |tag520|
    # ASpace exports everything to $a, so only one subfield to check
    scope_note = tag520.at_xpath('marc:subfield[@code="a"]')
    scope_note.content = scope_note.content.truncate(7999)
  end

  # addresses github #133
  # superseded by github #205
  # NB node.children.before inserts new node as first of node's children; default for add_child is last
  # tags544.each do |tag544|
  #   tag544.children.before('<subfield code="a">')
  # end

  # addresses github #143
  # adapted from Mark's implementation of Don's logic
  tags6_7xx.each do |tag6xx|
    subfield_a = tag6xx.at_xpath('marc:subfield[@code="a"]')
    # a field without $a has nothing to split; skip only this step
    unless subfield_a.nil?
      # "Heading--Subdivision--1900-1950" becomes $a plus one $x/$y per subdivision
      segments = subfield_a.content.split('--').map(&:strip)
      subfield_a.replace("<subfield code='a'>#{segments.first}</subfield>")
      # drop(1) yields [] for an empty $a, where [1..-1] would have been nil
      segments.drop(1).each do |segment|
        # subdivisions starting with two digits are treated as chronological ($y)
        code = segment.match?(/^[0-9]{2}/) ? 'y' : 'x'
        tag6xx.children.last.next = "<subfield code='#{code}'>#{segment}</subfield>"
      end
    end

    # addresses github issue #334
    # a $0 mentioning viaf is re-coded as $1
    subfield0 = tag6xx.at_xpath('marc:subfield[@code="0"]')
    if subfield0 && subfield0.content.match?(/viaf/)
      subfield0.replace("<subfield code='1'>#{subfield0.content}</subfield>")
    end

    # a $2 of exactly "viaf" is dropped, and a second indicator of 7 becomes 0
    subfield2 = tag6xx.at_xpath('marc:subfield[@code="2"]')
    if subfield2 && subfield2.content.match?(/^viaf$/)
      ind2 = tag6xx.at_xpath('@ind2')
      subfield2.remove
      ind2.content = '0' if ind2.content == '7'
    end

    # add punctuation to the last subfield except $2
    # if tag6xx.children[-1].attribute('code') == '2'
    #   tag6xx.children[-2].content << '.' unless ['?', '-', '.'].include?(tag6xx.children[-2].content[-1])
    # else
    #   tag6xx.children[-1].content << '.' unless ['?', '-', '.'].include?(tag6xx.children[-1].content[-1])
    # end
  end

  # addresses github #132
  tags852.each(&:remove)

  # addresses github #268
  unless tag856.nil?
    # addresses github #264 and #265
    tag856.replace("<datafield ind1='4' ind2='2' tag='856'>
      <subfield code='z'>Search and Request</subfield>
      #{tag856.at_xpath('marc:subfield[@code="u"]')}
      <subfield code='y'>Princeton University Library Finding Aids</subfield>
    </datafield>")
  end

  # addresses github 147
  tags500_a&.each do |tag500_a|
    # the exporter adds preceding text and punctuation for each physloc.
    # hardcode location codes because textual physlocs are patterned the same
    # account for 'sca' prefix (#247)
    next unless tag500_a.content.match?(/Location of resource: (sca)?(anxb|ea|ex|flm|flmp|gax|hsvc|hsvm|mss|mudd|prnc|rarebooks|rcpph|rcppf|rcppl|rcpxc|rcpxg|rcpxm|rcpxr|st|thx|wa|review|oo|sc|sls)/)

    # strip text preceding and following code
    location_notes = tag500_a.content.gsub(/.*:\s(.+)[.]/, "\\1")
    location_notes.split.each do |location_code|
      # add as the last datafield
      doc.xpath('//marc:datafield').last.next =
        "<datafield ind1=' ' ind2=' ' tag='982'><subfield code='c'>#{location_code}</subfield></datafield>"
    end
  end

  # addresses github #397
  params = Params.new(doc, tag099_a, log_out, nil)
  item_constructor = ItemRecordConstructor.new(@client, barcode_duplicate_check)
  item_constructor.construct_item_records(resource, params)

  # addresses github #205
  tag351&.remove
  [tags500, tags524, tags535, tags540, tags541, tags544, tags561, tags583].each do |tags|
    tags&.each(&:remove)
  end

  # log which records were finished when
  log_out.puts "Fetched record #{tag099_a.content} at #{Time.now}\n"

  # try adding a delay to get around the rate limit
  sleep(0.25)

  # append record to file
  # the exclusions address #186, #268, #284, #548, #553;
  # dots are escaped so that e.g. "C0744.06" only matches a literal dot
  excluded = tag099_a.content.match?(/^(C0140|C1771|AC214|AC364|C0744\.06|C0935|C1296|WC059|RBD1|RBD1\.1)$/)
  file << doc.at_xpath('//marc:record') if !excluded && !tag856.nil?
  file.flush
  log_out.flush
rescue Errno::ECONNRESET, Errno::ECONNABORTED, Errno::ETIMEDOUT, Errno::ECONNREFUSED => error
  if (retries += 1) <= 3
    log_out.puts "Encountered #{error.class}: '#{error.message}' when retrieving resource #{resource} at #{Time.now}, retrying in #{retries} second(s)..."
    sleep(retries)
    retry
  end
  # retries was incremented once more by the failed check above, hence the - 1
  log_out.puts "Encountered #{error.class}: '#{error.message}' at #{Time.now}, unsuccessful in retrieving resource #{resource} after #{retries - 1} retries"
end
287

288
# If you run this file directly, the main method will run.
# If the file is loaded from rspec, the export only runs when the method is
# called explicitly.
if $PROGRAM_NAME == __FILE__
  fetch_and_process_records
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc