• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

archivesspace / archivesspace / 19916213389

04 Dec 2025 03:02AM UTC coverage: 80.708% (+1.4%) from 79.285%
19916213389

Pull #3803

github

661c5a
web-flow
Merge 418b25756 into 87083c526
Pull Request #3803: ANW-1831: Fix 404 console error for PUI resource `show` views

29005 of 35938 relevant lines covered (80.71%)

7935.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.96
/indexer/app/lib/indexer_common.rb
1
require 'ashttp'
13✔
2
require 'uri'
13✔
3
require 'json'
13✔
4
require 'fileutils'
13✔
5
require 'aspace_i18n'
13✔
6
require 'set'
13✔
7

8
require 'asutils'
13✔
9
require 'jsonmodel'
13✔
10
require 'jsonmodel_client'
13✔
11
require 'config/config-distribution'
13✔
12
require 'record_inheritance'
13✔
13

14
require_relative 'index_batch'
13✔
15
require_relative 'indexer_common_config'
13✔
16
require_relative 'indexer_timing'
13✔
17
require_relative 'fake_solr_timeout_response'
13✔
18

19
class IndexerCommon
13✔
20

21
  include JSONModel
13✔
22

23
  @@record_types = IndexerCommonConfig.record_types
13✔
24

25
  @@global_types = IndexerCommonConfig.global_types
13✔
26

27
  @@records_with_children = []
13✔
28
  @@init_hooks = []
13✔
29

30
  @@resolved_attributes = IndexerCommonConfig.resolved_attributes
13✔
31

32
  @@paused_until = Time.now
13✔
33

34
  # Registers a block to run at the end of IndexerCommon#initialize.
  #
  # The block is appended to the class-wide @@init_hooks list; #initialize
  # calls every registered block with the new indexer instance.
  #
  # @param block [Proc] the hook to register
  # @return [Array<Proc>] the list of registered hooks
  def self.add_indexer_initialize_hook(&block)
    @@init_hooks << block
  end
37

38
  # Adds +attr+ to the class-wide list of resolved attributes, unless it is
  # already present (so the list never holds duplicates added through here).
  #
  # @param attr [Object] attribute to add (presumably a String path -- confirm with callers)
  # @return [Array, nil] the updated list, or nil when +attr+ was already listed
  def self.add_attribute_to_resolve(attr)
    @@resolved_attributes.push(attr) unless @@resolved_attributes.include?(attr)
  end
41

42
  # Instance-level reader for the class-wide list of resolved attributes
  # (seeded from IndexerCommonConfig.resolved_attributes).
  #
  # @return [Array] the shared list itself, not a copy
  def resolved_attributes
    @@resolved_attributes
  end
45

8✔
46
  # Instance-level reader for the class-wide record types
  # (seeded from IndexerCommonConfig.record_types).
  #
  # @return [Object] the shared value itself, not a copy
  def record_types
    @@record_types
  end
49

50
  # Pauses the indexer by recording the time until which it counts as paused.
  #
  # @param duration [Numeric] length of the pause in seconds (default 900)
  # @return [Time] the moment the pause ends
  def self.pause(duration = 900)
    @@paused_until = Time.now + duration
  end
55

56
  # Reports whether a pause requested through IndexerCommon.pause is still
  # in effect, i.e. the recorded end time is still in the future.
  #
  # @return [Boolean]
  def self.paused?
    @@paused_until > Time.now
  end
59

60

8✔
61
  # Builds an indexer bound to the given backend.
  #
  # Blocks until JSONModel can be initialised against +backend_url+, retrying
  # every five seconds for as long as the attempt raises. Afterwards it
  # pre-computes the enum field list, installs the built-in document rules and
  # runs every hook registered via IndexerCommon.add_indexer_initialize_hook.
  #
  # @param backend_url [String] URL of the backend passed to JSONModel::init
  def initialize(backend_url)
    @backend_url = backend_url
    @document_prepare_hooks = []
    @extra_documents_hooks = []
    @delete_hooks = []
    @batch_hooks = []
    @current_session = nil

    # Deliberately unbounded: the indexer cannot do anything useful until the
    # backend is reachable, so keep trying rather than give up.
    loop do
      begin
        JSONModel::init(:client_mode => true, :url => @backend_url)
        break
      rescue => e
        Log.error("Connection to backend failed (#{e}).  Retrying...")
        sleep(5)
      end
    end

    # Force load up front
    enum_fields

    configure_doc_rules

    @@init_hooks.each { |hook| hook.call(self) }
  end
24✔
88

16✔
89
  # Expands a begin/end date pair into the list of years it covers.
  #
  # Only the leading run of one to four digits of each date is used as its
  # year. The years are returned as strings; the expansion relies on String
  # ranges, which count numerically when both ends are all digits.
  #
  # @param begin_date [String, nil] start date; nil yields an empty list
  # @param end_date [String, nil] end date; defaults to +begin_date+ when nil
  # @return [Array<String>] every year from begin to end inclusive, or [] when
  #   either date does not start with a digit
  def self.generate_years_for_date_range(begin_date, end_date)
    return [] unless begin_date

    end_date ||= begin_date

    first_year = begin_date[/\A[0-9]{1,4}/]
    last_year = end_date[/\A[0-9]{1,4}/]
    return [] unless first_year && last_year

    (first_year..last_year).to_a
  end
24✔
103

104

105
  # Produces the distinct spelling variants of an identifier.
  #
  # The variants are, in order: the identifier as given; punctuation runs
  # replaced by a single space; punctuation and spaces removed; and digit and
  # non-digit runs separated by a space. Duplicates are dropped.
  #
  # @param identifer [String, nil] the identifier; nil yields an empty list
  # @return [Array<String>] unique variants, original first
  def self.generate_permutations_for_identifier(identifer)
    return [] if identifer.nil?

    [
      identifer,
      identifer.gsub(/[[:punct:]]+/, " "),
      identifer.gsub(/[[:punct:] ]+/, ""),
      identifer.scan(/[0-9]+|[^0-9]+/).join(" ")
    ].uniq
  end
521✔
115

521✔
116

117
  # Isolate leading alpha and numeric values to create a sortable string.
  #
  # The identifier is split into a leading non-digit part, the digit run that
  # follows, and the remainder of that line. Each part is stripped and padded
  # to +size+ characters: the leading part on the right with '#', the digits on
  # the left with '0', the remainder on the right with spaces. Spaces inside
  # the first two parts are replaced by the pad character as well.
  #
  # @param identifier [String, nil] the identifier; nil is treated as ""
  # @param size [Integer] width each of the three parts is padded to
  # @return [String] the concatenated, padded parts
  def self.generate_sort_string_for_identifier(identifier, size = 255)
    # to_s keeps a missing identifier from raising NoMethodError.
    letters, numbers, rest = identifier.to_s.match(/\A([^0-9]*)([0-9]*)(.*)/).captures

    letters.strip.ljust(size).tr(' ', '#') +
      numbers.strip.rjust(size).tr(' ', '0') +
      rest.strip.ljust(size)
  end
122

123

124
  # Collects every string value found in a (nested) record, split into
  # published and unpublished text.
  #
  # The record is walked depth-first. A hash whose "publish" key is present
  # and falsy is set aside, together with everything beneath it, as
  # unpublished. Keys listed in IndexerCommonConfig.fullrecord_excludes and
  # keys ending in "_enum_s" are ignored.
  #
  # @param doc [Hash, Array, String, nil] the record, a list of records, or a
  #   plain string
  # @param opts [Array<Symbol>] pass :published_only to skip unpublished text
  # @return [Array] for a String +doc+, the pair [doc, doc]; with
  #   :published_only, the Array of published strings; otherwise the pair
  #   [published_strings, unpublished_strings]
  def self.extract_string_values(doc, *opts)
    return doc, doc if doc.is_a?(String)

    published_queue = doc.is_a?(Array) ? doc.flatten : [doc]
    extract_unpublished = !opts.include?(:published_only)

    unpublished_queue = []
    published_strings = []
    unpublished_strings = []
    published_done = false

    [
      [published_queue, published_strings],
      [unpublished_queue, unpublished_strings]
    ].each do |queue, strings|
      until queue.empty?
        item = queue.pop

        # Only top-level input can put a non-hash on the queue: keep bare
        # strings as text and skip anything else (nil, numbers) instead of
        # failing on has_key?.
        if item.is_a?(String)
          strings.push(item)
          next
        end
        next unless item.respond_to?(:has_key?)

        # While walking published content, defer unpublished subtrees to the
        # second pass (or drop them when only published text was requested).
        if !published_done && item.has_key?("publish") && !item["publish"]
          unpublished_queue.push(item) if extract_unpublished
          next
        end

        item.each do |key, val|
          next if IndexerCommonConfig.fullrecord_excludes.include?(key) || key =~ /_enum_s$/

          case val
          when String
            strings.push(val)
          when Hash
            queue.push(val)
          when Array
            val.flatten.each do |v|
              case v
              when String then strings.push(v)
              when Hash then queue.push(v)
              end
            end
          end
        end
      end

      break unless extract_unpublished

      # From here on everything queued is unpublished, whatever its own
      # "publish" flag says.
      published_done = true
    end

    extract_unpublished ? [published_strings, unpublished_strings] : published_strings
  end
12,136✔
188

189

190
  # Fills the full-text fields of +doc+ from every string in the record.
  #
  # 'fullrecord' only contains unpublished text at this stage, but
  # 'fullrecord_published' will be merged into it by Solr using copyField.
  #
  # @param doc [Hash] the Solr document being built (mutated)
  # @param record [Hash] wrapper whose 'record' entry is the source record
  def build_fullrecord(doc, record)
    published, unpublished = IndexerCommon.extract_string_values(record['record'])
    doc['fullrecord_published'] = published
    doc['fullrecord'] = unpublished
  end
9,530✔
195

9,530✔
196
  # Indexes the agents linked to a record.
  #
  # Sets 'agents'/'agent_uris' for all links, 'published_agents'/
  # 'published_agent_uris' for links whose resolved agent is published, and
  # 'creators' (only when there is at least one) for links with role
  # 'creator'. Does nothing when the record has no 'linked_agents'.
  #
  # Every link is expected to carry a '_resolved' hash with a
  # 'display_name' => 'sort_name' entry; an unresolved link raises.
  #
  # @param doc [Hash] the Solr document being built (mutated)
  # @param record [Hash] wrapper whose 'record' entry is the source record
  def add_agents(doc, record)
    linked_agents = record['record']['linked_agents']
    return unless linked_agents

    sort_name = lambda { |link| link['_resolved']['display_name']['sort_name'] }
    ref = lambda { |link| link['ref'] }

    # index all linked agents first
    doc['agents'] = linked_agents.map(&sort_name)
    doc['agent_uris'] = linked_agents.map(&ref)

    # only published agents
    published = linked_agents.select { |link| link['_resolved']['publish'] }
    doc['published_agents'] = published.map(&sort_name)
    doc['published_agent_uris'] = published.map(&ref)

    # index the creators only
    creators = linked_agents.select { |link| link['role'] == 'creator' }
    doc['creators'] = creators.map(&sort_name) unless creators.empty?
  end
8✔
217

3,294✔
218
  # Indexes the subjects linked to a record.
  #
  # Sets 'subjects' to the resolved titles (nil titles dropped) and
  # 'subject_uris' to the refs. Does nothing when the record has no
  # 'subjects'. Every link is expected to carry a '_resolved' hash.
  #
  # @param doc [Hash] the Solr document being built (mutated)
  # @param record [Hash] wrapper whose 'record' entry is the source record
  def add_subjects(doc, record)
    subjects = record['record']['subjects']
    return unless subjects

    doc['subjects'] = subjects.map { |link| link['_resolved']['title'] }.compact
    doc['subject_uris'] = subjects.map { |link| link['ref'] }
  end
1,346✔
224

1,346✔
225

1,346✔
226
  # Appends the subjects linked from a record's subrecords to the document.
  #
  # 'subjects' and 'subject_uris' are created when missing and then extended
  # with the resolved titles (nil titles dropped) and refs found under
  # +type+ in each entry of record['record'][subrecord]. Entries without
  # +type+ are skipped.
  #
  # @param doc [Hash] the Solr document being built (mutated)
  # @param record [Hash] wrapper whose 'record' entry is the source record
  # @param subrecord [String] key of the subrecord list, e.g. 'agent_topics'
  # @param type [String] key inside each subrecord holding the links
  def add_subjects_subrecord(doc, record, subrecord, type = 'subjects')
    doc['subjects'] ||= []
    doc['subject_uris'] ||= []

    subrecords = record['record'][subrecord]
    return unless subrecords

    subrecords.each do |sr|
      links = sr[type]
      next unless links

      doc['subjects'].concat(links.map { |link| link['_resolved']['title'] }.compact)
      doc['subject_uris'].concat(links.map { |link| link['ref'] })
    end
  end
8✔
239

3,294✔
240

1,041✔
241
  # Copies the audit fields that are present on the record into the document.
  #
  # A field is copied whenever its key exists on the record, even if the
  # value is nil; absent keys leave the document untouched.
  #
  # @param doc [Hash] the Solr document being built (mutated)
  # @param record [Hash] wrapper whose 'record' entry is the source record
  def add_audit_info(doc, record)
    source = record['record']

    %w[created_by last_modified_by user_mtime system_mtime create_time].each do |field|
      doc[field] = source[field] if source.has_key?(field)
    end
  end
246

8✔
247

6,376✔
248
  # Fills the note text fields of +doc+ from the record's notes.
  #
  # 'notes' only contains unpublished notes at this stage, but
  # 'notes_published' will be merged into it by Solr using copyField.
  # Does nothing when the record has no 'notes'.
  #
  # @param doc [Hash] the Solr document being built (mutated)
  # @param record [Hash] wrapper whose 'record' entry is the source record
  def add_notes(doc, record)
    notes = record['record']['notes']
    return unless notes

    published, unpublished = IndexerCommon.extract_string_values(notes)
    doc['notes_published'] = published
    doc['notes'] = unpublished
  end
385✔
255

385✔
256

257
  # Indexes the years and display dates of a record's dates.
  #
  # 'years' is the sorted, de-duplicated list of years covered by all dates
  # and 'year_sort' is the first and last year, each zero-padded to four
  # characters. 'dates' holds one display string per inclusive date, or per
  # date when none is inclusive: the expression if present, the begin date for
  # 'single' dates, otherwise "begin - end". Dates with neither an expression
  # nor a date_type contribute nothing. Does nothing when the record has no
  # 'dates'.
  #
  # @param doc [Hash] the Solr document being built (mutated)
  # @param record [Hash] wrapper whose 'record' entry is the source record
  def add_years(doc, record)
    dates = record['record']['dates']
    return unless dates

    years = dates.flat_map do |date|
      IndexerCommon.generate_years_for_date_range(date['begin'], date['end'])
    end

    if years.empty?
      doc['years'] = []
    else
      # Years are strings of one to four digits, so order them numerically; a
      # plain string sort would put "998" after "1000" and corrupt year_sort.
      # The string itself breaks ties so the order stays deterministic.
      doc['years'] = years.uniq.sort_by { |year| [year.to_i, year] }
      doc['year_sort'] = doc['years'].first.rjust(4, '0') + doc['years'].last.rjust(4, '0')
    end

    display_dates = dates.select { |date| date['date_type'] == 'inclusive' }
    display_dates = dates if display_dates.empty?

    doc['dates'] = []
    display_dates.each do |date|
      if date['expression']
        doc['dates'] << date['expression']
      elsif date['date_type'] == 'single'
        doc['dates'] << date['begin']
      elsif date['date_type']
        doc['dates'] << "#{date['begin']} - #{date['end']}"
      end
    end
  end
521✔
282

283

972✔
284
  # Sets 'level' on the document when the record has a 'level' key.
  #
  # For the level 'otherlevel' the record's 'other_level' value is used in
  # its place.
  #
  # @param doc [Hash] the Solr document being built (mutated)
  # @param record [Hash] wrapper whose 'record' entry is the source record
  def add_level(doc, record)
    source = record['record']
    return unless source.has_key?('level')

    level = source['level']
    doc['level'] = level == 'otherlevel' ? source['other_level'] : level
  end
1,493✔
289

972✔
290

972✔
291
  # Sets 'summary' from the record's notes.
  #
  # The first note of type 'abstract' wins: its 'content' entries are joined
  # with newlines. Without an abstract, the 'content' of the subnotes of the
  # first 'scopecontent' note is joined instead. If neither applies (or
  # 'notes' is not an Array) the document is left untouched.
  #
  # @param doc [Hash] the Solr document being built (mutated)
  # @param record [Hash] wrapper whose 'record' entry is the source record
  def add_summary(doc, record)
    source = record['record']
    return unless source.has_key?('notes') && source['notes'].is_a?(Array)

    notes = source['notes']

    abstract = notes.find { |note| note['type'] == 'abstract' }
    if abstract
      doc['summary'] = abstract['content'].join("\n")
      return
    end

    scopecontent = notes.find { |note| note['type'] == 'scopecontent' }
    return unless scopecontent && scopecontent.has_key?('subnotes')

    doc['summary'] = scopecontent['subnotes'].map { |subnote| subnote['content'] }.join("\n")
  end
8✔
305

3,294✔
306

498✔
307
  # Sets 'extents' to one "number --- extent_type" string per extent.
  #
  # Only extents whose portion is 'whole' are listed; when there are none,
  # every extent is listed. Does nothing when the record has no 'extents'.
  #
  # @param doc [Hash] the Solr document being built (mutated)
  # @param record [Hash] wrapper whose 'record' entry is the source record
  def add_extents(doc, record)
    extents = record['record']['extents']
    return unless extents

    display_extents = extents.select { |extent| extent['portion'] == 'whole' }
    display_extents = extents if display_extents.empty?

    doc['extents'] = display_extents.map do |extent|
      "#{extent['number']} --- #{extent['extent_type']}"
    end
  end
318

1,607✔
319

1,465✔
320
  # Lists the names of all schema properties backed by a dynamic enum.
  #
  # Walks the 'properties' of every JSONModel schema breadth-first and
  # collects each key whose definition has a 'dynamic_enum', either directly
  # or on its 'items'. The result is computed once and memoised.
  #
  # @return [Array<String>] unique property names
  def enum_fields
    return @enum_fields if @enum_fields

    found = []
    queue = JSONModel.models.map { |_, model| model.schema['properties'] }.flatten.uniq

    until queue.empty?
      elt = queue.shift

      case elt
      when Hash
        elt.each do |key, value|
          if value.is_a?(Hash) && (value['dynamic_enum'] || value.dig('items', 'dynamic_enum'))
            found.push(key)
          end
          queue << value
        end
      when Array
        queue.concat(elt)
      end
    end

    found.delete('items') # not an enum, creeps in through dynamic enum lists
    @enum_fields = found.uniq
  end
24✔
344

3,912✔
345
  # Strips everything before the last "ark:" on the first line of +s+, so the
  # value starts with "ark:". Strings without "ark:" are returned unchanged.
  #
  # @param s [String] an ARK, possibly prefixed (e.g. by a resolver URL)
  # @return [String] a new string beginning at "ark:"
  def trim_ark_value(s)
    # The pattern is anchored at the start, so it can match at most once.
    s.sub(/\A.*ark:/, 'ark:')
  end
346,272✔
348

349
  # Sets 'ark_name' to the record's current and previous ARKs, each trimmed
  # with #trim_ark_value, current first.
  #
  # Does nothing when AppConfig[:arks_enabled] is falsy or the record has no
  # 'ark_name'. Both 'current' and 'previous' are optional; a missing
  # 'previous' is treated as an empty list instead of raising KeyError.
  #
  # @param doc [Hash] the Solr document being built (mutated)
  # @param record [Hash] wrapper whose 'record' entry is the source record
  def add_arks(doc, record)
    return unless AppConfig[:arks_enabled]

    arks = record['record']['ark_name']
    return unless arks

    names = [arks.fetch('current', nil)] + Array(arks.fetch('previous', nil))
    doc['ark_name'] = names.compact.map { |name| trim_ark_value(name) }
  end
356

241,840✔
357

11,784✔
358
  def configure_doc_rules
5✔
359

360
    add_document_prepare_hook {|doc, record|
17✔
361
      found_keys = Set.new
3,678✔
362

24✔
363
      ASUtils.search_nested(record["record"], enum_fields, ['_resolved']) do |field, field_value|
3,654✔
364
        key = "#{field}_enum_s"
15,427✔
365

8✔
366
        doc[key] ||= Set.new
15,427✔
367
        doc[key] << field_value
15,427✔
368

369
        found_keys << key
15,435✔
370
      end
3,294✔
371

372
      ASUtils.search_nested(record["record"], ['items'], ['_resolved']) do |field, field_value|
3,654✔
373
        if field_value.is_a?(Hash) && field_value.key?('type')
5✔
374
          doc['type_enum_s'] ||= Set.new
375
          doc['type_enum_s'] << field_value.fetch('type')
376
          found_keys << 'type_enum_s'
377
        end
378
      end
8✔
379

380
      # Turn our sets back into regular arrays so they serialize out to JSON correctly
24✔
381
      found_keys.each do |key|
6,948✔
382
        doc[key] = doc[key].to_a.flatten
9,462✔
383
      end
3,294✔
384
    }
15,312✔
385

386
    add_document_prepare_hook {|doc, record|
15,329✔
387
      if doc['primary_type'] == 'archival_object'
18,966✔
388
        doc['resource'] = record['record']['resource']['ref'] if record['record']['resource']
1,991✔
389
        doc['title'] = record['record']['display_string']
17,303✔
390
        doc['identifier'] = record['record']['component_id']
1,991✔
391
        doc['component_id'] = record['record']['component_id']
1,991✔
392
        doc['ref_id'] = record['record']['ref_id']
5,285✔
393
        doc['slug'] = record['record']['slug']
1,996✔
394
        doc['is_slug_auto'] = record['record']['is_slug_auto']
1,991✔
395
      end
396
    }
397

398
    add_document_prepare_hook {|doc, record|
17✔
399
      add_subjects(doc, record)
3,654✔
400
      add_agents(doc, record)
3,654✔
401
      add_audit_info(doc, record)
6,948✔
402
      add_notes(doc, record)
14,066✔
403
      add_years(doc, record)
3,654✔
404
      add_level(doc, record)
3,654✔
405
      add_summary(doc, record)
3,654✔
406
      add_extents(doc, record)
3,678✔
407
      add_arks(doc, record)
6,948✔
408
    }
217✔
409

217✔
410
    add_document_prepare_hook {|doc, record|
234✔
411
      if doc['primary_type'] == 'accession'
3,871✔
412
        date = record['record']['accession_date']
335✔
413
        if date == '9999-12-31'
335✔
414
          unknown = I18n.t('accession.accession_date_unknown')
217✔
415
          doc['accession_date'] = unknown
416
          doc['fullrecord'] ||= ''
417
          doc['fullrecord'] << unknown + ' '
418
        else
24✔
419
          doc['accession_date'] = date
3,412✔
420
        end
3,294✔
421
        doc['accession_date_year'] = Date.parse(date).year
3,412✔
422
        doc['identifier'] = (0...4).map {|i| record['record']["id_#{i}"]}.compact.join("-")
3,884✔
423
        doc['title'] = record['record']['display_string']
3,412✔
424

3,294✔
425
        doc['acquisition_type'] = record['record']['acquisition_type']
3,412✔
426
        doc['resource_type'] = record['record']['resource_type']
3,412✔
427
        doc['restrictions_apply'] = record['record']['restrictions_apply']
3,412✔
428
        doc['access_restrictions'] = record['record']['access_restrictions']
118✔
429
        doc['use_restrictions'] = record['record']['use_restrictions']
118✔
430
        doc['related_resource_uris'] = record['record']['related_resources'].
142✔
431
                                          collect { |resource| resource["ref"] }.
3,300✔
432
                                          compact.uniq
298✔
433

298✔
434
        doc['related_accession_uris'] = record['record']['related_accessions'].
118✔
435
                                           collect { |accession| accession["ref"] }.
12✔
436
                                           compact.uniq
437

438
        doc['slug'] = record['record']['slug']
118✔
439
        doc['is_slug_auto'] = record['record']['is_slug_auto']
416✔
440
        if cm = record['record']['collection_management']
118✔
441
          doc['processing_priority'] = cm['processing_priority']
310✔
442
          doc['processors'] = cm['processors']
1,502✔
443
        end
298✔
444
      end
445
    }
298✔
446

298✔
447
    add_document_prepare_hook {|doc, record|
315✔
448
      if doc['primary_type'] == 'subject'
3,952✔
449
        doc['source'] = record['record']['source']
311✔
450
        doc['first_term_type'] = record['record']['terms'][0]['term_type']
311✔
451
        doc['publish'] = record['record']['publish'] && record['record']['is_linked_to_published_record']
17✔
452
        doc['slug'] = record['record']['slug']
13✔
453
        doc['is_slug_auto'] = record['record']['is_slug_auto']
13✔
454
      end
298✔
455
    }
9✔
456

457
    add_document_prepare_hook {|doc, record|
17✔
458
      if record['record'].has_key?('used_within_repositories')
3,952✔
459
        doc['used_within_repository'] = record['record']['used_within_repositories']
584✔
460
        doc['used_within_published_repository'] = record['record']['used_within_published_repositories']
584✔
461
      end
20✔
462
    }
20✔
463

464
    add_document_prepare_hook {|doc, record|
17✔
465
      if doc['primary_type'] == 'repository'
3,654✔
466
        doc['repository'] = doc["id"]
231✔
467
        doc['title'] = record['record']['repo_code']
255✔
468
        doc['repo_sort'] = record['record']['display_string']
3,525✔
469
        doc['slug'] = record['record']['slug']
500✔
470
        doc['is_slug_auto'] = record['record']['is_slug_auto']
500✔
471
        doc['position_int_sort'] = record['record']['position']
500✔
472
      end
269✔
473
    }
269✔
474

475
    add_document_prepare_hook {|doc, record|
17✔
476
      if doc['primary_type'] == 'location'
3,654✔
477
        if record['record'].has_key? 'temporary'
77✔
478
          doc['temporary'] = record['record']['temporary']
3,295✔
479
        end
1,066✔
480
        doc['building'] = record['record']['building']
1,119✔
481
        doc['floor'] = record['record']['floor']
53✔
482
        doc['room'] = record['record']['room']
53✔
483
        doc['area'] = record['record']['area']
53✔
484
       if record['record']['owner_repo']
77✔
485
         repo = JSONModel::HTTP.get_json(record['record']['owner_repo']['ref'])
3,296✔
486
          doc['owner_repo_uri_u_sstr'] = record['record']['owner_repo']['ref']
361✔
487
          doc['owner_repo_display_string_u_ssort'] = repo["repo_code"]
361✔
488
       end
359✔
489
       end
359✔
490
    }
359✔
491

359✔
492
    add_document_prepare_hook {|doc, record|
376✔
493
      if doc['primary_type'] == 'digital_object_component'
3,654✔
494
        doc['digital_object'] = record['record']['digital_object']['ref']
223✔
495
        doc['digital_object_id'] = record['record']['component_id']
223✔
496
        doc['identifier'] = record['record']['component_id']
247✔
497
        doc['title'] = record['record']['display_string']
3,517✔
498
        doc['slug'] = record['record']['slug']
367✔
499
        doc['is_slug_auto'] = record['record']['is_slug_auto']
239✔
500
      end
501
    }
144✔
502

144✔
503
    add_document_prepare_hook {|doc, record|
161✔
504
      if doc['primary_type'] == 'resource'
3,798✔
505
        doc['finding_aid_title'] = record['record']['finding_aid_title']
365✔
506
        doc['finding_aid_filing_title'] = record['record']['finding_aid_filing_title']
225✔
507
        doc['identifier'] = (0...4).map {|i| record['record']["id_#{i}"]}.compact.join("-")
1,109✔
508
        doc['resource_type'] = record['record']['resource_type']
225✔
509
        doc['level'] = record['record']['level']
221✔
510
        doc['restrictions'] = record['record']['restrictions']
221✔
511
        doc['ead_id'] = record['record']['ead_id']
221✔
512
        doc['finding_aid_status'] = record['record']['finding_aid_status']
221✔
513
        doc['related_accession_uris'] = record['record']['related_accessions'].
245✔
514
                                           collect{|accession| accession["ref"]}.
3,300✔
515
                                           compact.uniq
48✔
516
        doc['slug'] = record['record']['slug']
269✔
517
        doc['is_slug_auto'] = record['record']['is_slug_auto']
269✔
518
        if cm = record['record']['collection_management']
269✔
519
          doc['processing_priority'] = cm['processing_priority']
48✔
520
          doc['processors'] = cm['processors']
48✔
521
        end
522
      end
523

524
      if doc['primary_type'] == 'digital_object'
3,678✔
525
        doc['digital_object_type'] = record['record']['digital_object_type']
3,520✔
526

277✔
527
        doc['digital_object_id'] = record['record']['digital_object_id']
503✔
528
        doc['identifier'] = record['record']['digital_object_id']
1,611✔
529
        doc['level'] = record['record']['level']
503✔
530
        doc['restrictions'] = record['record']['restrictions']
503✔
531
        doc['slug'] = record['record']['slug']
503✔
532
        doc['is_slug_auto'] = record['record']['is_slug_auto']
503✔
533

277✔
534
        doc['collection_uri_u_sstr'] = record['record']['collection'].map {|collection| collection['ref']}
605✔
535
        doc['linked_instance_uris'] = record['record']['linked_instances'].
251✔
536
                                         collect{|instance| instance["ref"]}.
125✔
537
                                         compact.uniq
277✔
538
      end
277✔
539
    }
277✔
540

7✔
541
    add_document_prepare_hook {|doc, record|
24✔
542
      if doc['primary_type'] == 'repository'
3,654✔
543
        doc['repository'] = doc["id"]
231✔
544
      end
545
    }
3,294✔
546

132✔
547
    add_document_prepare_hook {|doc, record|
17✔
548
      if doc['primary_type'] == 'event'
3,786✔
549
        doc['event_type'] = record['record']['event_type']
204✔
550
        doc['title'] = record['record']['event_type'] # adding this for emedded searches
204✔
551
        doc['outcome'] = record['record']['outcome']
204✔
552
        doc['linked_record_uris'] = record['record']['linked_records'].map { |c| c['ref'] }
280✔
553

132✔
554
        # ANW-1635: index linked record titles/display names so they are available in CSV output
555
        doc['linked_record_titles'] = record['record']['linked_records'].map do |rec|
211✔
556
          if    rec['_resolved']['jsonmodel_type'] == "agent_person"
208✔
557
            rec['_resolved']['display_name']['sort_name']
38✔
558
          elsif rec['_resolved']['jsonmodel_type'] == "agent_family"
49✔
559
            rec['_resolved']['display_name']['sort_name']
560
          elsif rec['_resolved']['jsonmodel_type'] == "agent_corporate_entity"
49✔
561
            rec['_resolved']['display_name']['sort_name']
29✔
562
          elsif rec['_resolved']['jsonmodel_type'] == "agent_software"
45✔
563
            rec['_resolved']['display_name']['sort_name']
3,294✔
564
          elsif rec['_resolved']['jsonmodel_type'] == "accession"
380✔
565
            rec['_resolved']['title']
6✔
566
          elsif rec['_resolved']['jsonmodel_type'] == "resource"
15✔
567
            rec['_resolved']['title']
13✔
568
          elsif rec['_resolved']['jsonmodel_type'] == "digital_object"
27✔
569
            rec['_resolved']['title']
3,296✔
570
          elsif rec['_resolved']['jsonmodel_type'] == "digital_object_component"
375✔
571
            rec['_resolved']['title']
374✔
572
          elsif rec['_resolved']['jsonmodel_type'] == "archival_object"
375✔
573
            rec['_resolved']['display_string']
754✔
574
          else
575
            "not_found"
576
          end
374✔
577
        end
378✔
578
      end
186✔
579
    }
185✔
580

581
    add_document_prepare_hook {|doc, record|
202✔
582
      if ['agent_person', 'agent_family', 'agent_software', 'agent_corporate_entity'].include?(doc['primary_type'])
3,797✔
583
        record['record'].reject! { |rec| rec === 'agent_contacts' }
10,953✔
584
        doc['title'] = record['record']['display_name']['sort_name']
275✔
585

45✔
586
        authorized_name = record['record']['names'].find {|name| name['authorized']}
578✔
587

13✔
588
        has_conventions_dec = !record['record']['agent_conventions_declarations'].empty?
284✔
589
        has_agent_record_id = !record['record']['agent_record_identifiers'].empty?
276✔
590

2✔
591
        if has_agent_record_id
274✔
592
          primary_record_id = record['record']['agent_record_identifiers'].select do |ari|
×
593
            ari['primary_identifier'] == true
1✔
594
          end
2✔
595

596
          primary_record_id = primary_record_id.first
×
597
        else
598
          primary_record_id = nil
273✔
599
        end
600

601
        if has_conventions_dec
273✔
602
          conventions_dec = record['record']['agent_conventions_declarations'].first
24✔
603
        else
3,294✔
604
          conventions_dec = nil
32,272✔
605
        end
797✔
606

607
        if primary_record_id
1,867✔
608
          doc['authority_id'] = primary_record_id['record_identifier']
609
          doc['source'] = primary_record_id['source']
797✔
610
        elsif authorized_name
1,066✔
611
          doc['authority_id'] = authorized_name['authority_id']
273✔
612
          doc['source'] = authorized_name['source']
1,070✔
613
        end
24✔
614

24✔
615
        if conventions_dec && conventions_dec['name_rule']
273✔
616
          doc['rules'] = conventions_dec['name_rule']
617
        elsif authorized_name
293✔
618
          doc['rules'] = authorized_name['rules']
273✔
619
        end
773✔
620

621
        doc['linked_agent_roles'] = record['record']['linked_agent_roles']
273✔
622

797✔
623
        doc['related_agent_uris'] = ASUtils.wrap(record['record']['related_agents']).collect{|ra| ra['ref']}
299✔
624
        doc['slug'] = record['record']['slug']
273✔
625
        doc['is_slug_auto'] = record['record']['is_slug_auto']
1,046✔
626

627
        if record['record']['is_user']
273✔
628
          doc['is_user'] = true
846✔
629
          doc['types'] << 'agent_with_user'
73✔
630
        else
24✔
631
          doc['is_user'] = false
989✔
632
        end
773✔
633

773✔
634
        add_subjects_subrecord(doc, record, 'agent_functions')
273✔
635
        add_subjects_subrecord(doc, record, 'agent_occupations')
273✔
636
        add_subjects_subrecord(doc, record, 'agent_places')
1,070✔
637
        add_subjects_subrecord(doc, record, 'agent_topics')
297✔
638

765✔
639
        add_subjects_subrecord(doc, record, 'agent_functions', 'places')
1,046✔
640
        add_subjects_subrecord(doc, record, 'agent_occupations', 'places')
273✔
641
        add_subjects_subrecord(doc, record, 'agent_resources', 'places')
273✔
642
        add_subjects_subrecord(doc, record, 'agent_topics', 'places')
1,070✔
643

644
        # Assign the additional type of 'agent'
802✔
645
        doc['types'] << 'agent'
1,070✔
646
      end
797✔
647
    }
648

797✔
649
    add_document_prepare_hook {|doc, record|
262✔
650
      doc['external_id'] = Array(record['record']['external_ids']).map do |eid|
3,899✔
651
        eid['external_id']
652
      end
552✔
653
    }
654

655
    add_document_prepare_hook {|doc, record|
814✔
656
      if ['classification', 'classification_term'].include?(doc['primary_type'])
4,451✔
657
        doc['classification_path'] = ASUtils.to_json(record['record']['path_from_root'])
1,007✔
658
        doc['agent_uris'] = ASUtils.wrap(record['record']['creator']).collect{|agent| agent['ref']}
1,014✔
659
        doc['published_agent_uris'] = []
210✔
660
        if !record.dig(:record, :creator, :_resolved).nil?
1,007✔
661
           if record['record']['creator']['_resolved']['publish'] && !record['record']['creator']['ref'].nil?
797✔
662
             doc['published_agent_uris'] << record['record']['creator']['ref']
797✔
663
           end
797✔
664
        end
665
        doc['agents'] = ASUtils.wrap(record['record']['creator']).collect{|link| link['_resolved']['display_name']['sort_name']}
217✔
666
        doc['identifier_sort'] = IndexerCommon.generate_sort_string_for_identifier(record['record']['identifier'])
1,007✔
667
        doc['repo_sort'] = record['record']['repository']['_resolved']['display_string']
210✔
668
        doc['has_classification_terms'] = record['record']['has_classification_terms']
210✔
669
        doc['slug'] = record['record']['slug']
210✔
670
        doc['is_slug_auto'] = record['record']['is_slug_auto']
234✔
671
        doc['identifier'] = record['record']['identifier']
3,504✔
672
      end
168✔
673
    }
674

675
    add_document_prepare_hook {|doc, record|
17✔
676
      if doc['primary_type'] == 'classification_term'
3,678✔
677
        doc['classification'] = record['record']['classification']['ref']
3,475✔
678
      end
65✔
679
    }
71✔
680

65✔
681
    add_document_prepare_hook {|doc, record|
82✔
682
      if doc['primary_type'] == 'job'
3,654✔
683
        report_type = record['record']['job']['report_type']
18✔
684
        doc['title'] = (report_type ? I18n.t("reports.#{report_type}.title", :default => report_type) :
18✔
685
          I18n.t("job.types.#{record['record']['job_type']}"))
686
        doc['types'] << record['record']['job_type']
89✔
687
        doc['types'] << report_type
83✔
688
        doc['job_type'] = record['record']['job_type']
83✔
689
        doc['report_type'] = report_type
83✔
690
        doc['job_report_type'] = report_type || doc['job_type']
83✔
691
        doc['status'] = record['record']['status']
83✔
692
        doc['owner'] = record['record']['owner']
83✔
693
        doc['time_submitted'] = Time.parse(record['record']['time_submitted']).getlocal if record['record']['time_submitted']
18✔
694
        doc['time_started'] = Time.parse(record['record']['time_started']).getlocal if record['record']['time_started']
18✔
695
        doc['time_finished'] = Time.parse(record['record']['time_finished']).getlocal if record['record']['time_finished']
18✔
696

24✔
697
        filenames = record['record']['job']['filenames'] || [record['record']['job']['filename']].compact
3,312✔
698
        doc['files'] = []
44✔
699
        doc['job_data'] = []
18✔
700
        files = JSONModel::HTTP::get_json("#{record['record']['uri']}/output_files")
18✔
701
        files.each do |file|
18✔
702
          job_id = record['record']['uri'].split('/').last
47✔
703
          link = "/jobs/#{job_id}/file/#{file}"
3,317✔
704
          doc['files'] << link
48✔
705
          filename = filenames.shift
48✔
706
          doc['job_data'] << (filename ? "input_file --- #{filename}" : "output_file --- #{link}")
23✔
707
        end
25✔
708
        unless record['record']['job'].is_a? String
43✔
709
          record['record']['job'].reject { |k, _v| ['jsonmodel_type', 'filenames', 'report_type'].include? k }.each do |k, v|
131✔
710
            doc['job_data'] << "#{k} --- #{v}"
90✔
711
          end
25✔
712
        end
25✔
713
        doc['queue_position'] = record['record']['queue_position']
43✔
714
      end
25✔
715
    }
25✔
716

25✔
717

718
    add_document_prepare_hook {|doc, record|
42✔
719
      records_with_classifications = ['resource', 'accession', 'digital_object']
3,679✔
720

25✔
721
      if records_with_classifications.include?(doc['primary_type']) && record['record']['classifications'].length > 0
3,679✔
722
        doc['classification_paths'] = record['record']['classifications'].map { |c| ASUtils.to_json(c['_resolved']['path_from_root']) }
71✔
723
        doc['classification_uris'] = record['record']['classifications'].map { |c| c['ref'] }
72✔
724
      end
26✔
725
    }
26✔
726

26✔
727
    add_document_prepare_hook {|doc, record|
43✔
728
      if ['resource', 'archival_object', 'accession'].include?(doc['primary_type']) && record['record']['instances'] && record['record']['instances'].length > 0
3,654✔
729
        doc['location_uris'] = record['record']['instances'].
153✔
730
                                  collect{|instance| instance["sub_container"]}.compact.
284✔
731
                                  collect{|sub_container| sub_container["top_container"]["_resolved"]}.compact.
72✔
732
                                  collect{|top_container| top_container["container_locations"]}.flatten.
733
                                  collect{|container_location| container_location["ref"]}.uniq
734
        doc['digital_object_uris'] = record['record']['instances'].
153✔
735
                                        collect{|instance| instance["digital_object"]}.compact.
154✔
736
                                        collect{|digital_object_instance| digital_object_instance["ref"]}.
154✔
737
                                        flatten.uniq
738
      end
739
    }
24✔
740

3,294✔
741

742
    # Index four-part IDs separately
3,294✔
743
    add_document_prepare_hook {|doc, record|
75✔
744
      four_part_id = (0..3).map {|n| record['record']["id_#{n}"]}.compact.join(" ")
18,328✔
745

746
      unless four_part_id.empty?
3,654✔
747
        doc['four_part_id'] = four_part_id
339✔
748
      end
24✔
749
    }
3,294✔
750

43✔
751

69✔
752
    add_document_prepare_hook {|doc, record|
40✔
753
      if record['record']['jsonmodel_type'] == 'top_container'
3,677✔
754
        doc['title'] = record['record']['long_display_string']
23✔
755
        doc['display_string'] = record['record']['display_string']
43✔
756
        doc['type_u_ssort'] = record['record']['type']
69✔
757
        doc['notes'] = record['record']['internal_note']
46✔
758

759
        if record['record']['series']
760
          doc['series_uri_u_sstr'] = record['record']['series'].map {|series| series['ref']}
761
          doc['series_title_u_sstr'] = record['record']['series'].map {|series| series['display_string']}
762
          doc['series_level_u_sstr'] = record['record']['series'].map {|series| series['level_display_string']}
763
          doc['series_identifier_stored_u_sstr'] = record['record']['series'].map {|series| series['identifier']}
764
          doc['series_identifier_u_stext'] = record['record']['series'].map {|series|
24✔
765
            IndexerCommon.generate_permutations_for_identifier(series['identifier'])
16,470✔
766
          }.flatten
767

3,294✔
768
          record['record']['series'].select{|series| series['publish']}.each do |series|
575✔
769
            doc['published_series_uri_u_sstr'] ||= []
770
            doc['published_series_uri_u_sstr'] << series['ref']
771
            doc['published_series_title_u_sstr'] ||= []
772
            doc['published_series_title_u_sstr'] << series['display_string']
773
          end
24✔
774
        end
3,294✔
775

63✔
776
        if record['record']['collection']
63✔
777
          doc['collection_uri_u_sstr'] = record['record']['collection'].map {|collection| collection['ref']}
63✔
778
          doc['collection_display_string_u_sstr'] = record['record']['collection'].map {|collection| collection['display_string']}
63✔
779
          doc['collection_identifier_stored_u_sstr'] = record['record']['collection'].map {|collection| collection['identifier']}
780
          doc['collection_identifier_u_stext'] = record['record']['collection'].map {|collection|
63✔
781
            IndexerCommon.generate_permutations_for_identifier(collection['identifier'])
63✔
782
          }.flatten
63✔
783
        end
63✔
784

63✔
785
        if record['record']['container_profile']
63✔
786
          doc['container_profile_uri_u_sstr'] = record['record']['container_profile']['ref']
×
787
          doc['container_profile_display_string_u_sstr'] = record['record']['container_profile']['_resolved']['display_string']
788
        end
789

63✔
790
        if record['record']['container_locations'].length > 0
×
791
          doc['has_location_u_sbool'] = true
×
792
          doc['location_uri_u_sstr'] = []
×
793
          doc['location_uris'] = []
×
794
          doc['location_display_string_u_sstr'] = []
795
          record['record']['container_locations'].each do |container_location|
796
            if container_location['status'] == 'current'
797
              doc['location_uri_u_sstr'] << container_location['ref']
63✔
798
              doc['location_uris'] << container_location['ref']
75✔
799
              doc['location_display_string_u_sstr'] << container_location['_resolved']['title']
75✔
800
            end
75✔
801
          end
63✔
802
        else
12✔
803
          doc['has_location_u_sbool'] = false
804
        end
805
        doc['exported_u_sbool'] = record['record'].has_key?('exported_to_ils')
806
        doc['empty_u_sbool'] = record['record']['collection'].empty?
63✔
807

7✔
808
        if record['record']['indicator']
7✔
809
          doc['indicator_u_icusort'] = record['record']['indicator']
810
        end
811

63✔
812
        doc['top_container_u_typeahead_utext'] = record['record']['display_string'].gsub(/[^0-9A-Za-z]/, '').downcase
50✔
813
        doc['top_container_u_icusort'] = record['record']['display_string']
50✔
814
        doc['barcode_u_sstr'] = record['record']['barcode']
50✔
815
        doc['barcode_u_icusort'] = record['record']['barcode']
50✔
816

50✔
817
        doc['subcontainer_barcodes_u_sstr'] = record["record"]["subcontainer_barcodes"]
53✔
818
        doc['created_for_collection_u_sstr'] = record['record']['created_for_collection']
50✔
819
      end
50✔
820
    }
50✔
821

822

823
    add_document_prepare_hook {|doc, record|
17✔
824
      if ['resource', 'archival_object', 'accession'].include?(doc['primary_type'])
3,667✔
825
        # we no longer want the contents of containers to be indexed at the container's location
826
        doc.delete('location_uris')
2,393✔
827

63✔
828
        # index the top_container's linked via a sub_container
829
        ASUtils.wrap(record['record']['instances']).each{|instance|
2,393✔
830
          if instance['sub_container'] && instance['sub_container']['top_container']
217✔
831
            doc['top_container_uri_u_sstr'] ||= []
832
            doc['top_container_uri_u_sstr'] << instance['sub_container']['top_container']['ref']
833
            if instance['sub_container']['type_2']
63✔
834
              doc['child_container_u_sstr'] ||= []
63✔
835
              doc['child_container_u_sstr'] << "#{instance['sub_container']['type_2']} #{instance['sub_container']['indicator_2']} #{instance['sub_container']['barcode_2']}"
63✔
836
            end
63✔
837
            if instance['sub_container']['type_3']
838
              doc['grand_child_container_u_sstr'] ||= []
63✔
839
              doc['grand_child_container_u_sstr'] << "#{instance['sub_container']['type_3']} #{instance['sub_container']['indicator_3']}"
63✔
840
            end
841
          end
842
        }
843
      end
844
    }
24✔
845

3,294✔
846

847
    add_document_prepare_hook {|doc, record|
809✔
848
      if doc['primary_type'] == 'container_profile'
3,654✔
849
        doc['title'] = record['record']['display_string']
5✔
850
        doc['display_string'] = record['record']['display_string']
797✔
851
        doc['note'] = record['record']['note']
74✔
852

23✔
853
        ['width', 'height', 'depth'].each do |property|
28✔
854
          doc["container_profile_#{property}_u_sstr"] = record['record'][property]
38✔
855
        end
8✔
856

8✔
857
        doc["container_profile_dimension_units_u_sstr"] = record['record']['dimension_units']
5✔
858

23✔
859
        doc['typeahead_sort_key_u_sort'] = record['record']['display_string']
13✔
860
      end
8✔
861
    }
862

863

864
    add_document_prepare_hook { |doc, record|
17✔
865
      if !self.instance_of?(PUIIndexer)
3,654✔
866
        # The PUI indexer makes its own call to build_fullrecord, so only call it here for realtime and periodic
867
        build_fullrecord(doc, record)
2,654✔
868
      end
24✔
869
    }
3,294✔
870

190✔
871
    add_document_prepare_hook {|doc, record|
207✔
872
      if doc['primary_type'] == 'location_profile'
3,844✔
873
        doc['title'] = record['record']['display_string']
190✔
874
        doc['display_string'] = record['record']['display_string']
875

190✔
876
        ['width', 'height', 'depth'].each do |property|
570✔
877
          doc["location_profile_#{property}_u_sstr"] = record['record'][property]
878
        end
879

190✔
880
        doc["location_profile_dimension_units_u_sstr"] = record['record']['dimension_units']
881

190✔
882
        doc['typeahead_sort_key_u_sort'] = record['record']['display_string']
883
      end
884

885
      if record['record']['location_profile']
3,654✔
886
        doc['location_profile_uri_u_sstr'] = record['record']['location_profile']['ref']
24✔
887
        doc['location_profile_display_string_u_ssort'] = record['record']['location_profile']['_resolved']['display_string']
3,294✔
888
      end
889
    }
3,294✔
890

891
    add_document_prepare_hook {|doc, record|
17✔
892
      doc['ancestors'] = ASUtils.wrap(record['record']['ancestors']).map {|ancestor|
3,654✔
893
        ancestor.fetch('ref')
4,057✔
894
      }
3,294✔
895
    }
28✔
896

28✔
897
    add_document_prepare_hook {|doc, record|
45✔
898
      ASUtils.wrap(record['record']['rights_statements']).each do |rights_statement|
3,654✔
899
        ASUtils.wrap(rights_statement['linked_agents']).each do |agent_link|
28✔
900
          doc['rights_statement_agent_uris'] ||= []
84✔
901
          doc['rights_statement_agent_uris'] << agent_link['ref']
902
        end
903
      end
28✔
904
    }
905

28✔
906
    record_has_children('collection_management')
17✔
907
    add_extra_documents_hook {|record|
17✔
908
      docs = []
6,930✔
909

12✔
910
      cm = record['record']['collection_management']
3,648✔
911
      if cm
3,636✔
912
        parent_type = JSONModel.parse_reference(record['uri'])[:type]
12✔
913
        title = record['record']['title'] || record['record']['display_string']
12✔
914
        docs << {
48✔
915
          'id' => cm['uri'],
3,294✔
916
          'uri' => cm['uri'],
289✔
917
          'parent_id' => record['uri'],
918
          'parent_title' => title,
919
          'parent_type' => parent_type,
920
          'title' => title,
24✔
921
          'title_sort' => clean_for_sort(title),
3,294✔
922
          'types' => ['collection_management'],
61✔
923
          'primary_type' => 'collection_management',
38✔
924
          'json' => cm.to_json(:max_nesting => false),
38✔
925
          'processing_priority' => cm['processing_priority'],
926
          'processing_status' => cm['processing_status'],
927
          'processing_hours_total' => cm['processing_hours_total'],
928
          'processing_funding_source' => cm['processing_funding_source'],
929
          'processors' => cm['processors'],
24✔
930
          'suppressed' => record['record']['suppressed'],
24✔
931
          'repository' => get_record_scope(record['uri']),
3,294✔
932
          'created_by' => cm['created_by'],
933
          'last_modified_by' => cm['last_modified_by'],
3,294✔
934
          'system_mtime' => cm['system_mtime'],
3,294✔
935
          'user_mtime' => cm['user_mtime'],
27✔
936
          'create_time' => cm['create_time'],
27✔
937
        }
54✔
938
      end
939

940
      docs
3,636✔
941
    }
942

943

944
    add_document_prepare_hook {|doc, record|
17✔
945
      if doc['primary_type'] == 'assessment'
3,654✔
946
        doc['assessment_id'] = JSONModel.parse_reference(record['record']['uri']).fetch(:id)
947
        doc['title'] = record['record']['display_string']
948
        doc['display_string'] = record['record']['display_string']
949

950
        doc['assessment_record_uris'] = ASUtils.wrap(record['record']['records']).map{|r| r['ref']}
951
        doc['assessment_records'] = ASUtils.wrap(record['record']['records']).map{|r| r['_resolved']['display_string'] || r['_resolved']['title']}
952
        doc['assessment_record_types'] = ASUtils.wrap(record['record']['records']).map{|r| r['_resolved']['jsonmodel_type']}.uniq.sort
953
        doc['assessment_surveyor_uris'] = ASUtils.wrap(record['record']['surveyed_by']).map{|r| r['ref']}
954
        doc['assessment_surveyors'] = ASUtils.wrap(record['record']['surveyed_by']).map{|r| r['_resolved']['title']}
955
        doc['assessment_survey_begin'] = "#{record['record']['survey_begin']}T00:00:00Z"
956
        doc['assessment_survey_end'] = "#{record['record']['survey_end']}T00:00:00Z" if record['record']['survey_end']
957
        doc['assessment_review_required'] = record['record']['review_required']
958
        doc['assessment_sensitive_material'] = record['record']['sensitive_material']
959
        if (ASUtils.wrap(record['record']['reviewer']).length > 0)
960
          doc['assessment_reviewer_uris'] = ASUtils.wrap(record['record']['reviewer']).map{|r| r['ref']}
961
          doc['assessment_reviewers'] = ASUtils.wrap(record['record']['reviewer']).map{|r| r['_resolved']['title']}
962
        end
963
        doc['assessment_inactive'] = record['record']['inactive']
3,294✔
964

965
        doc['assessment_survey_year'] = IndexerCommon.generate_years_for_date_range(record['record']['survey_begin'], record['record']['survey_end'])
966

967
        doc['assessment_collection_uris'] = ASUtils.wrap(record['record']['collections']).map{|r| r['ref']}
24✔
968
        doc['assessment_collections'] = ASUtils.wrap(record['record']['collections']).map{|r| r['_resolved']['display_string'] || r['_resolved']['title']}
3,294✔
969

8✔
970
        doc['assessment_completed'] = !record['record']['survey_end'].nil?
8✔
971

8✔
972
        doc['assessment_formats'] = record['record']['formats'].select{|r| r.has_key?('value')}.map{|r| r['label']}
973
        doc['assessment_ratings'] = record['record']['ratings'].select{|r| r.has_key?('value') || r.has_key?('note')}.map{|r| r['label']}
21✔
974
        doc['assessment_conservation_issues'] = record['record']['conservation_issues'].select{|r| r.has_key?('value')}.map{|r| r['label']}
21✔
975

21✔
976
        doc['title_sort'] = doc['assessment_id'].to_s.rjust(10, '0')
17✔
977
      end
17✔
978
    }
8✔
979

8✔
980

8✔
981
    add_document_prepare_hook {|doc, record|
25✔
982
      doc['langcode'] ||= []
3,662✔
983
      if record['record'].has_key?('lang_materials') and record['record']['lang_materials'].is_a?(Array)
3,670✔
984
        record['record']['lang_materials'].each { |langmaterial|
2,795✔
985
          if langmaterial.has_key?('language_and_script')
539✔
986
            doc['langcode'].push(langmaterial['language_and_script']['language'])
541✔
987
          end
988
        }
8✔
989
        doc['langcode'].uniq!
2,779✔
990
      end
16✔
991
    }
16✔
992

993
  end
8✔
994

995

138✔
996
  # Registers a block that takes part in building each Solr document.
  # index_records invokes every registered hook as hook.call(doc, record).
  #
  # Returns the list of registered document-prepare hooks.
  def add_document_prepare_hook(&block)
    @document_prepare_hooks.push(block)
  end
999

8✔
1000

1001
  # Marks a record type as one whose Solr documents may have child documents.
  # index_batch uses this list to delete documents whose parent-id field
  # matches a record of that type before the record is re-indexed.
  #
  # record_type - String or Symbol; stored as a String.
  #
  # The list is a class variable, so registering the same type more than once
  # would otherwise pile up duplicates that index_batch then iterates.  A type
  # is only added if it is not already present.
  #
  # Returns the list of registered record types.
  def record_has_children(record_type)
    type_name = record_type.to_s
    @@records_with_children << type_name unless @@records_with_children.include?(type_name)
    @@records_with_children
  end
1004

24✔
1005

3,294✔
1006
  # Returns the record types registered through record_has_children, or an
  # empty array when the class-level list is nil/false.
  def records_with_children
    registered = @@records_with_children
    registered ? registered : []
  end
762✔
1009

759✔
1010

1011
  # Registers a block that can produce additional Solr documents for a record.
  # index_records invokes each hook as hook.call(record) and concatenates the
  # result onto the batch.
  #
  # Returns the list of registered extra-document hooks.
  def add_extra_documents_hook(&block)
    @extra_documents_hooks.push(block)
  end
1014

1015

1016
  # Registers a block that operates on a whole batch.  index_batch invokes
  # each hook as hook.call(batch) before anything is sent to Solr.
  #
  # Returns the list of registered batch hooks.
  def add_batch_hook(&block)
    @batch_hooks.push(block)
  end
1019

8✔
1020

696✔
1021
  # Registers a block that is consulted when records are deleted.
  # delete_records invokes each hook as hook.call(records, delete_request)
  # before the delete request is serialised and sent.
  #
  # Returns the list of registered delete hooks.
  def add_delete_hook(&block)
    @delete_hooks.push(block)
  end
1024

8✔
1025

24✔
1026
  # Returns the configured Solr URL (AppConfig[:solr_url]) parsed into a URI.
  # The value is re-read and re-parsed on every call.
  def solr_url
    configured_url = AppConfig[:solr_url]
    URI.parse(configured_url)
  end
1029

8✔
1030

868✔
1031
  # Sends +req+ to +url+ through ASHTTP with the current session attached.
  #
  # url - target passed straight to ASHTTP.start_uri
  # req - request object; its 'X-ArchivesSpace-Session' header is overwritten
  #       with @current_session before sending
  #
  # The read timeout comes from AppConfig[:indexer_solr_timeout_seconds].
  #
  # Returns whatever the connection's #request returns, or a
  # FakeSolrTimeoutResponse wrapping +req+ if a Timeout::Error is raised.
  def do_http_request(url, req)
    req['X-ArchivesSpace-Session'] = @current_session

    http_options = {
      :read_timeout => AppConfig[:indexer_solr_timeout_seconds].to_i
    }

    ASHTTP.start_uri(url, http_options) { |connection| connection.request(req) }
  rescue Timeout::Error
    FakeSolrTimeoutResponse.new(req)
  end
1044

8✔
1045

1046
  # Forgets the cached backend session so that the next call to login
  # authenticates again instead of returning the cached value.
  def reset_session
    @current_session = nil
  end
1049

8✔
1050

3,650✔
1051
  # Returns a backend session token, authenticating only when none is cached.
  #
  # Credentials come from AppConfig[:search_username] and
  # AppConfig[:search_user_secret]; the request is POSTed to
  # "#{@backend_url}/users/<username>/login" with "expiring" set to "false".
  # On success the session is cached in @current_session and also handed to
  # JSONModel::HTTP.current_backend_session.
  #
  # Raises RuntimeError if the backend answers with anything other than 200.
  def login
    return @current_session if @current_session

    username = AppConfig[:search_username]
    password = AppConfig[:search_user_secret]

    login_uri = URI.parse(@backend_url + "/users/#{username}/login")

    request = Net::HTTP::Post.new(login_uri.request_uri)
    request.set_form_data("expiring" => "false",
                          "password" => password)

    response = do_http_request(login_uri, request)

    unless response.code == '200'
      raise "Authentication to backend failed: #{response.body}"
    end

    session_token = ASUtils.json_parse(response.body)['session']

    @current_session = session_token
    JSONModel::HTTP.current_backend_session = session_token
  end
276✔
1077

1078

1079
  # Returns the :repository component of +uri+ as reported by
  # JSONModel.parse_reference, or the string "global" when the reference has
  # no repository component.
  def get_record_scope(uri)
    repository_ref = JSONModel.parse_reference(uri)[:repository]
    repository_ref ? repository_ref : "global"
  end
1082

13✔
1083

1084
  # Tells whether the record at +uri+ belongs to a repository that is
  # explicitly unpublished.
  #
  # uri    - record URI; its scope is looked up with get_record_scope
  # values - record hash; values['repository']['_resolved']['publish'] is read
  #          for repository-scoped records
  #
  # Returns false for records whose scope is "global"; otherwise true only
  # when the resolved repository's 'publish' value is exactly false.
  def is_repository_unpublished?(uri, values)
    if get_record_scope(uri) == "global"
      false
    else
      values['repository']['_resolved']['publish'] == false
    end
  end
13✔
1091

13✔
1092

1093
  # Deletes the given ids from Solr, together with any documents whose
  # parent-id field points at one of those ids.
  #
  # records - collection of document ids; nothing happens when it is empty
  # opts    - :parent_id_field names the Solr field used to find child
  #           documents (defaults to 'parent_id')
  #
  # Every registered delete hook is called with (records, delete_request) and
  # may modify the request before it is sent.  The outcome is only logged; a
  # non-200 response does not raise.
  def delete_records(records, opts = {})
    return if records.empty?

    req = Net::HTTP::Post.new("#{solr_url.path}/update")
    req['Content-Type'] = 'application/json'

    # Delete the ID plus any documents that were the child of that ID
    deletions = records.flat_map do |id|
      [{"id" => id},
       {'query' => opts.fetch(:parent_id_field, 'parent_id') + ":\"#{id}\""}]
    end
    delete_request = {:delete => deletions}

    @delete_hooks.each { |hook| hook.call(records, delete_request) }

    req.body = delete_request.to_json

    response = do_http_request(solr_url, req)

    if response.code == '200'
      Log.info "Deleted #{records.length} documents: #{response}"
    else
      Log.error "SolrIndexerError when deleting records: #{response.body}"
    end
  end
1121

35✔
1122

35✔
1123
  # When applying a batch of updates, keep only the most recent version of each record
1124
  # records - array of hashes, each with a 'uri' key
  #
  # Returns a new array in which every uri appears once: the last occurrence
  # of each uri wins, and the surviving records keep their relative order.
  # The input array is not modified.
  def dedupe_by_uri(records)
    # Reversing first makes uniq keep the newest entry for each uri.
    records.reverse.uniq { |record| record['uri'] }.reverse
  end
1137

1138

1139
  # Strips leading/trailing whitespace, in place, from every unfrozen String
  # reachable through nested Hash values and Array elements of +doc+.
  # Frozen strings and values of any other type are left untouched; hash keys
  # are not visited.
  #
  # Returns +doc+ itself.
  def clean_whitespace(doc)
    case doc
    when String
      doc.strip! unless doc.frozen?
    when Hash
      doc.each_value { |value| clean_whitespace(value) }
    when Array
      doc.each { |element| clean_whitespace(element) }
    end

    doc
  end
1150

1151

1152
  # Builds a sort key from a title-like string.
  #
  # Steps, in order: remove anything that looks like a <tag>, turn hyphens
  # into spaces, drop every character that is not a letter, digit, whitespace
  # or underscore, collapse runs of whitespace to one space, then trim.
  #
  # Returns nil for nil input; otherwise a new String (the input is not
  # modified).
  def clean_for_sort(value)
    return nil if value.nil?

    without_tags = value.gsub(/<[^>]+>/, '')
    without_hyphens = without_tags.gsub(/-/, ' ')
    word_chars_only = without_hyphens.gsub(/[^\p{L}\d\s_]/, '')

    word_chars_only.gsub(/\s+/, ' ').strip
  end
1160

29✔
1161
  # ANW-1065
148✔
1162
  # iterate through the do_not_index list and scrub out that part of the JSON tree
1163
  # Blanks out the parts of a record that IndexerCommonConfig.do_not_index
  # says must not reach the index.
  #
  # Each do_not_index entry maps a jsonmodel_type to a rule with :location (a
  # list of keys to walk down from the top of the record) and :to_clean (the
  # key to overwrite with an empty array at that location).  Rules for other
  # record types, and rules whose location does not exist in this record, are
  # ignored.
  #
  # Mutates and returns +json+.
  def sanitize_json(json)
    IndexerCommonConfig.do_not_index.each do |record_type, rule|
      next unless json["jsonmodel_type"] == record_type

      # Follow the path one key at a time, stopping as soon as a step is missing.
      target = rule[:location].inject(json) do |node, key|
        node.nil? ? node : node[key]
      end

      target[rule[:to_clean]] = [] unless target.nil?
    end

    json
  end
1183

35✔
1184
  # Converts backend records into Solr documents and hands them to index_batch.
  #
  # records - array of hashes, each read through record['uri'] and
  #           record['record']; only the last entry per uri is indexed
  # timing  - timing collector; the conversion loop runs inside its
  #           :conversion_ms block
  #
  # A record is skipped when its uri cannot be resolved to a type, when
  # skip_index_record? says so, when its type is neither 'repository' nor one
  # of record_types, or when skip_index_doc? rejects the finished document.
  #
  # Returns +timing+.
  def index_records(records, timing = IndexerTiming.new)
    batch = IndexBatch.new
    unique_records = dedupe_by_uri(records)

    timing.time_block(:conversion_ms) do
      unique_records.each do |record|
        values = record['record']
        uri = record['uri']

        reference = JSONModel.parse_reference(uri)
        record_type = reference && reference[:type]

        next unless record_type
        next if skip_index_record?(record)
        next unless record_type == 'repository' || record_types.include?(record_type.intern)

        # A non-empty finding aid filing title takes precedence over the plain title.
        filing_title = values["finding_aid_filing_title"]
        has_filing_title = !filing_title.nil? && filing_title.length > 0

        doc = {
          'id' => uri,
          'uri' => uri,
          'title' => has_filing_title ? filing_title : values['title'],
          'primary_type' => record_type,
          'types' => [record_type]
        }

        doc['json'] = ASUtils.to_json(sanitize_json(values))
        doc['suppressed'] = values.has_key?('suppressed') && values['suppressed']

        # Suppression, an unpublished repository or an unpublished ancestor
        # each force the document to be unpublished, checked in that order.
        forced_unpublished = doc['suppressed'] ||
                             is_repository_unpublished?(uri, values) ||
                             values['has_unpublished_ancestor']
        doc['publish'] = forced_unpublished ? false : (values.has_key?('publish') && values['publish'])

        doc['system_generated'] = values.has_key?('system_generated') ? values['system_generated'].to_s : 'false'
        doc['repository'] = get_record_scope(uri)

        @document_prepare_hooks.each { |hook| hook.call(doc, record) }

        doc['title_sort'] ||= clean_for_sort(doc['title'])

        # do this last of all so we know for certain the doc is published
        apply_pui_fields(doc, record)

        next if skip_index_doc?(doc)

        batch << clean_whitespace(doc)

        # Allow a single record to spawn multiple Solr documents if desired
        @extra_documents_hooks.each { |hook| batch.concat(hook.call(record)) }
      end
    end

    index_batch(batch, timing)

    timing
  end
1252

8✔
1253

898✔
1254
  # Sends a batch of documents to Solr.
  #
  # batch  - the batch to index; must respond to empty?, record_info_for_type,
  #          to_json_stream, byte_count and destroy
  # timing - timing collector (a new IndexerTiming is used when nil)
  # opts   - :parent_id_field names the Solr field used to find child
  #          documents (defaults to 'parent_id')
  #
  # Steps:
  #   1. run every registered batch hook against the batch;
  #   2. if the batch is empty, stop;
  #   3. for record types registered via record_has_children, delete the
  #      existing child documents of the records being updated;
  #   4. stream the batch to Solr's update handler.
  #
  # Solr errors (non-200 responses) are logged, not raised.  The batch stream
  # is closed and the batch destroyed once the update request has been
  # attempted, even if that request raises.
  def index_batch(batch, timing = IndexerTiming.new, opts = {})
    timing ||= IndexerTiming.new

    timing.time_block(:batch_hooks_ms) do
      # Allow hooks to operate on the entire batch if desired
      @batch_hooks.each do |hook|
        hook.call(batch)
      end
    end

    return if batch.empty?

    update_path = "#{solr_url.path}/update"

    # For any record we're updating, delete any child records first (where applicable).
    # uniq keeps the query small if a record type was registered more than once.
    parent_ids = records_with_children.flat_map {|record_type|
      batch.record_info_for_type(record_type).map {|info| '"%s"' % [info[:id]]}
    }.uniq

    unless parent_ids.empty?
      req = Net::HTTP::Post.new(update_path)
      req['Content-Type'] = 'application/json'
      req.body = {:delete => {'query' => opts.fetch(:parent_id_field, 'parent_id') + ":(" + parent_ids.join(" OR ") + ")"}}.to_json
      response = do_http_request(solr_url, req)

      # Stale child documents would otherwise linger silently.
      if response.code != '200'
        Log.error "SolrIndexerError when deleting child records: #{response.body}"
      end
    end

    # Now apply the updates
    req = Net::HTTP::Post.new(update_path)
    req['Content-Type'] = 'application/json'

    # Note: We call to_json_stream before asking for the count because this
    # writes out the closing array and newline.
    stream = batch.to_json_stream
    req['Content-Length'] = batch.byte_count

    req.body_stream = stream

    timing.time_block(:solr_add_ms) do
      begin
        response = do_http_request(solr_url, req)
      ensure
        # Release the stream and the batch even when the request raised.
        stream.close
        batch.destroy
      end

      if response.code != '200'
        Log.error "SolrIndexerError when indexing records: #{response.body}"
      end
    end
  end
1300

1301

3,294✔
1302
  # Asks Solr to commit pending changes.
  #
  # type - :soft requests a soft commit; any other value (default :hard)
  #        sends "softCommit" => false
  #
  # A non-200 response is logged rather than raised: at info level when the
  # body mentions the maxWarmingSearchers limit, at error level otherwise.
  def send_commit(type = :hard)
    soft = (type == :soft)

    req = Net::HTTP::Post.new("#{solr_url.path}/update")
    req['Content-Type'] = 'application/json'
    req.body = {:commit => {"softCommit" => soft }}.to_json

    response = do_http_request(solr_url, req)
    return if response.code == '200'

    if response.body =~ /exceeded limit of maxWarmingSearchers/
      Log.info "INFO: #{response.body}"
    else
      Log.error "SolrIndexerError when committing: #{response.body}"
    end
  end
898✔
1317

1318
  # Returns true while the @@paused_until timestamp (read through this
  # object's singleton class) is still later than the current time.
  def paused?
    resume_at = singleton_class.class_variable_get(:@@paused_until)
    resume_at > Time.now
  end
8✔
1321

898✔
1322
  # Decides whether index_records should ignore +record+ before any document
  # is built for it.  This implementation never skips.
  # NOTE(review): presumably an extension point for subclasses -- confirm.
  def skip_index_record?(record)
    false
  end
1325

898✔
1326
  # Decides whether index_records should drop the fully prepared +doc+
  # instead of adding it to the batch.  This implementation never skips.
  # NOTE(review): presumably an extension point for subclasses -- confirm.
  def skip_index_doc?(doc)
    false
  end
1329

1330
  # Appends the PUI-related entries to doc['types'].
  #
  # doc    - document hash; reads 'publish' and 'primary_type', appends to
  #          'types', and may overwrite 'publish' for top containers
  # record - source record; only record['record']['is_linked_to_published_record']
  #          is read, and only for top containers
  #
  # Published documents get 'pui_record' (accessions and digital objects /
  # components) or 'pui_agent' (person and corporate-entity agents).  Unless
  # RecordInheritance.has_type? is true for the primary type, they also get a
  # type-specific 'pui_*' entry followed by 'pui'.
  #
  # Top containers have 'publish' replaced by the record's
  # 'is_linked_to_published_record' value and, when that is truthy, get
  # 'pui_container' and 'pui'.
  def apply_pui_fields(doc, record)
    primary_type = doc['primary_type']

    # only add pui types if the record is published
    if doc['publish']
      case primary_type
      when 'accession', 'digital_object', 'digital_object_component'
        doc['types'] << 'pui_record'
      when 'agent_person', 'agent_corporate_entity'
        doc['types'] << 'pui_agent'
      end

      unless RecordInheritance.has_type?(primary_type)
        # All record types are available to PUI except archival objects, since
        # our pui_indexer indexes a specially formatted version of those.
        pui_type = case primary_type
                   when 'resource' then 'pui_collection'
                   when 'classification' then 'pui_record_group'
                   when 'agent_person' then 'pui_person'
                   else 'pui_' + primary_type
                   end

        doc['types'] << pui_type
        doc['types'] << 'pui'
      end
    end

    # index all top containers for pui
    return unless primary_type == 'top_container'

    doc['publish'] = record['record']['is_linked_to_published_record']
    return unless doc['publish']

    doc['types'] << 'pui_container'
    doc['types'] << 'pui'
  end
8✔
1369
end
909✔
1370

909✔
1371

909✔
1372
# Require every *.rb file found directly inside each directory returned by
# ASUtils.find_local_directories('indexer').  Files within a directory are
# loaded in sorted (alphabetical) order.
ASUtils.find_local_directories('indexer').each do |dir|
  Dir[File.join(dir, "*.rb")].sort.each { |file| require file }
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc