• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tulibraries / isilon-tracker / 25507167079

07 May 2026 03:59PM UTC coverage: 78.298%. First build
25507167079

Pull #379

github

cdoyle-temple
workaround for different db adaptors
Pull Request #379: IMT-208 match-isilon-assets-to-content-dm

4 of 5 new or added lines in 1 file covered. (80.0%)

920 of 1175 relevant lines covered (78.3%)

15.32 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.14
/app/services/sync_service/contentdm_filename_sync.rb
1
# frozen_string_literal: true
2

3
require "csv"
1✔
4
require "fileutils"
1✔
5

6
module SyncService
1✔
7
  class ContentdmFilenameSync
1✔
8
    # Note text written into an asset's +notes+ column when its filename is matched.
    CONTENTDM_FILENAME_MATCH_NOTE = "Filename exists in CONTENTdm"

    # Counters reported by #sync; built with keyword arguments.
    SyncResult = Struct.new(
      :updated_count, :rows_touched, :rows_matched, :rows_unmatched, :rows_discarded,
      keyword_init: true
    )

    # Default for .call's +csv_folder+; nil means the caller must supply a folder
    # (a blank folder is rejected with ArgumentError during validation).
    CSV_FOLDER = nil

    # Column headers read from the input CSVs and written to the non-matches CSV.
    FILENAME_HEADER = "File Name"
    COLLECTION_HEADER = "Collection"

    # Where the report of filenames with no matching asset is written.
    NON_MATCHES_CSV_PATH = Rails.root.join("tmp", "contentdm_filename_non_matches.csv")

    # Number of filenames looked up and updated per database round trip.
    BATCH_SIZE = 500
1✔
22
    # Precedence table used when the same filename is listed under different
    # collections in two different source CSVs.
    #
    # Each key is a pair of CSV basenames in sorted order (lookups sort the pair
    # before indexing); the value is the basename whose entry wins. Pairs that are
    # not listed here have no winner.
    #
    # Frozen so the shared table cannot be modified at runtime.
    CONFLICT_WINNERS = {
      [ "ambler_filenames.csv", "scrc_photographs_filenames.csv" ] => "ambler_filenames.csv",
      [ "bulletin_photos_filenames.csv", "bulletin_photos_restricted_filenames.csv" ] => "bulletin_photos_filenames.csv",
      [ "bulletin_photos_filenames.csv", "inquirer_filenames.csv" ] => "bulletin_photos_filenames.csv",
      [ "bulletin_photos_restricted_filenames.csv", "inquirer_filenames.csv" ] => "bulletin_photos_restricted_filenames.csv",
      [ "cityparks_filenames.csv", "hadv_filenames.csv" ] => "cityparks_filenames.csv",
      [ "inquirer_filenames.csv", "scrc_photographs_filenames.csv" ] => "inquirer_filenames.csv"
    }.freeze
30

31
    # Convenience entry point: builds a service for +csv_folder+ and runs #sync.
    #
    # @param csv_folder [#to_s, nil] directory containing the filename CSVs
    # @return [SyncResult] the value returned by #sync
    def self.call(csv_folder: CSV_FOLDER)
      service = new(csv_folder: csv_folder)
      service.sync
    end
34

35
    # Stores the folder path as a stripped string, or nil when it is blank.
    #
    # @param csv_folder [#to_s, nil] directory containing the filename CSVs
    def initialize(csv_folder:)
      stripped_folder = csv_folder.to_s.strip
      @csv_folder = stripped_folder.presence
    end
38

39
    # Reads every *.csv in the configured folder, tags matching assets per
    # collection, and returns the accumulated counters.
    #
    # The non-matches CSV is rewritten on every run, including runs that raise
    # (it then holds whatever non-matches were collected before the failure).
    #
    # @return [SyncResult]
    # @raise [ArgumentError] when the folder is blank or missing, or holds no CSV files
    def sync
      validate_csv_folder!
      csv_files = Dir.glob(File.join(@csv_folder, "*.csv")).sort
      raise ArgumentError, "No CSV files found in #{@csv_folder}" if csv_files.empty?

      loaded = load_filename_map(csv_files)
      non_matches = []
      totals = {
        updated_count: 0,
        rows_touched: loaded[:rows_touched],
        rows_matched: 0,
        rows_unmatched: 0,
        rows_discarded: loaded[:rows_discarded]
      }

      # An empty map simply skips the loop and yields the load-time counters.
      loaded[:filename_map].each do |collection_name, filenames|
        outcome = update_matching_assets(collection_name, filenames, non_matches)
        %i[updated_count rows_matched rows_unmatched].each do |counter|
          totals[counter] += outcome[counter]
        end
      end

      SyncResult.new(**totals)
    ensure
      # non_matches is still nil if an error was raised before it was assigned.
      write_non_matches_csv(non_matches || [])
    end
69

70
    private
1✔
71

72
    # Parses the given CSV files and groups the usable filenames by collection.
    #
    # Rows with a blank filename or blank collection are skipped. When a
    # normalized filename occurs more than once, a single entry is kept (chosen
    # via #preferred_entry) and the remaining occurrences count as discarded.
    #
    # @param csv_files [Array<String>] paths of the CSV files to read
    # @return [Hash] +:filename_map+ (collection name => array of
    #   +{ normalized_filename:, original_filename: }+ hashes), +:rows_touched+
    #   and +:rows_discarded+
    # @raise [ArgumentError] when a file is missing or #preferred_entry cannot
    #   resolve a conflict
    def load_filename_map(csv_files)
      entries_by_filename = Hash.new { |store, key| store[key] = [] }
      rows_touched = 0

      csv_files.each do |csv_path|
        raise ArgumentError, "CSV file not found: #{csv_path}" unless File.exist?(csv_path)

        basename = File.basename(csv_path)
        CSV.foreach(csv_path, headers: true, liberal_parsing: true) do |row|
          raw_filename = row[FILENAME_HEADER].to_s.strip
          normalized = normalize_filename(raw_filename)
          collection = normalize_collection_name(row[COLLECTION_HEADER])
          next unless normalized.present? && collection.present?

          rows_touched += 1
          entries_by_filename[normalized] << {
            collection_name: collection,
            original_filename: raw_filename,
            source_file: basename
          }
        end
      end

      rows_discarded = 0
      grouped = Hash.new { |store, key| store[key] = [] }

      entries_by_filename.each do |normalized, candidates|
        # Fold the candidates pairwise; the first entry is the initial winner.
        winner = candidates.reduce do |current, challenger|
          preferred_entry(normalized, current, challenger)
        end
        rows_discarded += candidates.size - 1

        grouped[winner[:collection_name]] << {
          normalized_filename: normalized,
          original_filename: winner[:original_filename]
        }
      end

      { filename_map: grouped, rows_touched: rows_touched, rows_discarded: rows_discarded }
    end
118

119
    # Ensures a folder was supplied and that it exists on disk.
    #
    # @return [nil]
    # @raise [ArgumentError] when the folder is blank or is not an existing directory
    def validate_csv_folder!
      raise ArgumentError, "csv_folder is required" if @csv_folder.blank?
      return if Dir.exist?(@csv_folder)

      raise ArgumentError, "CSV folder not found: #{@csv_folder}"
    end
124

125
    # Produces the comparison key for a filename: stripped and lower-cased.
    #
    # @param filename [#to_s, nil]
    # @return [String, nil] nil when nothing is left after stripping
    def normalize_filename(filename)
      lowered = filename.to_s.strip.downcase
      lowered.presence
    end
128

129
    # Strips surrounding whitespace from a collection name; case is kept as-is.
    #
    # @param collection_name [#to_s, nil]
    # @return [String, nil] nil when nothing is left after stripping
    def normalize_collection_name(collection_name)
      stripped = collection_name.to_s.strip
      stripped.presence
    end
132

133
    # Chooses which of two entries for the same normalized filename to keep.
    #
    # Entries naming the same collection are not in conflict, so the existing
    # one is kept. Otherwise the source files decide: CONFLICT_WINNERS is looked
    # up with the sorted pair of source file names.
    #
    # @param filename [String] normalized filename (used in error messages)
    # @param existing_entry [Hash] current winner (+:collection_name+, +:source_file+)
    # @param candidate_entry [Hash] challenger with the same keys
    # @return [Hash] the entry to keep
    # @raise [ArgumentError] when both entries come from the same file, or no
    #   winner is configured for the pair of files
    def preferred_entry(filename, existing_entry, candidate_entry)
      existing_collection = existing_entry[:collection_name]
      candidate_collection = candidate_entry[:collection_name]
      return existing_entry if existing_collection == candidate_collection

      existing_source = existing_entry[:source_file]
      candidate_source = candidate_entry[:source_file]

      if existing_source == candidate_source
        message = "Conflicting collections for filename '#{filename}' within #{existing_source}: " \
                  "'#{existing_collection}' and '#{candidate_collection}'"
        raise ArgumentError, message
      end

      winning_file = CONFLICT_WINNERS[[ existing_source, candidate_source ].sort]
      if winning_file.blank?
        message = "Conflicting collections for filename '#{filename}': '#{existing_collection}' " \
                  "(#{existing_source}) and '#{candidate_collection}' " \
                  "(#{candidate_source})"
        raise ArgumentError, message
      end

      return candidate_entry if winning_file == candidate_source

      existing_entry
    end
152

153
    # Tags every asset whose normalized name appears in +filenames+ with the
    # given collection, processing the filenames in slices of BATCH_SIZE.
    #
    # Entries with no matching asset are appended to +non_matches+ (mutated in
    # place) as +{ original_filename:, collection_name: }+ hashes.
    #
    # @param collection_name [String] name passed to #find_collection!
    # @param filenames [Array<Hash>] entries with +:normalized_filename+ and +:original_filename+
    # @param non_matches [Array<Hash>] accumulator for unmatched entries
    # @return [Hash] +:updated_count+, +:rows_matched+ and +:rows_unmatched+
    def update_matching_assets(collection_name, filenames, non_matches)
      collection = find_collection!(collection_name)
      totals = { updated_count: 0, rows_matched: 0, rows_unmatched: 0 }

      filenames.each_slice(BATCH_SIZE) do |batch|
        lookup_names = batch.map { |entry| entry[:normalized_filename] }

        # Distinct normalized names that exist in the assets table for this slice.
        found_names = matching_assets_scope(lookup_names)
          .distinct
          .pluck(Arel.sql("LOWER(TRIM(isilon_name))"))
        totals[:rows_matched] += found_names.size

        missing = batch.reject { |entry| found_names.include?(entry[:normalized_filename]) }
        totals[:rows_unmatched] += missing.size
        missing.each do |entry|
          non_matches << {
            original_filename: entry[:original_filename],
            collection_name: collection_name
          }
        end

        # update_all's return value is added to the running updated_count.
        totals[:updated_count] += matching_assets_scope(lookup_names).update_all(
          contentdm_collection_id: collection.id,
          notes: notes_update_sql,
          updated_at: Time.current
        )
      end

      totals
    end
190

191
    # Looks up the ContentdmCollection record with the given name via +find_by!+.
    # NOTE(review): +find_by!+ presumably raises ActiveRecord::RecordNotFound when
    # no record matches (standard Rails behaviour) — confirm the model is ActiveRecord.
    #
    # @param collection_name [String]
    # @return [ContentdmCollection]
    def find_collection!(collection_name)
      criteria = { name: collection_name }
      ContentdmCollection.find_by!(criteria)
    end
194

195
    # Builds the IsilonAsset scope whose trimmed, lower-cased +isilon_name+ is
    # one of the given normalized filenames.
    #
    # @param filename_batch [Array<String>] normalized filenames to look for
    # @return the scope returned by +IsilonAsset.where+
    def matching_assets_scope(filename_batch)
      normalized_name_condition = "LOWER(TRIM(isilon_name)) IN (?)"
      IsilonAsset.where(normalized_name_condition, filename_batch)
    end
198

199
    # Overwrites the non-matches report: a header row followed by one row per
    # unmatched entry. The parent directory is created when missing.
    #
    # @param non_matches [Array<Hash>] entries with +:original_filename+ and +:collection_name+
    def write_non_matches_csv(non_matches)
      FileUtils.mkdir_p(NON_MATCHES_CSV_PATH.dirname)

      CSV.open(NON_MATCHES_CSV_PATH, "w") do |report|
        report << [ FILENAME_HEADER, COLLECTION_HEADER ]
        non_matches.each do |entry|
          report << entry.values_at(:original_filename, :collection_name)
        end
      end
    end
210

211
    # SQL expression assigned to +notes+ by the bulk update:
    # - empty or NULL notes become the match note,
    # - notes that do not yet contain the match note get it appended after "; ",
    # - notes that already contain it are left unchanged.
    #
    # @return the CASE expression wrapped with +Arel.sql+
    def notes_update_sql
      note_literal = ActiveRecord::Base.connection.quote(CONTENTDM_FILENAME_MATCH_NOTE)
      position_sql = note_contains_sql(note_literal)

      case_expression = <<~SQL.squish
        CASE
          WHEN notes IS NULL OR TRIM(notes) = '' THEN #{note_literal}
          WHEN #{position_sql} = 0 THEN notes || '; ' || #{note_literal}
          ELSE notes
        END
      SQL

      Arel.sql(case_expression)
    end
223

224
    # SQL fragment giving the position of the note inside +notes+; the caller
    # compares it with 0 to detect "not present". Uses +strpos+ when the
    # connection's adapter name contains "postgres", +instr+ otherwise.
    #
    # @param quoted_note [String] the note, already quoted as an SQL literal
    # @return [String]
    def note_contains_sql(quoted_note)
      adapter = ActiveRecord::Base.connection.adapter_name.downcase
      search_function = adapter.include?("postgres") ? "strpos" : "instr"
      "#{search_function}(notes, #{quoted_note})"
    end
231
  end
232
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc