• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

hathitrust / feed / 21404401524

27 Jan 2026 04:10PM UTC coverage: 83.384% (-0.2%) from 83.616%
21404401524

push

github

8421 of 10099 relevant lines covered (83.38%)

471.03 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.38
/lib/HTFeed/ModuleValidator.pm
1
package HTFeed::ModuleValidator;
2

3
use warnings;
23✔
4
use strict;
23✔
5

6
use Carp;
23✔
7
use Data::Dumper qw(Dumper);
23✔
8
use HTFeed::Config qw(get_config);
23✔
9
use HTFeed::XMLNamespaces qw(register_namespaces);
23✔
10
use Log::Log4perl qw(get_logger);
23✔
11
use XML::LibXML;
23✔
12

13
use base qw(HTFeed::XPathValidator);
23✔
14

15
=head1 NAME
16

17
HTFeed::ModuleValidator
18

19
=head1 DESCRIPTION
20

21
        parent class/factory for HTFeed validation plugins
22
        a plugin is responsible for validating Jhove output for **one Jhove
23
        module** as well as running any external filetype specific validation
24

25
        For general Jhove output processing see HTFeed::Validator
26

27
=cut
28

29
=head1 SYNOPSIS
30

31
        my $context_node = $xpc->findnodes("$repInfo/$format"."Metadata");
32
        my $validator = HTFeed::ModuleValidator->new(xpc => $xpc, volume => $volume, filename => $filename);
33
        if ($validator->validate) {
34
                # SUCCESS code...
35
        } else {
36
                my $errors = $validator->getErrors;
37
                # FAILURE code...
38
        }
39

40
=cut
41

42
# new(xpc => $xpc, volume => $volume, filename => $filename)
#
# Factory constructor. Must be invoked on HTFeed::ModuleValidator itself; the
# returned object is blessed into the validator class that the volume's
# namespace/package-type configuration ('module_validators') maps to the
# file's extension.
#
# Croaks when called on any other class, when xpc/volume/filename are missing
# or of the wrong class, when the filename has no recognisable extension, or
# when no module validator is configured for that extension.
sub new {
    my $class = shift;

    # Caller-supplied pairs (@_) override the undef placeholders above them;
    # the bookkeeping fields listed after @_ always start out empty.
    my $object = {
        xpc      => undef,    # XML::LibXML::XPathContext object
        volume   => undef,    # HTFeed::Volume
        filename => undef,    # string, filename
        @_,

        volume_id    => "",
        datetime     => "",
        artist       => "",
        documentname => "",    # set in _setdocumentname
    };

    # __PACKAGE__ does not interpolate inside a string, so concatenate it.
    if ( $class ne __PACKAGE__ ) {
        croak "use " . __PACKAGE__ . " constructor to create $class object";
    }

    # check parameters
    croak "invalid args"
      unless ( $object->{xpc}
        and $object->{volume}
        and $object->{filename}
        and $object->{xpc}->isa("XML::LibXML::XPathContext")
        and $object->{volume}->isa("HTFeed::Volume") );

    # get volume_id
    $object->{volume_id} = $object->{volume}->get_objid();

    # get file extension; capture it directly so that a failed match can
    # never pick up a stale $1 left over from an earlier regex
    my ($file_ext) = $object->{filename} =~ /\.([0-9a-zA-Z]+)$/;
    defined $file_ext
      or croak( "Cannot determine file extension of " . $object->{filename} );

    my $module_validators = $object->{volume}->get_nspkg()->get('module_validators');
    defined $module_validators or croak("No module_validators found for " . $object->{filename});
    my $ext_validator = $module_validators->{$file_ext};
    defined $ext_validator or croak("None of the module_validators match file_ext $file_ext");

    # rebless into the extension-specific validator and let it set itself up
    bless( $object, $ext_validator );
    $object->_xpathInit();
    $object->_set_validators();

    # apply the volume's overrides on top of the validator's own checks,
    # filling in desc/detail only where the validator did not define them
    my $overrides = $object->{volume}->get_validation_overrides($ext_validator);
    while ( my ( $k, $v ) = each(%$overrides) ) {
        $object->{validators}{$k}{valid} = $v;
        $object->{validators}{$k}{desc} = $k if not defined $object->{validators}{$k}{desc};
        $object->{validators}{$k}{detail} = "Package type specific - see $ext_validator" if not defined $object->{validators}{$k}{detail};
    }

    return $object;
}
95

96
# _setdatetime($datetime)
#
# Checks $datetime against the accepted timestamp pattern, trims it to the
# leading "yyyy-mm-ddThh:mm:ss" part and then either stores it (first value
# seen) or compares it with the value already stored.
# Returns 1 on success; records an error through set_error and returns 0 when
# the value is undefined/malformed or differs from the stored one.
sub _setdatetime {
    my ( $self, $candidate ) = @_;

    # validate and trim in one step: the first capture is the timestamp
    # without any time zone suffix
    my $trimmed;
    if ( defined $candidate ) {
        ($trimmed) = $candidate =~
          /^(\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d)(\+\d\d:\d\d|)(Z|[+-]\d{2}:\d{2})?$/;
    }

    if ( not defined $trimmed ) {
        $self->set_error(
            "BadValue",
            field  => 'datetime',
            actual => $candidate,
            remediable => 1,
            expected => 'yyyy-mm-ddThh:mm:ss[+-]hh:mm'
        );
        return 0;
    }

    # nothing stored yet: remember this value
    if ( !$self->{datetime} ) {
        $self->{datetime} = $trimmed;
        return 1;
    }

    # otherwise the new value has to agree with the stored one
    return 1 if $self->{datetime} eq $trimmed;

    $self->set_error(
        "NotMatchedValue",
        field    => 'datetime',
        expected => $self->{datetime},
        actual   => $trimmed
    );
    return 0;
}
136

137
# _setartist($artist)
#
# Stores the artist the first time it is called; on later calls compares the
# supplied value with the stored one.
# Returns 1 when the value was stored or matches, otherwise records a
# NotMatchedValue error through set_error and returns 0.
sub _setartist {
    my ( $self, $candidate ) = @_;

    # nothing stored yet: remember this value
    if ( !$self->{artist} ) {
        $self->{artist} = $candidate;
        return 1;
    }

    # otherwise the new value has to agree with the stored one
    return 1 if $self->{artist} eq $candidate;

    $self->set_error(
        "NotMatchedValue",
        field    => 'artist',
        expected => $self->{artist},
        actual   => $candidate
    );
    return 0;
}
159

160
# _setdocumentname($documentname)
#
# Validates the DocumentName / dc:source value of an image and stores it.
# The value must contain "<volume_id>/<filename>" (case-insensitively); a
# prefix such as "UOM_" in front of it is allowed. When the filename ends in
# an eight-digit sequence number plus ".tif"/".jp2" preceded by other text
# (e.g. UCAL_BARCODE_00000001.tif), "<volume_id>/00000001.tif" is accepted
# as well.
# Returns 1 on success. Records MissingField, NotMatchedValue or BadValue
# through set_error and returns 0 otherwise.
sub _setdocumentname {
    my $self         = shift;
    my $documentname = shift;

    if( not defined $documentname or $documentname eq '') {
        $self->set_error(
            "MissingField",
            field    => 'DocumentName / dc:source',
            remediable => 1,
        );
        return 0;
    }

    # match: a value stored by an earlier call must be repeated exactly
    if ( $self->{documentname} ) {
        if ( $self->{documentname} eq $documentname ) {
            return 1;
        }
        $self->set_error(
            "NotMatchedValue",
            field    => 'DocumentName / dc:source',
            remediable => 1,
            expected => $self->{documentname},
            actual   => $documentname
        );
        return 0;
    }

    # validate
    my $id   = $$self{volume_id};
    my $file = $$self{filename};
    my $stripped_file = $file;

    # If the filename is like UCAL_BARCODE_00000001.tif, the dc:source can
    # match either that or the plain 00000001.tif.
    # The dot before the extension is escaped so that only a literal
    # "NNNNNNNN.tif" / "NNNNNNNN.jp2" suffix is stripped out; an unescaped
    # dot would also accept names such as "X_12345678_tif".
    if ( $file =~ /^.*(\d{8}\.(?:tif|jp2))/ ) {
        $stripped_file = $1;
    }

    my $pattern = "$id/$file";
    my $stripped_pattern = "$id/$stripped_file";

    # $documentname should look like "$id/$file", but "UOM_$id/$file" is allowed
    # so don't use m|^\Q$pattern\E$|i
    unless ( $documentname =~ m|\Q$pattern\E|i or $documentname =~ m|\Q$stripped_pattern\E|i) {
        $self->set_error(
            "BadValue",
            field    => 'DocumentName / dc:source',
            remediable => 1,
            expected => $pattern,
            actual   => $documentname
        );
        return 0;
    }

    # store
    $$self{documentname} = $documentname;
    return 1;
}
218

219

220
# _setupXMPcontext($xmlstring)
#
# Parses a string containing XML, wraps the resulting document in a new
# XML::LibXML::XPathContext with the namespaces from register_namespaces, and
# installs it as the "xmp" context via _setcontext.
# Returns 1 on success; if parsing or context creation dies, records a
# BadField error (with the exception text as detail) and returns 0.
sub _setupXMPcontext {
    my ( $self, $xml ) = @_;

    my $xpc;
    eval {
        my $doc = XML::LibXML->new()->parse_string($xml);
        $xpc = XML::LibXML::XPathContext->new($doc);

        # register namespaces
        register_namespaces($xpc);

    };

    # bail out early when anything inside the eval died
    if ($@) {
        $self->set_error( "BadField", detail => $@, field => 'xmp' );
        return 0;
    }

    $self->_setcontext( name => "xmp", xpc => $xpc, desc => 'XMP metadata');
    return 1;
}
245

246
# set_error($error, %details)
#
# Counts a validation failure on this object and logs it at error level
# through the Log4perl logger named after the object's class, tagging the
# message with the object id, namespace and filename plus any extra
# key/value details supplied by the caller.
# Croaks with "STAGE_ERROR" when the 'stop_on_error' configuration value is
# true; otherwise returns 1.
sub set_error {
    my ( $self, $error, @details ) = @_;

    $self->{fail}++;

    # log error w/ l4p
    my $logger = get_logger( ref($self) );
    $logger->error(
        $error,
        objid     => $self->{volume_id},
        namespace => $self->{volume}->get_namespace(),
        file      => $self->{filename},
        @details
    );

    croak("STAGE_ERROR") if get_config('stop_on_error');

    return 1;
}
265

266
# run(@log_fields)
#
# Runs every validator in $self->{validators} that has a defined 'valid'
# callback, passing the object itself to the callback. Each check is
# announced at trace level (any extra arguments given to run are appended to
# that log call) and a callback returning false is logged as a warning.
# Returns the result of $self->succeeded().
sub run {

    my $self = shift;

    # Walk the validators with values() rather than each(): each() keeps its
    # position inside the hash itself, so a validator that dies part-way
    # through (set_error croaks when stop_on_error is set) would leave the
    # iterator mid-hash and make a later run() on the same object skip
    # validators.
    for my $validator ( values %{ $self->{validators} } ) {
        next unless defined $validator->{valid};
        get_logger( ref($self) )->trace(
            "Validating $validator->{desc}",
            objid     => $self->{volume_id},
            namespace => $self->{volume}->get_namespace(),
            file      => $self->{filename},
            @_
        );

        if ( !$validator->{valid}->($self) ) {
            get_logger( ref($self) ) ->warn("Validation failed",
                objid     => $self->{volume_id},
                namespace => $self->{volume}->get_namespace(),
                file      => $self->{filename},
                field     => $validator->{desc},
                detail    => $validator->{detail},
            );
        }
    }

    return $self->succeeded();
}
293

294
package HTFeed::QueryLib;
295

296
# parent class for HTFeed query plugins
297

298
# we may get some speed benefit from the precompile stage (see _compile)
299
# but the main reason for this class is to
300
# neatly organize a lot of dirty work (the queries) in one spot (the plugins)
301

302
# see HTFeed::QueryLib::JPEG2000_hul for typical subclass example
303

304
# compile all queries, this call is REQUIRED in constructor
#
# Replaces every defined 'query' string under $self->{contexts}{$name} and
# $self->{queries}{$context}{$name} with an XML::LibXML::XPathExpression built
# from it. Entries without a 'query' are left untouched. Always returns 1.
sub _compile {
    my $self = shift;

    for my $context ( values %{ $self->{contexts} } ) {
        next unless defined $context->{query};
        $context->{query} = XML::LibXML::XPathExpression->new( $context->{query} );
    }

    for my $queries_of_context ( values %{ $self->{queries} } ) {
        for my $query_info ( values %{$queries_of_context} ) {

            # Test the entry under 'queries' itself. Looking the key up
            # under 'contexts' instead never finds a query, so nothing gets
            # compiled, and it autovivifies stray entries into 'contexts'.
            next unless defined $query_info->{query};
            $query_info->{query} = XML::LibXML::XPathExpression->new( $query_info->{query} );
        }
    }

    return 1;
}
322

323
# accessors
324
# context($key)
# Returns the 'query' stored for context $key.
sub context {
    my ( $self, $key ) = @_;
    return $self->{contexts}{$key}{query};
}
329
# context_parent($key)
# Returns the 'parent' stored for context $key.
sub context_parent {
    my ( $self, $key ) = @_;
    return $self->{contexts}{$key}{parent};
}
334
# context_name($key)
# Returns the 'desc' (description) stored for context $key.
sub context_name {
    my ( $self, $key ) = @_;
    return $self->{contexts}{$key}{desc};
}
339
# query($parent, $key)
# Returns the 'query' stored for query $key under context $parent.
sub query {
    my ( $self, $parent, $key ) = @_;
    return $self->{queries}{$parent}{$key}{query};
}
345
# query_info($parent, $key)
# Returns the whole entry (hash reference) stored for query $key under
# context $parent, not just its 'query' field.
sub query_info {
    my ( $self, $parent, $key ) = @_;
    return $self->{queries}{$parent}{$key};
}
351

352
1;
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc