• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

hathitrust / feed / 22963608862

11 Mar 2026 04:47PM UTC coverage: 83.718% (+0.1%) from 83.585%
22963608862

push

github

8664 of 10349 relevant lines covered (83.72%)

463.77 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.81
/lib/HTFeed/RepositoryIterator.pm
1
#!/usr/bin/perl
2

3
use strict;
1✔
4
use warnings;
1✔
5

6
use Cwd;
7
use File::Basename;
8
use File::Pairtree qw(ppath2id s2ppchars);
9

1✔
10
package HTFeed::RepositoryIterator;
1✔
11

12
# The only restriction on `path` is that it must have a component ending with `sdrX`
13
# where X is one or more digits
14
sub new {
15
  my $class = shift;
16
  my $path  = shift;
17

5✔
18
  # Resolve symlinks e.g., /sdr1 -> /sdr/1
5✔
19
  $path = Cwd::abs_path($path);
20
  # Remove trailing slash from path if necessary
21
  $path =~ s!/$!!;
5✔
22
  my @pathcomp = split('/', $path);
5✔
23
  # remove base & any empty components
24
  #@pathcomp = grep { $_ ne '' } @pathcomp;
25
  my $sdr_partition = undef;
5✔
26
  if ($path =~ qr!/?sdr/?(\d+)/?!) {
5✔
27
    $sdr_partition = $1;
5✔
28
  } else {
29
    die "Cannot infer SDR partition from $path";
×
30
  }
31
  my $self = {
5✔
32
    # The path to traverse. May be a subpath like /tmp/sdr1/obj/test
33
    path => $path,
34
    sdr_partition => $sdr_partition,
35
    objects_processed => 0,
36
  };
37
  bless($self, $class);
5✔
38
  return $self;
5✔
39
}
40

41
sub next_object {
42
  my $self = shift;
7✔
43

44
  my $obj = undef;
7✔
45
  while (1) {
7✔
46
    my $line = readline($self->_find_pipe);
10✔
47
    last unless defined $line;
10✔
48
    chomp $line;
8✔
49
    # Pairtree stuff
50
    next if $line =~ /pairtree_prefix$/;
8✔
51
    next if $line =~ /pairtree_version/;
52
    # ignore temporary location
8✔
53
    next if $line =~ qr(obj/\.tmp);
54
    #next if $line =~ /\Qpre_uplift.mets.xml\E/;
55
    #next if $self->_recent_previous_version($line);
56

8✔
57
    my ($file_objid, $path, $type) = File::Basename::fileparse($line);
58
    # Remove trailing slash
8✔
59
    $path =~ s!/$!!; 
8✔
60
    next if $self->{prev_path} and $path eq $self->{prev_path};
61

5✔
62
    $self->{objects_processed}++;
5✔
63
    $self->{prev_path} = $path;
64

65
    # Remove everything up to and including the `sdrX/` or `sdr/X`
5✔
66
    my $subpath = $path;
5✔
67
    $subpath =~ s!.*?sdr/?\d+/!!;
5✔
68
    my @pathcomp = split('/', $subpath);
5✔
69
    @pathcomp = grep { $_ ne '' } @pathcomp;
5✔
70
    my $namespace = $pathcomp[1];
5✔
71
    my $directory_objid = pop @pathcomp;
5✔
72
    my $objid = File::Pairtree::ppath2id(join('/', @pathcomp));
5✔
73
    $obj = {
74
      path => $path,
75
      namespace => $namespace,
76
      # Caller should make sure objid and directory_objid are equivalent,
77
      # and also that objid matches the contents
78
      objid => $objid,
79
      directory_objid => $directory_objid,
80
      contents => $self->_contents($path),
81
    };
5✔
82
    last;
83
  }
7✔
84
  return $obj;
85
}
86

87
sub close {
×
88
  my $self = shift;
89

×
90
  if ($self->{find_pipe}) {
×
91
    close $self->{find_pipe};
×
92
    $self->{find_pipe} = undef;
93
  }
94
}
95

96
# Returns a sorted arrayref with filenames (not full paths) in
97
# an object directory. Excludes . and ..
98
sub _contents {
5✔
99
  my $self = shift;
5✔
100
  my $path = shift;
101

5✔
102
  my @contents;
5✔
103
  opendir(my $dh, $path);
5✔
104
  while ( my $file = readdir($dh) ) {
20✔
105
    next if $file eq '.' or $file eq '..';
10✔
106
    push(@contents, $file);
107
  }
5✔
108
  @contents = sort @contents;
5✔
109
  return \@contents;
110
}
111

112
sub _find_pipe {
10✔
113
  my $self = shift;
114

10✔
115
  if (!$self->{find_pipe}) {
4✔
116
    my $find_pipe;
4✔
117
    my $find_cmd = "find $self->{path} -type f|";
4✔
118
    open($find_pipe, $find_cmd) or die("Can't open pipe to find: $!");
4✔
119
    $self->{find_pipe} = $find_pipe;
120
  }
10✔
121
  return $self->{find_pipe};
122
}
123

124
# NOTE: is this needed?
125
# Does file end with `.old` suffix and is it less than 48 hours old?
126
sub _recent_previous_version {
×
127
  my $self = shift;
×
128
  my $file = shift;
129

×
130
  if ($file =~ /.old$/) {
×
131
    my $ctime = ( stat($file) )[10];
×
132
    my $ctime_age = time() - $ctime;
×
133
    return 1 if $ctime_age < (86400 * 2);
134
  }
135
}
136

137
1;
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc