• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

hathitrust / feed / 25167493130

30 Apr 2026 01:23PM UTC coverage: 83.837% (+0.05%) from 83.787%
25167493130

push

github

8740 of 10425 relevant lines covered (83.84%)

461.51 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.58
/lib/HTFeed/RepositoryIterator.pm
1
#!/usr/bin/perl
2

3
use strict;
1✔
4
use warnings;
1✔
5

6
use Cwd;
1✔
7
use File::Basename;
1✔
8
use File::Pairtree qw(ppath2id s2ppchars);
1✔
9

10
package HTFeed::RepositoryIterator;
11

12
# The only restriction on `path` is that it must have a component ending with `sdrX`
13
# where X is one or more digits
14
sub new {
15
  my $class = shift;
5✔
16
  my $path  = shift;
5✔
17

18
  # Resolve symlinks e.g., /sdr1 -> /sdr/1
19
  $path = Cwd::abs_path($path);
5✔
20
  # Remove trailing slash from path if necessary
21
  $path =~ s!/$!!;
5✔
22
  my @pathcomp = split('/', $path);
5✔
23
  # remove base & any empty components
24
  #@pathcomp = grep { $_ ne '' } @pathcomp;
25
  my $sdr_partition = undef;
5✔
26
  if ($path =~ qr!/?sdr/?(\d+)/?!) {
5✔
27
    $sdr_partition = $1;
5✔
28
  } else {
29
    die "Cannot infer SDR partition from $path";
×
30
  }
31
  my $self = {
5✔
32
    # The path to traverse. May be a subpath like /tmp/sdr1/obj/test
33
    path => $path,
34
    sdr_partition => $sdr_partition,
35
    objects_processed => 0,
36
  };
37
  bless($self, $class);
5✔
38
  return $self;
5✔
39
}
40

41
sub next_object {
42
  my $self = shift;
10✔
43

44
  my $obj = undef;
10✔
45
  while (1) {
10✔
46
    my $line = readline($self->_find_pipe);
16✔
47
    last unless defined $line;
16✔
48
    chomp $line;
12✔
49
    # Pairtree stuff
50
    next if $line =~ /pairtree_prefix$/;
12✔
51
    next if $line =~ /pairtree_version/;
12✔
52
    # ignore temporary location
53
    next if $line =~ qr(obj/\.tmp);
12✔
54
    #next if $line =~ /\Qpre_uplift.mets.xml\E/;
55
    #next if $self->_recent_previous_version($line);
56

57
    my ($file_objid, $path, $type) = File::Basename::fileparse($line);
12✔
58
    # Remove trailing slash
59
    $path =~ s!/$!!; 
12✔
60
    next if $self->{prev_path} and $path eq $self->{prev_path};
12✔
61

62
    $self->{objects_processed}++;
6✔
63
    $self->{prev_path} = $path;
6✔
64

65
    # Remove everything up to and including the `sdrX/` or `sdr/X`
66
    my $subpath = $path;
6✔
67
    $subpath =~ s!.*?sdr/?\d+/!!;
6✔
68
    my @pathcomp = split('/', $subpath);
6✔
69
    @pathcomp = grep { $_ ne '' } @pathcomp;
6✔
70
    my $namespace = $pathcomp[1];
6✔
71
    my $directory_objid = pop @pathcomp;
6✔
72
    my $objid = File::Pairtree::ppath2id(join('/', @pathcomp));
6✔
73
    $obj = {
6✔
74
      path => $path,
75
      namespace => $namespace,
76
      # Caller should make sure objid and directory_objid are equivalent,
77
      # and also that objid matches the contents
78
      objid => $objid,
79
      directory_objid => $directory_objid,
80
      contents => $self->_contents($path),
81
    };
82
    last;
6✔
83
  }
84
  return $obj;
10✔
85
}
86

87
sub close {
88
  my $self = shift;
×
89

90
  if ($self->{find_pipe}) {
×
91
    close $self->{find_pipe};
×
92
    $self->{find_pipe} = undef;
×
93
  }
94
}
95

96
# Returns a sorted arrayref with filenames (not full paths) in
97
# an object directory. Excludes . and ..
98
sub _contents {
99
  my $self = shift;
6✔
100
  my $path = shift;
6✔
101

102
  my @contents;
6✔
103
  opendir(my $dh, $path);
6✔
104
  while ( my $file = readdir($dh) ) {
6✔
105
    next if $file eq '.' or $file eq '..';
24✔
106
    push(@contents, $file);
12✔
107
  }
108
  @contents = sort @contents;
6✔
109
  return \@contents;
6✔
110
}
111

112
sub _find_pipe {
113
  my $self = shift;
16✔
114

115
  if (!$self->{find_pipe}) {
16✔
116
    my $find_pipe;
4✔
117
    my $find_cmd = "find $self->{path} -type f|";
4✔
118
    open($find_pipe, $find_cmd) or die("Can't open pipe to find: $!");
4✔
119
    $self->{find_pipe} = $find_pipe;
4✔
120
  }
121
  return $self->{find_pipe};
16✔
122
}
123

124
# NOTE: is this needed?
125
# Does file end with `.old` suffix and is it less than 48 hours old?
126
sub _recent_previous_version {
127
  my $self = shift;
×
128
  my $file = shift;
×
129

130
  if ($file =~ /.old$/) {
×
131
    my $ctime = ( stat($file) )[10];
×
132
    my $ctime_age = time() - $ctime;
×
133
    return 1 if $ctime_age < (86400 * 2);
×
134
  }
135
}
136

137
1;
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc