• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

oracle / opengrok / #3648

24 Oct 2023 03:07PM UTC coverage: 75.777% (+1.3%) from 74.441%
#3648

push

vladak
refactory repository history check

9 of 9 new or added lines in 1 file covered. (100.0%)

44388 of 58577 relevant lines covered (75.78%)

0.76 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.98
/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryLookupCached.java
1
/*
2
 * CDDL HEADER START
3
 *
4
 * The contents of this file are subject to the terms of the
5
 * Common Development and Distribution License (the "License").
6
 * You may not use this file except in compliance with the License.
7
 *
8
 * See LICENSE.txt included in this distribution for the specific
9
 * language governing permissions and limitations under the License.
10
 *
11
 * When distributing Covered Code, include this CDDL HEADER in each
12
 * file and include the License file at LICENSE.txt.
13
 * If applicable, add the following below this CDDL HEADER, with the
14
 * fields enclosed by brackets "[]" replaced with your own identifying
15
 * information: Portions Copyright [yyyy] [name of copyright owner]
16
 *
17
 * CDDL HEADER END
18
 */
19

20
/*
21
 * Copyright (c) 2020, Anatoly Akkerman <akkerman@gmail.com>, <anatoly.akkerman@twosigma.com>.
22
 */
23
package org.opengrok.indexer.history;
24

25
import org.opengrok.indexer.logger.LoggerFactory;
26

27
import java.io.IOException;
28
import java.nio.file.FileSystem;
29
import java.nio.file.Files;
30
import java.nio.file.Path;
31
import java.nio.file.Paths;
32
import java.util.ArrayList;
33
import java.util.Collection;
34
import java.util.Comparator;
35
import java.util.List;
36
import java.util.Map;
37
import java.util.Optional;
38
import java.util.Set;
39
import java.util.TreeSet;
40
import java.util.concurrent.ConcurrentHashMap;
41
import java.util.concurrent.ConcurrentMap;
42
import java.util.logging.Level;
43
import java.util.logging.Logger;
44
import java.util.stream.Collectors;
45

46
public class RepositoryLookupCached implements RepositoryLookup {
1✔
47
    private static final Logger LOGGER = LoggerFactory.getLogger(RepositoryLookupCached.class);
1✔
48

49
    /**
50
     * Cache of directories to their enclosing repositories.
51
     *
52
     * This cache will contain Optional.empty() for any directory that has so far not been mapped to a repo.
53
     * See findRepository for more information.
54
     */
55
    private final ConcurrentMap<String, Optional<Repository>> dirToRepoCache = new ConcurrentHashMap<>();
1✔
56

57
    /**
58
     * Find enclosing Repository for a given file, limiting canonicalization operations
59
     * to a given set of Repository parent dirs.
60
     * Search results are cached in an internal cache to speed up subsequent lookups for all directories
61
     * between path and the root path of the enclosing Repository (if found). Negative results are also
62
     * cached as empty Optional values in the cache. However, negative results are not treated as definitive.
63
     * <p>
64
     * The cache is only invalidated on Repository invalidation or removal
65
     * which means that if the filesystem changes under HistoryGuru, it may return stale results
66
     *
67
     * @param filePath      path to find enclosing repository for
68
     * @param repoRoots     set of Repository parent dirs
69
     * @param repositories  map of repository roots to repositories
70
     * @param canonicalizer path canonicalizer implementation to use
71
     * @return Optional Repository (empty if not found)
72
     */
73
    private Optional<Repository> findRepository(final Path filePath, final Set<String> repoRoots,
74
        Map<String, Repository> repositories, PathCanonicalizer canonicalizer) {
75
        Path path = filePath;
1✔
76
        FileSystem fileSystem = filePath.getFileSystem();
1✔
77
        Optional<Repository> maybeRepo = Optional.empty();
1✔
78

79
        // If we find repo mapping, we backfill the cache with these entries as well
80
        List<String> backfillPaths = new ArrayList<>();
1✔
81
        // Walk up the file's path until we find a matching enclosing Repository
82
        while (maybeRepo.isEmpty() && path != null) {
1✔
83
            String nextPath = path.toString();
1✔
84
            boolean isDirectory = Files.isDirectory(path);
1✔
85
            if (isDirectory) {
1✔
86
                /*
87
                 * Only store (and lookup) directory entries in the cache to avoid cache explosion
88
                 * This may fail to find a Repository (e.g. nextPath is inside a root), so this will
89
                 * insert an Optional.empty() into the map. However, as we walk up, we may eventually
90
                 * find a Repo, in which case we need to backfill these placeholder Optional.empty() entries
91
                 * with the final answer
92
                 */
93
                maybeRepo = dirToRepoCache
1✔
94
                    .computeIfAbsent(nextPath, p -> repoForPath(p, repoRoots, repositories, fileSystem, canonicalizer));
1✔
95
            } else {
96
                maybeRepo = repoForPath(nextPath, repoRoots, repositories, fileSystem, canonicalizer);
1✔
97
            }
98
            /*
99
             * Given that this class has to be thread-safe, Optional.empty() value cannot be assumed to be a definitive
100
             * answer that a given directory is not inside a repo, because of backfilling.
101
             * E.g.
102
             * During lookup of repository /a/b/c/d in repository /a, the following happens:
103
             * - /a/b/c/d: no entry in the cache, insert empty
104
             * - /a/b/c: no entry in the cache, insert empty
105
             * - /a/b: no entry in the cache, insert empty
106
             * - /a: found Repo(a), insert it into cache
107
             *
108
             * Now, the code needs to replace (backfill) entries for /a/b/c/d, /a/b/c and /a/b
109
             * However, before that happens, a concurrent findRepository() call can come in and find empty mappings
110
             * for /a/b/c/d, /a/b/c and /a/b, before they were backfilled. So, instead of returning an incorrect
111
             * cached result, we continue confirming negative result fully. This is a reasonable choice because
112
             * most lookups will be made for files that do live inside repositories, so non-empty cached values will
113
             * be the dominant cache hits.
114
             *
115
             * An alternative to provide definitive negative cache result would be to use a special sentinel
116
             * Repository object that represents NO_REPOSITORY as a definitive negative cached value, while Optional.empty
117
             * would represent a tentative negative value.
118
             */
119
            if (maybeRepo.isEmpty()) {
1✔
120
                if (isDirectory) {
1✔
121
                    backfillPaths.add(nextPath);
1✔
122
                }
123
                path = path.getParent();
1✔
124
            }
125
        }
1✔
126
        /*
127
         * If the lookup was for /a/b/c/d and we found the repo at /a, then
128
         *   - /a -> Repo(/a) is done by the computeIfAbsent call
129
         * And backfill fills in:
130
         *   - /a/b -> Repo(/a)
131
         *   - /a/b/c-> Repo(/a)
132
         */
133
        maybeRepo.ifPresent(repo ->
1✔
134
            // Replace empty values with newly found Repository
135
            backfillPaths.forEach(backfillPath -> dirToRepoCache.replace(backfillPath, Optional.empty(), Optional.of(repo))));
1✔
136

137
        return maybeRepo;
1✔
138
    }
139

140
    /**
141
     * Try to find path's enclosing repository by attempting to canonicalize it relative to
142
     * a given set of repository roots. This method can be pretty expensive, with complexity
143
     * of O(N*M) where N is path's depth and M is number of repository parent dirs.
144
     *
145
     * @param path           path to find enclosing Repository for
146
     * @param repoParentDirs limit canonicalization search only to these Repository parent dirs
147
     * @param canonicalizer  path canonicalizer implementation to use
148
     * @return Optional of matching Repository (empty if not found)
149
     */
150
    private Optional<Repository> repoForPath(String path, Set<String> repoParentDirs,
151
        Map<String, Repository> repositories,
152
        FileSystem fileSystem, PathCanonicalizer canonicalizer) {
153
        Optional<Repository> repo = Optional.ofNullable(repositories.get(path));
1✔
154

155
        if (repo.isPresent()) {
1✔
156
            return repo;
1✔
157
        }
158

159
        for (String repoRoot : repoParentDirs) {
1✔
160
            try {
161
                String rel = canonicalizer.resolve(fileSystem.getPath(path), fileSystem.getPath(repoRoot));
1✔
162
                if (!rel.equals(path)) {
1✔
163
                    // Resolve the relative against root key and look it up
164
                    String canonicalPath = Paths.get(repoRoot, rel).toString();
1✔
165
                    repo = Optional.ofNullable(repositories.get(canonicalPath));
1✔
166
                    if (repo.isPresent()) {
1✔
167
                        return repo;
×
168
                    }
169
                }
170
            } catch (IOException e) {
×
171
                LOGGER.log(Level.WARNING, e, () ->
×
172
                    "Failed to get relative to canonical for " + path + " and " + repoRoot);
×
173
            }
1✔
174
        }
1✔
175
        return Optional.empty();
1✔
176
    }
177

178
    @Override
179
    public Repository getRepository(Path path, Set<String> repoParentDirs, Map<String, Repository> repositories,
180
        PathCanonicalizer canonicalizer) {
181
        Comparator<String> stringComparator = String::compareTo;
1✔
182
        Comparator<String> reversedComparator = stringComparator.reversed();
1✔
183
        TreeSet<String> allRoots = new TreeSet<>(repoParentDirs);
1✔
184
        String pathStr = path.toString();
1✔
185
        // parentRepoDirs are canonicalized, so we need to filter them against canonical path representation
186
        Path canonicalPath;
187
        try {
188
            canonicalPath = path.toRealPath();
1✔
189
        } catch (IOException e) {
1✔
190
            canonicalPath = path.normalize().toAbsolutePath();
1✔
191
        }
1✔
192

193
        if (!canonicalPath.equals(path)) {
1✔
194
            pathStr = canonicalPath.toString();
1✔
195
        }
196

197
        /*
198
         * Find all potential Repository parent dirs by matching their roots to file's prefix
199
         * This is useful to limit the number of iterations in repoForPath call.
200
         * Store matches in reverse-ordered TreeSet so that longest prefixes appear first
201
         * (There may be multiple entries if we allow nested repositories)
202
         */
203
        TreeSet<String> filteredParentDirs = allRoots.stream().filter(pathStr::startsWith)
1✔
204
            .collect(Collectors.toCollection(() -> new TreeSet<>(reversedComparator)));
1✔
205

206
        return findRepository(path, filteredParentDirs, repositories, canonicalizer).orElse(null);
1✔
207
    }
208

209
    @Override
210
    public void repositoriesRemoved(Collection<Repository> removedRepos) {
211
        dirToRepoCache.entrySet().stream().filter(entry -> entry.getValue().filter(removedRepos::contains).isPresent())
1✔
212
            .map(Map.Entry::getKey).forEach(dirToRepoCache::remove);
1✔
213
    }
1✔
214

215
    @Override
216
    public void clear() {
217
        dirToRepoCache.clear();
1✔
218
    }
1✔
219

220
    public int size() {
221
        return dirToRepoCache.size();
1✔
222
    }
223
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc