• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OpenRefine / OpenRefine / 23475931655

24 Mar 2026 06:16AM UTC coverage: 71.076% (+20.3%) from 50.749%
23475931655

Pull #7729

github

web-flow
Merge 6d1de176b into b114f52dc
Pull Request #7729: Make clustering operations cancellable with progress feedback

3472 of 5593 branches covered (62.08%)

Branch coverage included in aggregate %.

24 of 57 new or added lines in 3 files covered. (42.11%)

10041 of 13419 relevant lines covered (74.83%)

3.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.0
/main/src/com/google/refine/commands/browsing/ComputeClustersCommand.java
1
/*
2

3
Copyright 2010, Google Inc.
4
All rights reserved.
5

6
Redistribution and use in source and binary forms, with or without
7
modification, are permitted provided that the following conditions are
8
met:
9

10
    * Redistributions of source code must retain the above copyright
11
notice, this list of conditions and the following disclaimer.
12
    * Redistributions in binary form must reproduce the above
13
copyright notice, this list of conditions and the following disclaimer
14
in the documentation and/or other materials provided with the
15
distribution.
16
    * Neither the name of Google Inc. nor the names of its
17
contributors may be used to endorse or promote products derived from
18
this software without specific prior written permission.
19

20
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,           
27
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY           
28
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31

32
*/
33

34
package com.google.refine.commands.browsing;
35

36
import java.io.IOException;
37
import java.util.concurrent.ConcurrentHashMap;
38

39
import javax.servlet.ServletException;
40
import javax.servlet.http.HttpServletRequest;
41
import javax.servlet.http.HttpServletResponse;
42

43
import com.fasterxml.jackson.databind.JsonNode;
44
import org.slf4j.Logger;
45
import org.slf4j.LoggerFactory;
46

47
import com.google.refine.browsing.Engine;
48
import com.google.refine.clustering.Clusterer;
49
import com.google.refine.clustering.ClustererConfig;
50
import com.google.refine.clustering.ClusteringProcess;
51
import com.google.refine.clustering.binning.KeyerFactory;
52
import com.google.refine.clustering.binning.UserDefinedKeyer;
53
import com.google.refine.clustering.knn.DistanceFactory;
54
import com.google.refine.clustering.knn.UserDefinedDistance;
55
import com.google.refine.commands.Command;
56
import com.google.refine.model.Project;
57
import com.google.refine.util.ParsingUtilities;
58

59
public class ComputeClustersCommand extends Command {
3✔
60

61
    final static Logger logger = LoggerFactory.getLogger("compute-clusters_command");
3✔
62

63
    private static final ConcurrentHashMap<Long, ClusteringProcess> _activeProcesses = new ConcurrentHashMap<>();
5✔
64

65
    @Override
66
    public void doPost(HttpServletRequest request, HttpServletResponse response)
67
            throws ServletException, IOException {
68
        if (!hasValidCSRFToken(request)) {
4✔
69
            respondCSRFError(response);
2✔
70
            return;
1✔
71
        }
72

73
        try {
74
            Project project = getProject(request);
4✔
75
            Engine engine = getEngine(request, project);
4✔
76
            String clusterer_conf = request.getParameter("clusterer");
4✔
77

78
            JsonNode jsonObject = ParsingUtilities.mapper.readTree(clusterer_conf);
4✔
79
            JsonNode params = jsonObject.get("params");
4✔
80

81
            if (params != null && params.has("expression")) {
6!
82
                String expression = params.get("expression").asText();
5✔
83
                if (jsonObject.has("function") && "UserDefinedKeyer".equals(jsonObject.get("function").asText())) {
11!
84
                    KeyerFactory.put("userdefinedkeyer", new UserDefinedKeyer(expression));
7✔
85
                } else {
86
                    DistanceFactory.put("userdefineddistance", new UserDefinedDistance(expression));
×
87
                }
88
            }
89

90
            ClustererConfig clustererConfig = ParsingUtilities.mapper.readValue(clusterer_conf, ClustererConfig.class);
6✔
91
            Clusterer clusterer = clustererConfig.apply(project);
4✔
92

93
            // Cancel any existing clustering for this project
94
            cancelActiveProcess(project.id);
3✔
95

96
            ClusteringProcess process = new ClusteringProcess(
5✔
97
                    clusterer, engine,
98
                    "Clustering column by " + clustererConfig.getType());
4✔
99

100
            _activeProcesses.put(project.id, process);
7✔
101

102
            project.getProcessManager().queueProcess(process);
5✔
103

104
            respondJSON(response, ParsingUtilities.mapper.createObjectNode().put("code", "pending"));
7✔
105
            logger.info("queued clustering process [{}] for project {}", clustererConfig.getType(), project.id);
8✔
106
        } catch (Exception e) {
×
107
            respondException(response, e);
×
108
        }
1✔
109
    }
1✔
110

111
    static ClusteringProcess getActiveProcess(long projectId) {
112
        return _activeProcesses.get(projectId);
6✔
113
    }
114

115
    static void removeActiveProcess(long projectId) {
NEW
116
        _activeProcesses.remove(projectId);
×
NEW
117
    }
×
118

119
    static void cancelActiveProcess(long projectId) {
120
        ClusteringProcess existing = _activeProcesses.remove(projectId);
6✔
121
        if (existing != null && existing.isRunning()) {
2!
NEW
122
            existing.cancel();
×
123
        }
124
    }
1✔
125
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc