• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

DaveFoss / DAVE_data / 10622329495

29 Aug 2024 08:40PM UTC coverage: 78.502% (-7.9%) from 86.4%
10622329495

Pull #7

github

uvchik
Revert "Remove file from reverted merge"

This reverts commit cf6ea2f8c.
Pull Request #7: Area to polygon2

59 of 84 branches covered (70.24%)

Branch coverage included in aggregate %.

266 of 330 relevant lines covered (80.61%)

4.03 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.62
/src/dave_data/datapool/osm_request.py
1
from collections import namedtuple
5✔
2
from time import sleep
5✔
3
from urllib.parse import urlencode
5✔
4
from urllib.request import urlopen
5✔
5

6
import pandas as pd
5✔
7
from defusedxml.ElementTree import fromstring
5✔
8
from geopandas import GeoDataFrame
5✔
9
from pandas import DataFrame
5✔
10
from pandas import concat
5✔
11
from pandas import to_datetime
5✔
12
from shapely.geometry import LineString
5✔
13
from shapely.geometry import Point
5✔
14
from six import string_types
5✔
15

16
from dave_data.core import Data
5✔
17
from dave_data.core import MetaData
5✔
18
from dave_data.settings import dave_data_settings
5✔
19

20

21
def osm_request(data_type, area):
    """
    Request OSM data of one configured data type for an area.

    The query definition is looked up in
    ``dave_data_settings["osm_tags"][data_type]``. Entry ``0`` is used as the
    tag key, entry ``1`` (if it is a list) is joined with ``|`` into a regular
    expression filter for the tag value, and entry ``2`` is iterated as the
    OSM element types to query. The results of all element types are
    concatenated into a single frame.

    Parameters
    ----------
    data_type : str
        Key into ``dave_data_settings["osm_tags"]``.
    area : polygon
        Passed to :func:`query_osm` as the query area and stored on the
        returned object as ``polygon``.

    Returns
    -------
    Data
        Object whose ``data`` attribute holds the concatenated results of
        all queried element types.

    Examples
    --------
    >>> from shapely import box
    >>> streets = osm_request("road", box(13.409, 52.519, 13.41, 52.52))
    >>> len(streets.data) > 0
    True

    """
    data_param = dave_data_settings["osm_tags"][data_type]

    # The tag filter does not depend on the element type, so build it once.
    tags = (
        f'{data_param[0]}~"{"|".join(data_param[1])}"'
        if isinstance(data_param[1], list)
        else f"{data_param[0]}"
    )

    # get data from OSM directly via API query, one request per element type
    frames = []
    for osm_type in data_param[2]:
        osm_frame, _ = query_osm(osm_type, area, recurse="down", tags=tags)
        frames.append(osm_frame)

    # Start from an empty GeoDataFrame so the result is well defined even if
    # no element type is configured.
    request_data = concat([GeoDataFrame([]), *frames], ignore_index=True)

    meta = MetaData(
        source_license="ODBL", source_date=None, organisation="OpenStreetMap"
    )
    # Return the concatenation of all element types, not only the result of
    # the last query.
    return Data(
        name="OSM roads filtered",
        description="Some description",
        data=request_data,
        meta=meta,
        polygon=area,
        tags=["roads", "osm"],
    )
58

59

60
# --- request directly from OSM via Overpass API and geopandas_osm package
61

62
# These functions are based on the geopandas_osm python package, which was
63
# published under the following license:
64

65
# The MIT License (MIT)
66

67
# Copyright (c) 2014 Jacob Wasserman
68

69
# Permission is hereby granted, free of charge, to any person obtaining a copy
70
# of this software and associated documentation files (the "Software"), to deal
71
# in the Software without restriction, including without limitation the rights
72
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
73
# copies of the Software, and to permit persons to whom the Software is
74
# furnished to do so, subject to the following conditions:
75

76
# The above copyright notice and this permission notice shall be included in
77
# all copies or substantial portions of the Software.
78

79
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
80
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
81
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
82
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
83
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
84
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
85
# SOFTWARE.
86

87

88
OSMData = namedtuple(
5✔
89
    "OSMData", ("nodes", "waynodes", "waytags", "relmembers", "reltags")
90
)
91
_crs = "epsg:4326"
5✔
92

93
# Tags to remove so we don't clobber the output. This list comes from
94
# osmtogeojson's index.js (https://github.com/tyrasd/osmtogeojson)
95
uninteresting_tags = {
5✔
96
    "source",
97
    "source_ref",
98
    "source:ref",
99
    "history",
100
    "attribution",
101
    "created_by",
102
    "tiger:county",
103
    "tiger:tlid",
104
    "tiger:upload_uuid",
105
}
106

107

108
# http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide
109
def query_osm(
    typ, bbox=None, recurse=None, tags="", raw=False, meta=False, **kwargs
):
    """
    Query the Overpass API to obtain OpenStreetMap data.

    See also:
    http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide

    The OSM XML data is parsed into an intermediate set of DataFrames.
    By passing in 'render=False', this will return these DataFrames stored
    as the OSMData namedtuple. If render is True, then the DataFrames
    are built into their corresponding geometries.

    The request is repeated until a response with status code 200 has been
    received; between attempts the function waits for
    ``dave_data_settings["osm_time_delay"]`` seconds.

    Parameters
    ----------
    typ : {'node', 'way', 'relation'}
        The type of OSM data to query
    bbox : polygon, optional
        Area to restrict the query. The object must provide
        ``exterior.coords`` yielding (lon, lat) pairs (e.g. a Shapely
        polygon); it is sent to Overpass as a ``poly`` filter. Unless the
        query is extremely restricted, you usually want to specify this.
    recurse : {'up', 'down', 'uprel', 'downrel'}
        This is used to get more data than the original query. If 'typ' is
        'way', you'll usually want this set to 'down' which grabs all nodes
        of the matching ways
    tags : string or list of query strings
        See also the OverpassQL (referenced above) for more tag options
        Examples:
            tags='highway'
                Matches objects with a 'highway' tag
            tags='highway=motorway'
                Matches objects where the 'highway' tag is 'motorway'
            tags='name~[Mm]agazine'
                Match if the 'name' tag matches the regular expression

            Specify a list of tag requests to match all of them
            tags=['highway', 'name~"^Magazine"']
                Match tags that have 'highway' and where 'name' starts
                with 'Magazine'
    raw : boolean, default False
        Return the raw XML data returned by the request
    meta : boolean, default False
        Indicates whether to query the metadata with each OSM object. This
        includes the changeset, timestamp, uid, user, and version.
    **kwargs
        Forwarded to :func:`read_osm` (e.g. ``render``, ``drop_untagged``).

    Returns
    -------
    tuple
        ``(result, meta_data)``. ``result`` is the raw response content if
        ``raw`` is True, otherwise the return value of :func:`read_osm`.
        ``meta_data`` is currently a placeholder ``pandas.Series``.

    Raises
    ------
    ValueError
        If the generated URL does not start with 'http:' or 'https:' or if
        ``recurse`` is not recognized.

    Notes
    -----
    There's probably a bit more filtering required to get the exact desired
    data. For example if you only want ways, you may want to grab only the
    linestrings like:

    Examples
    --------
    >>> #  df = df[df.type == 'LineString']

    """
    url = _build_url(typ, bbox, recurse, tags, meta)
    # add time delay because osm doesn't allow more than 1 request per second.
    time_delay = dave_data_settings["osm_time_delay"]

    # Validate the scheme once, outside the retry loop. Inside the loop the
    # ValueError would be swallowed by the broad except clause below and the
    # request would be retried forever.
    if not url.startswith(("http:", "https:")):
        raise ValueError("URL must start with 'http:' or 'https:'")

    while True:
        try:
            with urlopen(url) as response:  # noqa: S310
                content = response.read()
                status = response.getcode()
        except Exception as inst:
            # Deliberate best effort: any failure of the request is reported
            # and the query is repeated after the delay.
            print(f'\n Retry OSM query because of "{inst}"')
        else:
            if status == 200:
                break
        # Wait before every further attempt (failed request or non-200
        # answer) to respect the rate limit.
        sleep(time_delay)

    # get meta information
    meta_data = pd.Series({"meta": "coming soon"})

    if raw:
        return content, meta_data
    return read_osm(content, **kwargs), meta_data
5✔
195

196

197
def _build_url(typ, bbox=None, recurse=None, tags="", meta=False):
5✔
198
    recurse_map = {
5✔
199
        "up": "<",
200
        "uprel": "<<",
201
        "down": ">",
202
        "downrel": ">>",
203
    }
204
    if recurse is None:
5✔
205
        recursestr = ""
×
206
    else:
207
        try:
5✔
208
            recursestr = recurse_map[recurse]
5✔
209
        except KeyError as k_exception:
×
210
            raise ValueError(
×
211
                "Unrecognized recurse value '{}'. "
212
                "Must be one of: {}.".format(
213
                    recurse, ", ".join(recurse_map.keys())
214
                )
215
            ) from k_exception
216

217
    # Allow tags to be a single string
218
    if isinstance(tags, string_types) and tags:
5✔
219
        tags = [tags]
5✔
220
    queries = "".join(f"[{t}]" for t in tags)
5✔
221

222
    # Overpass QL takes the bounding box as
223
    # (min latitude, min longitude, max latitude, max longitude)
224
    if bbox is None:
5✔
225
        bboxstr = ""
×
226
    else:
227
        bboxstr = '(poly:"{}")'.format(
5✔
228
            " ".join(f"{c[1]} {c[0]}" for c in bbox.exterior.coords)
229
        )
230

231
    metastr = "meta" if meta else ""
5✔
232

233
    query = f"({typ}{bboxstr}{queries};{recursestr};);out {metastr};"
5✔
234

235
    url = "".join(
5✔
236
        [
237
            "http://www.overpass-api.de/api/interpreter?",
238
            urlencode({"data": query}),
239
        ]
240
    )
241

242
    return url
5✔
243

244

245
def read_osm(content, render=True, **kwargs):
    """
    Parse OSM XML content into DataFrames and optionally render geometries.

    The nodes, ways and relations of the document are read into separate
    DataFrames and bundled as an ``OSMData`` namedtuple. If ``render`` is
    true, that tuple is passed on to ``render_to_gdf`` together with
    ``kwargs`` and its result is returned instead.

    """
    root = fromstring(content)

    node_frame = read_nodes(root)
    way_nodes, way_tags = read_ways(root)
    relation_members, relation_tags = read_relations(root)

    parsed = OSMData(
        node_frame, way_nodes, way_tags, relation_members, relation_tags
    )
    if not render:
        return parsed
    return render_to_gdf(parsed, **kwargs)
5✔
266

267

268
def read_nodes(doc):
    """
    Collect all ``node`` elements of the document in a DataFrame.

    Example of a node element:
        <node id="1705717514" lat="42.3630798" lon="-71.0997601">
            <tag k="crossing" v="zebra"/>
            <tag k="highway" v="crossing"/>
            <tag k="source" v="Bing"/>
        </node>

    The "lon" and "lat" columns are converted to float if any node exists.
    """
    records = list(map(_element_to_dict, doc.findall("node")))
    frame = _dict_to_dataframe(records)
    if frame.empty:
        return frame

    # Coordinates are read from XML attributes as strings.
    frame["lon"] = frame["lon"].astype(float)
    frame["lat"] = frame["lat"].astype(float)
    return frame
5✔
282

283

284
def _element_to_dict(element):
5✔
285
    d = element.attrib.copy()
5✔
286
    for t in element.findall("tag"):
5✔
287
        k = t.attrib["k"]
5✔
288
        if k not in uninteresting_tags:
5✔
289
            d[k] = t.attrib["v"]
5✔
290

291
    return d
5✔
292

293

294
def _dict_to_dataframe(d):
5✔
295
    df = DataFrame.from_dict(d)
5✔
296
    if "timestamp" in df:
5✔
297
        df["timestamp"] = to_datetime(df["timestamp"])
×
298

299
    return df
5✔
300

301

302
def read_ways(doc):
    """
    Read all ``way`` elements of the document into two DataFrames.

    Example of a way element:
        <way id="8614593">
            <nd ref="61326730"/>
            <nd ref="61326036"/>
            <nd ref="61321194"/>
            <tag k="attribution" v="Office of Geographic and Environmental
                Information (MassGIS)"/>
            <tag k="condition" v="fair"/>
            <tag k="created_by" v="JOSM"/>
            <tag k="highway" v="residential"/>
            <tag k="lanes" v="2"/>
            <tag k="massgis:way_id" v="171099"/>
            <tag k="name" v="Centre Street"/>
            <tag k="source" v="massgis_import_v0.1_20071008165629"/>
            <tag k="width" v="13.4"/>
        </way>

    Returns a tuple ``(waynodes, waytags)``: ``waynodes`` has one row per
    ``nd`` child with the attributes of the child, the id of the way as "id"
    and the position within the way as "index"; ``waytags`` has one row per
    way with its attributes and tags.
    """
    node_refs = []
    tag_records = []
    for way in doc.findall("way"):
        way_id = way.attrib["id"]
        node_refs.extend(
            {**nd.attrib, "id": way_id, "index": position}
            for position, nd in enumerate(way.findall("nd"))
        )
        tag_records.append(_element_to_dict(way))

    return _dict_to_dataframe(node_refs), _dict_to_dataframe(tag_records)
5✔
336

337

338
def read_relations(doc):
    """
    Read all ``relation`` elements of the document into two DataFrames.

    Example of a relation element:
        <relation id="1933745">
          <member type="way" ref="134055159" role="outer"/>
          <member type="way" ref="260533047" role="outer"/>
          <member type="way" ref="142867799" role="outer"/>
          <member type="way" ref="134063352" role="outer"/>
          <member type="way" ref="142803038" role="outer"/>
          <member type="way" ref="134056144" role="outer"/>
          <member type="way" ref="134056141" role="outer"/>
          <tag k="admin_level" v="8"/>
          <tag k="boundary" v="administrative"/>
          <tag k="name" v="Cambridge"/>
          <tag k="type" v="boundary"/>
          <tag k="wikipedia" v="en:Cambridge, Massachusetts"/>
        </relation>

    Returns a tuple ``(relmembers, reltags)``: ``relmembers`` has one row per
    ``member`` child with the attributes of the child, the id of the relation
    as "id" and the position within the relation as "index"; ``reltags`` has
    one row per relation with its attributes and tags.
    """
    member_records = []
    tag_records = []
    for relation in doc.findall("relation"):
        relation_id = relation.attrib["id"]
        member_records.extend(
            {**member.attrib, "id": relation_id, "index": position}
            for position, member in enumerate(relation.findall("member"))
        )
        tag_records.append(_element_to_dict(relation))

    return (
        _dict_to_dataframe(member_records),
        _dict_to_dataframe(tag_records),
    )
5✔
370

371

372
def render_to_gdf(osmdata, drop_untagged=True):
    """
    Render parsed OSM data into one frame of node and way geometries.

    The nodes are rendered with ``render_nodes`` and the ways with
    ``render_ways``. If no ways could be rendered, only the rendered nodes
    are returned. Otherwise nodes and ways are concatenated and the
    "geometry" column is set as geometry with the module CRS.

    If the ways have a "landuse" column and relation members exist, every
    way gets the id of its first matching relation as "relation_id" and
    inherits the landuse of that relation when its own landuse is missing.
    """
    points = render_nodes(osmdata.nodes, drop_untagged)
    lines = render_ways(osmdata.nodes, osmdata.waynodes, osmdata.waytags)

    if lines is None:
        return points

    # set landuse tag from origin relation at relation members who has no
    # landuse tag
    members = osmdata.relmembers
    if "landuse" in lines.keys() and not members.empty:
        relation_tags = osmdata.reltags
        for row_label, way in lines.iterrows():
            # get and add origin relation id
            rel_id = members[members.ref == way.id].iloc[0].id
            lines.at[row_label, "relation_id"] = rel_id
            # get and add origin relation landuse if needed
            rel_row = relation_tags[relation_tags.id == rel_id].iloc[0]
            if "landuse" in rel_row.keys() and str(way.landuse) == "nan":
                lines.at[row_label, "landuse"] = rel_row.landuse

    merged = concat([points, lines], ignore_index=True)
    return merged.set_geometry("geometry", crs=_crs)
5✔
399

400

401
def render_nodes(nodes, drop_untagged=True):
    """
    Turn the node table into point geometries.

    An empty frame is returned unchanged. Otherwise, if ``drop_untagged`` is
    true, rows without any value besides "id", "lon" and "lat" are dropped.
    The "lon"/"lat" columns are replaced by a point geometry using the
    module CRS.
    """
    if nodes.empty:
        return nodes

    if drop_untagged:
        # Drop nodes that have no tags
        tag_columns = nodes.columns.drop(["id", "lon", "lat"])
        nodes = nodes.dropna(subset=tag_columns, how="all")

    # convert lon/lat to points
    points = [Point(row["lon"], row["lat"]) for _, row in nodes.iterrows()]
    without_coords = nodes.drop(["lon", "lat"], axis=1)
    return without_coords.set_geometry(points, crs=_crs)
5✔
414

415

416
def render_ways(nodes, waynodes, waytags):
    """
    Build a LineString geometry for every way.

    Returns None if ``waynodes`` is None or empty. Otherwise the way nodes
    are joined with the node coordinates, grouped by way id and turned into
    one LineString per way (ordered by "index"; ways with fewer than two
    coordinates get no geometry). The lines are attached to ``waytags`` as
    geometry using the module CRS.
    """
    if waynodes is None or waynodes.empty:
        return None

    coordinates = nodes[["id", "lon", "lat"]]

    def _to_linestring(group):
        ordered = group.sort_values(by="index")[["lon", "lat"]]
        # A line needs at least two points.
        return LineString(ordered.values) if len(ordered) > 1 else None

    # Attach the coordinates of the referenced node to every way node.
    located = waynodes.merge(
        coordinates,
        left_on="ref",
        right_on="id",
        suffixes=("", "_nodes"),
    )
    # Series with the way id as index and the LineString as value.
    lines_by_way = located.groupby("id", group_keys=False).apply(
        _to_linestring, include_groups=False
    )
    # Combine the lines with the way tags into a single GeoDataFrame of ways.
    ways = waytags.set_index("id").set_geometry(lines_by_way, crs=_crs)
    return ways.reset_index()
5✔
440

441

442
if __name__ == "__main__":
5✔
443
    pass
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc