• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

DaveFoss / DAVE_data / 10505325618

22 Aug 2024 09:18AM UTC coverage: 86.4%. Remained the same
10505325618

push

github

tbanze
changed project name in rtd config

41 of 47 branches covered (87.23%)

Branch coverage included in aggregate %.

175 of 203 relevant lines covered (86.21%)

4.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.76
/src/dave_data/datapool/osm_request.py
1
from collections import namedtuple
5✔
2
from time import sleep
5✔
3
from urllib.parse import urlencode
5✔
4
from urllib.request import urlopen
5✔
5

6
import pandas as pd
5✔
7
from defusedxml.ElementTree import fromstring
5✔
8
from geopandas import GeoDataFrame
5✔
9
from pandas import DataFrame
5✔
10
from pandas import concat
5✔
11
from pandas import to_datetime
5✔
12
from shapely.geometry import LineString
5✔
13
from shapely.geometry import Point
5✔
14
from six import string_types
5✔
15

16
from dave_data.core import Data
5✔
17
from dave_data.core import MetaData
5✔
18

19

20
def osm_settings():
    """
    Return a dictionary with the DaVe settings for the used OSM data and
    assumptions.

    Returns
    -------
    dict
        ``"osm_time_delay"``
            Delay in seconds.
        ``"osm_area"``
            Name of the considered area.
        ``"osm_tags"``
            Mapping of data type to a tuple
            ``(osm key, osm tags, osm types, parameters)``. ``osm tags`` is
            either a list of accepted tag values or ``True`` if only the
            presence of the key matters.
        ``"buildings_residential"`` / ``"buildings_commercial"``
            Building tag values assigned to the respective category.
    """
    settings = {
        # osm time delay (because osm doesn't allow more than 1 request per
        # second)
        "osm_time_delay": 60,  # in seconds
        # osm considered area (data for this area will be downloaded and
        # implemented in database)
        "osm_area": "germany",
        # osm tags: (type: (osm key, osm tags, osm type, parameter))
        "osm_tags": {
            # NOTE(review): unlike all other entries, "id" is a fifth tuple
            # element here instead of being part of the parameter list.
            # Kept unchanged so the tuple shape stays the same - confirm
            # whether it belongs inside the list.
            "road": (
                "highway",
                [
                    "secondary",
                    "tertiary",
                    "unclassified",
                    "residential",
                    "living_street",
                    "footway",
                    "track",
                    "path",
                ],
                ["way"],
                ["geometry", "name", "highway", "surface"],
                "id",
            ),
            "road_plot": (
                "highway",
                ["motorway", "trunk", "primary"],
                ["way"],
                ["geometry", "name", "id", "surface"],
            ),
            "landuse": (
                "landuse",
                True,
                ["way", "relation"],
                ["landuse", "geometry", "name", "id", "surface"],
            ),
            "leisure": (
                "leisure",
                ["golf_course", "garden", "park"],
                ["way", "relation"],
                [
                    "leisure",
                    "landuse",
                    "natural",
                    "name",
                    "geometry",
                    "id",
                    "surface",
                ],
            ),
            "natural": (
                "natural",
                ["scrub", "grassland", "water", "wood"],
                ["way", "relation"],
                [
                    "natural",
                    "landuse",
                    "leisure",
                    "name",
                    "geometry",
                    "id",
                    "surface",
                ],
            ),
            "building": (
                "building",
                True,
                ["way"],
                [
                    "addr:housenumber",
                    "addr:street",
                    "addr:suburb",
                    "amenity",
                    "building",
                    "building:levels",
                    "geometry",
                    "name",
                    "id",
                ],
            ),
            "railway": (
                "railway",
                [
                    "construction",
                    "disused",
                    "light_rail",
                    "monorail",
                    "narrow_gauge",
                    "rail",
                    "subway",
                    "tram",
                ],
                ["way"],
                [
                    "name",
                    "railway",
                    "geometry",
                    "tram",
                    "train",
                    "usage",
                    "voltage",
                    "id",
                ],
            ),
            "waterway": (
                "waterway",
                [
                    "river",
                    "stream",
                    "canal",
                    # no trailing blank: the values are joined into a regular
                    # expression, a blank would prevent any match
                    "tidal_channel",
                    "pressurised",
                    "drain",
                ],
                ["way"],
                ["name", "waterway", "geometry", "depth", "width", "id"],
            ),
        },
        # osm categories
        "buildings_residential": [
            "apartments",
            "detached",
            "dormitory",
            "dwelling_house",
            "farm",
            "house",
            "houseboat",
            "residential",
            "semidetached_house",
            "static_caravan",
            "terrace",
            "yes",
        ],
        "buildings_commercial": [
            "commercial",
            "hall",
            "industrial",
            "kindergarten",
            "kiosk",
            "office",
            "retail",
            "school",
            "supermarket",
            "warehouse",
        ],
    }
    return settings
5✔
173

174

175
def osm_request(data_type, area):
    """
    Request OSM data of one data type for an area directly from OSM.

    One query is sent per OSM element type configured for ``data_type`` and
    the results of all queries are combined into a single table.

    Parameters
    ----------
    data_type : str
        Key of the ``"osm_tags"`` settings, e.g. ``"road"`` or ``"building"``.
    area : polygon
        Area of interest. It is passed to ``query_osm`` as spatial filter and
        stored as ``polygon`` of the returned object.

    Returns
    -------
    Data
        Data object holding the combined query results.

    Raises
    ------
    KeyError
        If ``data_type`` is not defined in the settings.

    Examples
    --------
    >>> from shapely import box
    >>> streets = osm_request("road", box(13.409, 52.519, 13.41, 52.52))
    >>> len(streets.data) > 0
    True

    """
    data_param = osm_settings()["osm_tags"][data_type]
    osm_key, osm_values, osm_types = data_param[0], data_param[1], data_param[2]

    # The tag filter does not depend on the element type, build it once.
    # A list of values becomes a regular expression, otherwise only the
    # presence of the key is requested.
    tags = (
        f'{osm_key}~"{"|".join(osm_values)}"'
        if isinstance(osm_values, list)
        else f"{osm_key}"
    )

    frames = []
    for osm_type in osm_types:
        # get data from OSM directly via API query
        data, _ = query_osm(osm_type, area, recurse="down", tags=tags)
        # skip empty results so they cannot determine the type of the
        # combined table
        if not data.empty:
            frames.append(data)

    # Return the results of ALL element types, not only the last query.
    request_data = (
        concat(frames, ignore_index=True) if frames else GeoDataFrame([])
    )

    meta = MetaData(
        source_license="ODBL", source_date=None, organisation="OpenStreetMap"
    )
    # NOTE(review): name, description and tags are fixed to "roads" for every
    # data type - confirm whether they should depend on data_type.
    return Data(
        name="OSM roads filtered",
        description="Some description",
        data=request_data,
        meta=meta,
        polygon=area,
        tags=["roads", "osm"],
    )
212

213

214
# --- request directly from OSM via Overpass API and geopandas_osm package
215

216
# These functions are based on the geopandas_osm python package, which was
217
# published under the following license:
218

219
# The MIT License (MIT)
220

221
# Copyright (c) 2014 Jacob Wasserman
222

223
# Permission is hereby granted, free of charge, to any person obtaining a copy
224
# of this software and associated documentation files (the "Software"), to deal
225
# in the Software without restriction, including without limitation the rights
226
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
227
# copies of the Software, and to permit persons to whom the Software is
228
# furnished to do so, subject to the following conditions:
229

230
# The above copyright notice and this permission notice shall be included in
231
# all copies or substantial portions of the Software.
232

233
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
234
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
235
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
236
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
237
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
238
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
239
# SOFTWARE.
240

241

242
# Container for the intermediate tables parsed from an OSM XML document.
OSMData = namedtuple(
    "OSMData",
    ["nodes", "waynodes", "waytags", "relmembers", "reltags"],
)

# Coordinate reference system assigned to the created geometries.
_crs = "epsg:4326"

# Tags which are skipped while reading OSM elements so they don't clutter the
# output. This list comes from osmtogeojson's index.js
# (https://github.com/tyrasd/osmtogeojson)
uninteresting_tags = {
    "attribution",
    "created_by",
    "history",
    "source",
    "source:ref",
    "source_ref",
    "tiger:county",
    "tiger:tlid",
    "tiger:upload_uuid",
}
260

261

262
# http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide
263
def query_osm(
    typ, bbox=None, recurse=None, tags="", raw=False, meta=False, **kwargs
):
    """
    Query the Overpass API to obtain OpenStreetMap data.

    See also:
    http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide

    The OSM XML data is parsed into an intermediate set of DataFrames.
    By passing in 'render=False', this will return these DataFrames stored
    as the OSMData namedtuple. If render is True (default), then the
    DataFrames are built into their corresponding geometries.

    The request is repeated until it succeeds with HTTP status 200. Between
    two attempts the function waits for the ``"osm_time_delay"`` of the
    settings.

    Parameters
    ----------
    typ : {'node', 'way', 'relation'}
        The type of OSM data to query
    bbox : polygon, optional
        Area to restrict the query. The coordinates of its ``exterior`` ring
        are read as (lon, lat) pairs. Unless the query is extremely
        restricted, you usually want to specify this.
    recurse : {'up', 'down', 'uprel', 'downrel'}
        This is used to get more data than the original query. If 'typ' is
        'way', you'll usually want this set to 'down' which grabs all nodes
        of the matching ways
    tags : string or list of query strings
        See also the OverpassQL (referenced above) for more tag options
        Examples:
            tags='highway'
                Matches objects with a 'highway' tag
            tags='highway=motorway'
                Matches objects where the 'highway' tag is 'motorway'
            tags='name~[Mm]agazine'
                Match if the 'name' tag matches the regular expression

            Specify a list of tag requests to match all of them
            tags=['highway', 'name~"^Magazine"']
                Match tags that have 'highway' and where 'name' starts
                with 'Magazine'
    raw : boolean, default False
        Return the raw XML data returned by the request
    meta : boolean, default False
        Indicates whether to query the metadata with each OSM object. This
        includes the changeset, timestamp, uid, user, and version.
    **kwargs
        Passed on to ``read_osm`` (e.g. ``render``, ``drop_untagged``).
        Ignored if ``raw`` is True.

    Returns
    -------
    (data, meta_data) : tuple
        ``data`` is the raw response content if ``raw`` is True, otherwise
        the result of ``read_osm``. ``meta_data`` is a pandas Series which
        currently only holds a placeholder.

    Raises
    ------
    ValueError
        If ``recurse`` is not a known value or if the created URL does not
        start with 'http:' or 'https:'.

    Notes
    -----
    There's probably a bit more filtering required to get the exact desired
    data. For example if you only want ways, you may want to grab only the
    linestrings like:

    Examples
    --------
    >>> #  df = df[df.type == 'LineString']

    """
    url = _build_url(typ, bbox, recurse, tags, meta)
    # Check the scheme once before any request is made. Inside the retry loop
    # the error would be caught by the broad handler and retried forever.
    if not url.startswith(("http:", "https:")):
        raise ValueError("URL must start with 'http:' or 'https:'")

    # add time delay because osm doesn't allow more than 1 request per second.
    time_delay = osm_settings()["osm_time_delay"]

    while True:
        try:
            with urlopen(url) as response:  # noqa: S310
                content = response.read()
                status = response.getcode()
        except Exception as inst:
            # Deliberately broad: the query is best effort and every failed
            # request is repeated after the delay.
            print(f'\n Retry OSM query because of "{inst}"')
        else:
            if status == 200:
                break
            print(f'\n Retry OSM query because of "HTTP status {status}"')
        # wait before the next attempt, for errors as well as for
        # unexpected status codes
        sleep(time_delay)

    # get meta information
    meta_data = pd.Series({"meta": "coming soon"})

    if raw:
        return content, meta_data
    return read_osm(content, **kwargs), meta_data
5✔
349

350

351
def _build_url(typ, bbox=None, recurse=None, tags="", meta=False):
    """
    Build the Overpass API request URL for a query.

    Parameters
    ----------
    typ : str
        OSM element type which is queried ('node', 'way' or 'relation').
    bbox : polygon or (min lon, min lat, max lon, max lat), optional
        Spatial filter. An object with an ``exterior`` ring is turned into a
        ``poly`` filter built from the ring coordinates, which are read as
        (lon, lat) pairs. Any other value is unpacked as bounding box tuple.
    recurse : {'up', 'down', 'uprel', 'downrel'}, optional
        Recursion which is added to the query.
    tags : str or list of str
        One tag filter or a list of tag filters which all have to match.
    meta : bool, default False
        Request the output with meta data.

    Returns
    -------
    str
        URL of the Overpass interpreter including the url-encoded query.

    Raises
    ------
    ValueError
        If ``recurse`` is not one of the supported values.
    """
    recurse_map = {
        "up": "<",
        "uprel": "<<",
        "down": ">",
        "downrel": ">>",
    }
    if recurse is None:
        recursestr = ""
    else:
        try:
            recursestr = recurse_map[recurse]
        except KeyError as k_exception:
            raise ValueError(
                f"Unrecognized recurse value '{recurse}'. "
                f"Must be one of: {', '.join(recurse_map)}."
            ) from k_exception

    # Allow tags to be a single string
    if isinstance(tags, str) and tags:
        tags = [tags]
    queries = "".join(f"[{t}]" for t in tags)

    if bbox is None:
        bboxstr = ""
    elif hasattr(bbox, "exterior"):
        # Overpass QL takes the polygon as a sequence of "latitude longitude"
        # pairs, the coordinates are stored as (longitude, latitude)
        bboxstr = '(poly:"{}")'.format(
            " ".join(f"{c[1]} {c[0]}" for c in bbox.exterior.coords)
        )
    else:
        # Overpass QL takes the bounding box as
        # (min latitude, min longitude, max latitude, max longitude)
        min_lon, min_lat, max_lon, max_lat = bbox
        bboxstr = f"({min_lat},{min_lon},{max_lat},{max_lon})"

    metastr = "meta" if meta else ""

    query = f"({typ}{bboxstr}{queries};{recursestr};);out {metastr};"

    return "http://www.overpass-api.de/api/interpreter?" + urlencode(
        {"data": query}
    )
5✔
397

398

399
def read_osm(content, render=True, **kwargs):
    """
    Parse OSM XML data into the intermediate DataFrames.

    Parameters
    ----------
    content : str or bytes
        OSM XML document.
    render : bool, default True
        If True the parsed tables are passed to ``render_to_gdf``, otherwise
        they are returned unchanged.
    **kwargs
        Passed on to ``render_to_gdf``. Not used if ``render`` is False.

    Returns
    -------
    OSMData or the result of ``render_to_gdf``
    """
    root = fromstring(content)

    # nodes first, then the two way tables, then the two relation tables
    parsed = OSMData(
        read_nodes(root),
        *read_ways(root),
        *read_relations(root),
    )

    return render_to_gdf(parsed, **kwargs) if render else parsed
5✔
420

421

422
def read_nodes(doc):
    """
    Read all node elements of the parsed document into a DataFrame.

    The attributes and tags of each node become the columns. The columns
    "lon" and "lat" are converted to float if there is at least one node.
    """
    #   Example:
    #   <node id="1705717514" lat="42.3630798" lon="-71.0997601">
    #       <tag k="crossing" v="zebra"/>
    #       <tag k="highway" v="crossing"/>
    #       <tag k="source" v="Bing"/>
    #   </node>
    records = list(map(_element_to_dict, doc.findall("node")))
    frame = _dict_to_dataframe(records)
    if frame.empty:
        return frame

    for coordinate in ("lon", "lat"):
        frame[coordinate] = frame[coordinate].astype(float)
    return frame
5✔
436

437

438
def _element_to_dict(element):
    """
    Merge the attributes and the tags of an XML element into one dictionary.

    Tags listed in ``uninteresting_tags`` are skipped. A tag with the same
    key as an attribute replaces the attribute value.
    """
    record = dict(element.attrib)
    record.update(
        (tag.attrib["k"], tag.attrib["v"])
        for tag in element.findall("tag")
        if tag.attrib["k"] not in uninteresting_tags
    )
    return record
5✔
446

447

448
def _dict_to_dataframe(d):
    """
    Create a DataFrame from the given records.

    If the result has a "timestamp" column it is converted to datetime.
    """
    frame = DataFrame.from_dict(d)
    if "timestamp" not in frame:
        return frame

    frame["timestamp"] = to_datetime(frame["timestamp"])
    return frame
5✔
454

455

456
def read_ways(doc):
    """
    Read all way elements of the parsed document.

    Returns
    -------
    (waynodes, waytags) : tuple of DataFrame
        ``waynodes`` has one row per node reference of a way with the way id
        ("id") and the position of the reference within the way ("index").
        ``waytags`` has one row per way with its attributes and tags.
    """
    #   Example:
    #   <way id="8614593">
    #       <nd ref="61326730"/>
    #       <nd ref="61326036"/>
    #       <nd ref="61321194"/>
    #       <tag k="attribution" v="Office of Geographic and Environmental
    #           Information (MassGIS)"/>
    #       <tag k="condition" v="fair"/>
    #       <tag k="created_by" v="JOSM"/>
    #       <tag k="highway" v="residential"/>
    #       <tag k="lanes" v="2"/>
    #       <tag k="massgis:way_id" v="171099"/>
    #       <tag k="name" v="Centre Street"/>
    #       <tag k="source" v="massgis_import_v0.1_20071008165629"/>
    #       <tag k="width" v="13.4"/>
    #   </way>
    node_refs = []
    tag_records = []
    for xmlway in doc.findall("way"):
        way_id = xmlway.attrib["id"]
        node_refs.extend(
            {**xmlnd.attrib, "id": way_id, "index": position}
            for position, xmlnd in enumerate(xmlway.findall("nd"))
        )
        tag_records.append(_element_to_dict(xmlway))

    return _dict_to_dataframe(node_refs), _dict_to_dataframe(tag_records)
5✔
490

491

492
def read_relations(doc):
    """
    Read all relation elements of the parsed document.

    Returns
    -------
    (relmembers, reltags) : tuple of DataFrame
        ``relmembers`` has one row per member of a relation with the relation
        id ("id") and the position of the member within the relation
        ("index"). ``reltags`` has one row per relation with its attributes
        and tags.
    """
    # Example:
    #   <relation id="1933745">
    #     <member type="way" ref="134055159" role="outer"/>
    #     <member type="way" ref="260533047" role="outer"/>
    #     <member type="way" ref="142867799" role="outer"/>
    #     <member type="way" ref="134063352" role="outer"/>
    #     <member type="way" ref="142803038" role="outer"/>
    #     <member type="way" ref="134056144" role="outer"/>
    #     <member type="way" ref="134056141" role="outer"/>
    #     <tag k="admin_level" v="8"/>
    #     <tag k="boundary" v="administrative"/>
    #     <tag k="name" v="Cambridge"/>
    #     <tag k="type" v="boundary"/>
    #     <tag k="wikipedia" v="en:Cambridge, Massachusetts"/>
    #   </relation>
    member_records = []
    tag_records = []
    for xmlrel in doc.findall("relation"):
        relation_id = xmlrel.attrib["id"]
        member_records.extend(
            {**xmlmember.attrib, "id": relation_id, "index": position}
            for position, xmlmember in enumerate(xmlrel.findall("member"))
        )
        tag_records.append(_element_to_dict(xmlrel))

    return (
        _dict_to_dataframe(member_records),
        _dict_to_dataframe(tag_records),
    )
5✔
524

525

526
def render_to_gdf(osmdata, drop_untagged=True):
    """
    Build the geometries for the parsed OSM tables.

    Parameters
    ----------
    osmdata : OSMData
        Parsed tables as returned by ``read_osm(..., render=False)``.
    drop_untagged : bool, default True
        Passed on to ``render_nodes``.

    Returns
    -------
    The rendered nodes if there are no ways, otherwise the rendered nodes and
    ways combined in one table with "geometry" as geometry column.
    """
    nodes = render_nodes(osmdata.nodes, drop_untagged)
    ways = render_ways(osmdata.nodes, osmdata.waynodes, osmdata.waytags)
    if ways is None:
        return nodes

    relmembers = osmdata.relmembers
    reltags = osmdata.reltags

    # set landuse tag from origin relation at relation members who has no
    # landuse tag
    if "landuse" in ways.keys() and not relmembers.empty:
        for i, way in ways.iterrows():
            # get and add origin relation id
            memberships = relmembers[relmembers.ref == way.id]
            if memberships.empty:
                # the way is not a member of any relation, nothing to inherit
                continue
            rel_id = memberships.iloc[0].id
            ways.at[i, "relation_id"] = rel_id
            # get and add origin relation landuse if needed
            relation_rows = reltags[reltags.id == rel_id]
            if relation_rows.empty:
                continue
            osm_reltag = relation_rows.iloc[0]
            if "landuse" in osm_reltag.keys() and pd.isna(way.landuse):
                ways.at[i, "landuse"] = osm_reltag.landuse

    combined = concat([nodes, ways], ignore_index=True)
    return combined.set_geometry("geometry", crs=_crs)
5✔
553

554

555
def render_nodes(nodes, drop_untagged=True):
    """
    Convert the node table into a table with point geometries.

    Parameters
    ----------
    nodes : DataFrame
        Node table with the columns "id", "lon" and "lat" and one column per
        tag.
    drop_untagged : bool, default True
        Drop nodes which have no value in any column besides "id", "lon" and
        "lat".

    Returns
    -------
    The unchanged input if it is empty, otherwise the nodes with a point
    geometry instead of the "lon" and "lat" columns.
    """
    # nothing to render without nodes
    if nodes.empty:
        return nodes

    if drop_untagged:
        tag_columns = nodes.columns.drop(["id", "lon", "lat"])
        nodes = nodes.dropna(subset=tag_columns, how="all")

    # convert lon/lat to points
    points = [Point(lon, lat) for lon, lat in zip(nodes["lon"], nodes["lat"])]
    without_coordinates = nodes.drop(["lon", "lat"], axis=1)
    return without_coordinates.set_geometry(points, crs=_crs)
5✔
568

569

570
def render_ways(nodes, waynodes, waytags):
    """
    Create a line geometry for each way and combine it with the way tags.

    Parameters
    ----------
    nodes : DataFrame
        Node table with the columns "id", "lon" and "lat".
    waynodes : DataFrame or None
        Node references of the ways ("id", "ref", "index").
    waytags : DataFrame
        Attributes and tags of the ways, one row per way.

    Returns
    -------
    None if there are no way nodes, otherwise the way tags with the line of
    each way as geometry. Ways with fewer than two located nodes get no
    geometry.
    """
    if waynodes is None or waynodes.empty:
        return None

    coordinates = nodes[["id", "lon", "lat"]]

    def wayline(df):
        # order the nodes along the way before building the line
        ordered = df.sort_values(by="index")[["lon", "lat"]]
        return LineString(ordered.values) if len(ordered) > 1 else None

    # Attach the coordinates to every node reference, then group the
    # references by way id and create a LineString for each way. way_lines is
    # a Series where the index is the way id and the value is the LineString.
    located = waynodes.merge(
        coordinates, left_on="ref", right_on="id", suffixes=("", "_nodes")
    )
    way_lines = located.groupby("id", group_keys=False).apply(
        wayline, include_groups=False
    )

    # Merge the lines with the waytags to get a single GeoDataFrame of ways
    ways = waytags.set_index("id").set_geometry(way_lines, crs=_crs)
    return ways.reset_index()
5✔
594

595

596
# Script entry point: running the module directly intentionally does nothing.
if __name__ == "__main__":
5✔
597
    pass
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc