• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

DaveFoss / DAVE_data / 10490042032

21 Aug 2024 12:37PM UTC coverage: 86.4% (+5.5%) from 80.894%
10490042032

push

github

uvchik
Fix doctest

41 of 47 branches covered (87.23%)

Branch coverage included in aggregate %.

175 of 203 relevant lines covered (86.21%)

4.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.76
/src/dave_data/datapool/osm_request.py
1
from collections import namedtuple
5✔
2
from time import sleep
5✔
3
from urllib.parse import urlencode
5✔
4
from urllib.request import urlopen
5✔
5

6
import pandas as pd
5✔
7
from defusedxml.ElementTree import fromstring
5✔
8
from geopandas import GeoDataFrame
5✔
9
from pandas import DataFrame
5✔
10
from pandas import concat
5✔
11
from pandas import to_datetime
5✔
12
from shapely.geometry import LineString
5✔
13
from shapely.geometry import Point
5✔
14
from six import string_types
5✔
15

16
from dave_data.core import Data
5✔
17
from dave_data.core import MetaData
5✔
18

19

20
def osm_settings():
    """
    Return a dictionary with the DaVe settings for used data and assumptions.

    Returns
    -------
    dict
        A freshly built dictionary on every call. The entry ``"osm_tags"``
        maps a data type name to a tuple
        ``(osm key, osm tag values, osm element types, parameters)`` where
        the tag values are either a list of strings or ``True`` (any value).
    """
    settings = {
        # osm time delay in seconds, used as the pause before a failed
        # request is retried
        # NOTE(review): the original remark says OSM does not allow more than
        # 1 request per second, yet the value is 60 seconds - confirm.
        "osm_time_delay": 60,  # in seconds
        # osm considered area (data for this area will be downloaded and
        # implemented in database)
        "osm_area": "germany",
        # osm tags: (type: (osm key, osm tags, osm type, parameter))
        "osm_tags": {
            # NOTE(review): unlike every other entry, "road" carries "id" as
            # a fifth tuple element instead of inside the parameter list.
            # Left unchanged because callers outside this file may rely on
            # the current shape - confirm and align.
            "road": (
                "highway",
                [
                    "secondary",
                    "tertiary",
                    "unclassified",
                    "residential",
                    "living_street",
                    "footway",
                    "track",
                    "path",
                ],
                ["way"],
                ["geometry", "name", "highway", "surface"],
                "id",
            ),
            "road_plot": (
                "highway",
                ["motorway", "trunk", "primary"],
                ["way"],
                ["geometry", "name", "id", "surface"],
            ),
            "landuse": (
                "landuse",
                True,
                ["way", "relation"],
                ["landuse", "geometry", "name", "id", "surface"],
            ),
            "leisure": (
                "leisure",
                ["golf_course", "garden", "park"],
                ["way", "relation"],
                [
                    "leisure",
                    "landuse",
                    "natural",
                    "name",
                    "geometry",
                    "id",
                    "surface",
                ],
            ),
            "natural": (
                "natural",
                ["scrub", "grassland", "water", "wood"],
                ["way", "relation"],
                [
                    "natural",
                    "landuse",
                    "leisure",
                    "name",
                    "geometry",
                    "id",
                    "surface",
                ],
            ),
            "building": (
                "building",
                True,
                ["way"],
                [
                    "addr:housenumber",
                    "addr:street",
                    "addr:suburb",
                    "amenity",
                    "building",
                    "building:levels",
                    "geometry",
                    "name",
                    "id",
                ],
            ),
            "railway": (
                "railway",
                [
                    "construction",
                    "disused",
                    "light_rail",
                    "monorail",
                    "narrow_gauge",
                    "rail",
                    "subway",
                    "tram",
                ],
                ["way"],
                [
                    "name",
                    "railway",
                    "geometry",
                    "tram",
                    "train",
                    "usage",
                    "voltage",
                    "id",
                ],
            ),
            "waterway": (
                "waterway",
                [
                    "river",
                    "stream",
                    "canal",
                    # no trailing blank: the values are joined into a regex
                    # and a padded value would never match the OSM tag
                    "tidal_channel",
                    "pressurised",
                    "drain",
                ],
                ["way"],
                ["name", "waterway", "geometry", "depth", "width", "id"],
            ),
        },
        # osm categories
        "buildings_residential": [
            "apartments",
            "detached",
            "dormitory",
            "dwelling_house",
            "farm",
            "house",
            "houseboat",
            "residential",
            "semidetached_house",
            "static_caravan",
            "terrace",
            "yes",
        ],
        "buildings_commercial": [
            "commercial",
            "hall",
            "industrial",
            "kindergarten",
            "kiosk",
            "office",
            "retail",
            "school",
            "supermarket",
            "warehouse",
        ],
        # --- assumptions at power grid generating:
        # mv level
        "mv_voltage": 20,
        # hours per year
        "h_per_a": 8760,
        # power factors for loads
        "cos_phi_residential": 0.95,  # inductive
        "cos_phi_industrial": 0.75,  # inductive
        "cos_phi_commercial": 0.75,  # inductive
        # average load values for ehv, hv, and mv loads
        "residential_load": 2,  # in MW/km²
        "industrial_load": 10,  # in MW/km²
        "commercial_load": 3,  # in MW/km²
        # --- assumptions at pandapower convert:
        # lines standard types
        # dummy value, must be changed
        "mv_line_std_type": "NA2XS2Y 1x240 RM/25 12/20 kV",
        "lv_line_std_type": "NAYY 4x150 SE",  # dummy value, must be changed
        # trafo parameters for ehv/ehv and ehv/hv. The dummy values are
        # based on the pandapower
        # standard type "160 MVA 380/110 kV" which is the biggest model
        "trafo_vkr_percent": 0.25,  # dummy value
        "trafo_vk_percent": 12.2,  # dummy value
        "trafo_pfe_kw": 60,  # dummy value
        "trafo_i0_percent": 0.06,  # dummy value
        # trafo standard types
        # dummy value, must be changed
        "hvmv_trafo_std_type": "63 MVA 110/20 kV",
        # dummy value, must be changed
        "mvlv_trafo_std_type": "0.63 MVA 20/0.4 kV",
        # --- assumptions at gas grid generating:
        # hp level
        "hp_nodes_height_m": 1,  # dummy value, must be changed
        # value based on shutterwald data, must be changed
        "hp_pipes_k_mm": 0.1,
        "hp_pipes_tfluid_k": 273.15,  # dummy value, must be changed
        # --- assumptions at model utils:
        "min_number_nodes": 4,
    }
    return settings
5✔
211

212

213
def osm_request(data_type, area):
    """
    Request the OSM data of one configured data type for an area.

    The OSM key, the tag values and the OSM element types of ``data_type``
    are read from ``osm_settings()["osm_tags"]``. One Overpass query is sent
    per element type and all results are concatenated into one table.

    Parameters
    ----------
    data_type : str
        Key of ``osm_settings()["osm_tags"]``, e.g. ``"road"``.
    area : polygon
        Area of interest. It is passed to :func:`query_osm` as ``bbox`` and
        stored as ``polygon`` of the returned object.

    Returns
    -------
    Data
        Data object whose ``data`` attribute holds the concatenated results
        of all queried element types.

    Examples
    --------
    >>> from shapely import box
    >>> streets = osm_request("road", box(13.409, 52.519, 13.41, 52.52))
    >>> len(streets.data) > 0
    True

    """
    osm_key, osm_values, osm_types = osm_settings()["osm_tags"][data_type][:3]

    # A list of values becomes a regex alternative on the key, anything else
    # (e.g. True) only asks for the presence of the key. The filter does not
    # depend on the element type, so it is built once.
    tags = (
        f'{osm_key}~"{"|".join(osm_values)}"'
        if isinstance(osm_values, list)
        else f"{osm_key}"
    )

    request_data = GeoDataFrame([])
    for osm_type in osm_types:
        # get data from OSM directly via API query; the placeholder meta
        # information returned by query_osm is not used here
        data, _ = query_osm(osm_type, area, recurse="down", tags=tags)
        request_data = concat([request_data, data], ignore_index=True)

    meta = MetaData(
        source_license="ODBL", source_date=None, organisation="OpenStreetMap"
    )
    # NOTE(review): name, description and tags are fixed to "roads" for every
    # data_type - confirm whether they should follow ``data_type``.
    return Data(
        name="OSM roads filtered",
        description="Some description",
        # return the data of ALL element types, not only of the last query
        data=request_data,
        meta=meta,
        polygon=area,
        tags=["roads", "osm"],
    )
250

251

252
# --- request directly from OSM via Overpass API and geopandas_osm package
253

254
# These functions are based on the geopandas_osm python package, which was
255
# published under the following license:
256

257
# The MIT License (MIT)
258

259
# Copyright (c) 2014 Jacob Wasserman
260

261
# Permission is hereby granted, free of charge, to any person obtaining a copy
262
# of this software and associated documentation files (the "Software"), to deal
263
# in the Software without restriction, including without limitation the rights
264
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
265
# copies of the Software, and to permit persons to whom the Software is
266
# furnished to do so, subject to the following conditions:
267

268
# The above copyright notice and this permission notice shall be included in
269
# all copies or substantial portions of the Software.
270

271
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
272
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
273
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
274
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
275
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
276
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
277
# SOFTWARE.
278

279

280
# Container for the intermediate tables parsed from one OSM XML document.
OSMData = namedtuple(
    "OSMData",
    ["nodes", "waynodes", "waytags", "relmembers", "reltags"],
)

# Coordinate reference system assigned to every rendered geometry.
_crs = "epsg:4326"
5✔
284

285
# OSM tag keys that are dropped while parsing so they do not clutter the
# output. The list is taken from osmtogeojson's index.js
# (https://github.com/tyrasd/osmtogeojson).
uninteresting_tags = {
    "attribution",
    "created_by",
    "history",
    "source",
    "source:ref",
    "source_ref",
    "tiger:county",
    "tiger:tlid",
    "tiger:upload_uuid",
}
298

299

300
# http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide
301
def query_osm(
    typ, bbox=None, recurse=None, tags="", raw=False, meta=False, **kwargs
):
    """
    Query the Overpass API to obtain OpenStreetMap data.

    See also:
    http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide

    The request is repeated until it is answered with HTTP status 200. After
    every failed attempt the function waits ``osm_time_delay`` seconds (see
    ``osm_settings``) before it tries again.

    The OSM XML data is parsed into an intermediate set of DataFrames.
    By passing in 'render=False', this will return these DataFrames stored
    as the OSMData namedtuple. If render is True, then the DataFrames
    are built into their corresponding geometries.

    Parameters
    ----------
    typ : {'node', 'way', 'relation'}
        The type of OSM data to query
    bbox : area restricting the query, optional
        Forwarded unchanged to ``_build_url``, e.g. a shapely polygon.
        Unless the query is extremely restricted, you usually want to
        specify this.
    recurse : {'up', 'down', 'uprel', 'downrel'}
        This is used to get more data than the original query. If 'typ' is
        'way', you'll usually want this set to 'down' which grabs all nodes
        of the matching ways
    tags : string or list of query strings
        See also the OverpassQL (referenced above) for more tag options
        Examples:
            tags='highway'
                Matches objects with a 'highway' tag
            tags='highway=motorway'
                Matches objects where the 'highway' tag is 'motorway'
            tags='name~[Mm]agazine'
                Match if the 'name' tag matches the regular expression

            Specify a list of tag requests to match all of them
            tags=['highway', 'name~"^Magazine"']
                Match tags that have 'highway' and where 'name' starts
                with 'Magazine'
    raw : boolean, default False
        Return the raw XML data returned by the request
    meta : boolean, default False
        Indicates whether to query the metadata with each OSM object. This
        includes the changeset, timestamp, uid, user, and version.
    **kwargs
        Forwarded to ``read_osm`` (e.g. ``render`` or ``drop_untagged``).

    Returns
    -------
    tuple
        ``(data, meta_data)``. ``data`` is the raw response content if
        ``raw`` is true, otherwise the result of ``read_osm``. ``meta_data``
        is a pandas Series that currently only holds a placeholder.

    Raises
    ------
    ValueError
        If the generated URL does not start with 'http:' or 'https:'.

    Note that there's probably a bit more filtering required to get the
    exact desired data. For example if you only want ways, you may want
    to grab only the linestrings like:

    Examples
    --------
    >>> #  df = df[df.type == 'LineString']

    """
    url = _build_url(typ, bbox, recurse, tags, meta)

    # A URL with a wrong scheme can never succeed. Check it before the retry
    # loop so the error reaches the caller instead of being retried forever.
    if not url.startswith(("http:", "https:")):
        raise ValueError("URL must start with 'http:' or 'https:'")

    # pause between two attempts so the OSM server is not flooded
    time_delay = osm_settings()["osm_time_delay"]

    while True:
        try:
            with urlopen(url) as response:  # noqa: S310
                content = response.read()
                status = response.getcode()
        except Exception as inst:
            # deliberate best effort: any failure of the request is retried
            print(f'\n Retry OSM query because of "{inst}"')
        else:
            if status == 200:
                break
            print(f'\n Retry OSM query because of "HTTP status {status}"')
        # wait before every retry, also after a non-200 answer
        sleep(time_delay)

    # get meta information
    meta_data = pd.Series({"meta": "coming soon"})

    if raw:
        return content, meta_data
    return read_osm(content, **kwargs), meta_data
5✔
387

388

389
def _build_url(typ, bbox=None, recurse=None, tags="", meta=False):
    """
    Build the Overpass API request URL for a query.

    Parameters
    ----------
    typ : str
        OSM element type that is queried, e.g. 'node', 'way' or 'relation'.
    bbox : polygon or (min lon, min lat, max lon, max lat), optional
        Area restricting the query. An object with an ``exterior`` attribute
        (e.g. a shapely polygon) is sent as a ``poly`` filter built from the
        coordinates of its exterior ring. Anything else is unpacked as a
        bounding box of four values.
    recurse : {'up', 'down', 'uprel', 'downrel'}, optional
        Recursion applied to the result of the query.
    tags : str or list of str
        One tag filter or a list of tag filters, each is wrapped in ``[]``.
    meta : bool, default False
        If true, ``out meta`` is requested instead of a plain ``out``.

    Returns
    -------
    str
        URL of the Overpass interpreter with the URL-encoded query.

    Raises
    ------
    ValueError
        If ``recurse`` is not one of the known recurse values.
    """
    recurse_map = {
        "up": "<",
        "uprel": "<<",
        "down": ">",
        "downrel": ">>",
    }
    if recurse is None:
        recursestr = ""
    else:
        try:
            recursestr = recurse_map[recurse]
        except KeyError as k_exception:
            raise ValueError(
                "Unrecognized recurse value '{}'. "
                "Must be one of: {}.".format(recurse, ", ".join(recurse_map))
            ) from k_exception

    # Allow tags to be a single string
    if isinstance(tags, str) and tags:
        tags = [tags]
    queries = "".join(f"[{t}]" for t in tags)

    if bbox is None:
        bboxstr = ""
    elif hasattr(bbox, "exterior"):
        # Overpass QL takes the polygon as a blank separated sequence of
        # "latitude longitude" pairs, the coordinates are stored as
        # (longitude, latitude)
        bboxstr = '(poly:"{}")'.format(
            " ".join(f"{c[1]} {c[0]}" for c in bbox.exterior.coords)
        )
    else:
        # Overpass QL takes the bounding box as
        # (min latitude, min longitude, max latitude, max longitude)
        min_lon, min_lat, max_lon, max_lat = bbox
        bboxstr = f"({min_lat},{min_lon},{max_lat},{max_lon})"

    metastr = "meta" if meta else ""

    query = f"({typ}{bboxstr}{queries};{recursestr};);out {metastr};"

    return "http://www.overpass-api.de/api/interpreter?" + urlencode(
        {"data": query}
    )
5✔
435

436

437
def read_osm(content, render=True, **kwargs):
    """
    Parse OSM XML content into the intermediate DataFrames.

    The nodes, ways and relations of the document are collected in an
    ``OSMData`` namedtuple. If ``render`` is true, the tuple is passed on to
    ``render_to_gdf`` together with ``kwargs`` and the rendered result is
    returned instead.

    """
    root = fromstring(content)

    node_frame = read_nodes(root)
    way_frames = read_ways(root)
    relation_frames = read_relations(root)

    parsed = OSMData(node_frame, *way_frames, *relation_frames)
    if not render:
        return parsed
    return render_to_gdf(parsed, **kwargs)
5✔
458

459

460
def read_nodes(doc):
    """
    Collect all ``node`` elements of the document in one DataFrame.

    Every node contributes its XML attributes and its tags, e.g.

      <node id="1705717514" lat="42.3630798" lon="-71.0997601">
          <tag k="crossing" v="zebra"/>
          <tag k="highway" v="crossing"/>
          <tag k="source" v="Bing"/>
      </node>

    If there is at least one node, 'lon' and 'lat' are converted to float.
    """
    node_frame = _dict_to_dataframe(
        [_element_to_dict(xmlnode) for xmlnode in doc.findall("node")]
    )
    if node_frame.empty:
        return node_frame

    for axis in ("lon", "lat"):
        node_frame[axis] = node_frame[axis].astype(float)
    return node_frame
5✔
474

475

476
def _element_to_dict(element):
    """
    Flatten an XML element into one dictionary.

    The result starts as a copy of the element's attributes. The key/value
    pair of every ``tag`` child is added on top, except for keys listed in
    ``uninteresting_tags``.
    """
    record = element.attrib.copy()
    record.update(
        (tag.attrib["k"], tag.attrib["v"])
        for tag in element.findall("tag")
        if tag.attrib["k"] not in uninteresting_tags
    )
    return record
5✔
484

485

486
def _dict_to_dataframe(d):
    """
    Build a DataFrame from the parsed records.

    If the resulting table has a 'timestamp' column, that column is
    converted with ``to_datetime``.
    """
    frame = DataFrame.from_dict(d)
    if "timestamp" not in frame:
        return frame

    frame["timestamp"] = to_datetime(frame["timestamp"])
    return frame
5✔
492

493

494
def read_ways(doc):
    """
    Collect all ``way`` elements of the document in two DataFrames.

    Example of a way:

      <way id="8614593">
          <nd ref="61326730"/>
          <nd ref="61326036"/>
          <nd ref="61321194"/>
          <tag k="highway" v="residential"/>
          <tag k="name" v="Centre Street"/>
      </way>

    Returns
    -------
    (waynodes, waytags)
        ``waynodes`` has one row per ``nd`` child: its attributes, the id of
        the way as 'id' and its position within the way as 'index'.
        ``waytags`` has one row per way with its attributes and tags.
    """
    node_refs = []
    tag_rows = []
    for xmlway in doc.findall("way"):
        way_id = xmlway.attrib["id"]
        node_refs.extend(
            {**xmlnd.attrib, "id": way_id, "index": position}
            for position, xmlnd in enumerate(xmlway.findall("nd"))
        )
        tag_rows.append(_element_to_dict(xmlway))

    return _dict_to_dataframe(node_refs), _dict_to_dataframe(tag_rows)
5✔
528

529

530
def read_relations(doc):
    """
    Collect all ``relation`` elements of the document in two DataFrames.

    Example of a relation:

      <relation id="1933745">
        <member type="way" ref="134055159" role="outer"/>
        <member type="way" ref="260533047" role="outer"/>
        <tag k="boundary" v="administrative"/>
        <tag k="name" v="Cambridge"/>
        <tag k="type" v="boundary"/>
      </relation>

    Returns
    -------
    (relmembers, reltags)
        ``relmembers`` has one row per ``member`` child: its attributes, the
        id of the relation as 'id' and its position within the relation as
        'index'. ``reltags`` has one row per relation with its attributes
        and tags.
    """
    member_rows = []
    tag_rows = []
    for xmlrel in doc.findall("relation"):
        relation_id = xmlrel.attrib["id"]
        member_rows.extend(
            {**xmlmember.attrib, "id": relation_id, "index": position}
            for position, xmlmember in enumerate(xmlrel.findall("member"))
        )
        tag_rows.append(_element_to_dict(xmlrel))

    return _dict_to_dataframe(member_rows), _dict_to_dataframe(tag_rows)
5✔
562

563

564
def render_to_gdf(osmdata, drop_untagged=True):
    """
    Render the parsed OSM tables into one table with geometries.

    Parameters
    ----------
    osmdata : OSMData
        Parsed tables; ``nodes``, ``waynodes``, ``waytags``, ``relmembers``
        and ``reltags`` are read.
    drop_untagged : bool, default True
        Passed to ``render_nodes``.

    Returns
    -------
    The rendered nodes followed by the rendered ways. If no ways could be
    rendered, only the result of ``render_nodes`` is returned.
    """
    nodes = render_nodes(osmdata.nodes, drop_untagged)
    ways = render_ways(osmdata.nodes, osmdata.waynodes, osmdata.waytags)
    if ways is None:
        return nodes

    relmembers = osmdata.relmembers
    reltags = osmdata.reltags

    # set landuse tag from origin relation at relation members who has no
    # landuse tag
    if "landuse" in ways.keys() and not relmembers.empty:
        for i, way in ways.iterrows():
            # get and add origin relation id; a way that is no member of any
            # relation is left untouched instead of raising an IndexError
            parents = relmembers[relmembers.ref == way.id]
            if parents.empty:
                continue
            rel_id = parents.iloc[0].id
            ways.at[i, "relation_id"] = rel_id

            # get and add origin relation landuse if needed
            rel_rows = reltags[reltags.id == rel_id]
            if rel_rows.empty:
                continue
            osm_reltag = rel_rows.iloc[0]
            # a missing landuse value shows up as NaN in the way row
            if "landuse" in osm_reltag.keys() and str(way.landuse) == "nan":
                ways.at[i, "landuse"] = osm_reltag.landuse

    nodes = concat([nodes, ways], ignore_index=True)
    nodes = nodes.set_geometry("geometry", crs=_crs)

    return nodes
5✔
591

592

593
def render_nodes(nodes, drop_untagged=True):
    """
    Turn the node table into a table with point geometries.

    An empty table is returned unchanged. Otherwise, if ``drop_untagged`` is
    true, nodes that have no value in any column other than 'id', 'lon' and
    'lat' are dropped. The 'lon'/'lat' columns are then replaced by a point
    geometry.
    """
    if nodes.empty:
        return nodes

    if drop_untagged:
        tag_columns = nodes.columns.drop(["id", "lon", "lat"])
        nodes = nodes.dropna(subset=tag_columns, how="all")

    points = [Point(lon, lat) for lon, lat in zip(nodes["lon"], nodes["lat"])]
    without_coords = nodes.drop(["lon", "lat"], axis=1)
    return without_coords.set_geometry(points, crs=_crs)
5✔
606

607

608
def render_ways(nodes, waynodes, waytags):
    """
    Turn the way tables into a table with line geometries.

    Returns ``None`` if ``waynodes`` is ``None`` or empty. Otherwise the
    node coordinates are merged onto the way nodes, one LineString is built
    per way id and set as geometry of ``waytags``.
    """
    if waynodes is None or waynodes.empty:
        return None

    def _as_linestring(group):
        # order the coordinates by their position within the way; a way with
        # fewer than two located nodes gets no geometry
        coords = group.sort_values(by="index")[["lon", "lat"]]
        return LineString(coords.values) if len(coords) > 1 else None

    # attach lon/lat of the referenced node to every way node; the id of the
    # node ends up in 'id_nodes', 'id' stays the way id
    located = waynodes.merge(
        nodes[["id", "lon", "lat"]],
        left_on="ref",
        right_on="id",
        suffixes=("", "_nodes"),
    )

    # Series with the way id as index and the LineString as value
    lines_by_way = located.groupby("id", group_keys=False).apply(
        _as_linestring, include_groups=False
    )

    ways = waytags.set_index("id").set_geometry(lines_by_way, crs=_crs)
    ways.reset_index(inplace=True)
    return ways
5✔
632

633

634
if __name__ == "__main__":
    # Nothing is executed when this module is run as a script.
    pass
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc