10622329495

Committed 29 Aug 2024 08:40PM UTC coverage: 78.502% (-7.9%) from 86.4%

Build # 10622329495

Build Type

Pull #7

github

Committed by

uvchik

Commit Message

Revert "Remove file from reverted merge"

This reverts commit cf6ea2f8c.

Pull Request Pull Request #7: Area to polygon2

Run Details

59 of 84 branches covered (70.24%)

Branch coverage included in aggregate %.

266 of 330 relevant lines covered (80.61%)

4.03 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.62

/src/dave_data/datapool/osm_request.py

from collections import namedtuple
from time import sleep
from urllib.parse import urlencode
from urllib.request import urlopen

import pandas as pd
from defusedxml.ElementTree import fromstring
from geopandas import GeoDataFrame
from pandas import DataFrame
from pandas import concat
from pandas import to_datetime
from shapely.geometry import LineString
from shapely.geometry import Point
from six import string_types

from dave_data.core import Data
from dave_data.core import MetaData
from dave_data.settings import dave_data_settings


def osm_request(data_type, area):
    """
    Request OSM data for an area and wrap the result in a ``Data`` object.

    The entry ``dave_data_settings["osm_tags"][data_type]`` is read
    positionally: item 0 is the OSM tag key, item 1 the accepted tag values
    (a list is turned into a regular-expression alternation, anything else
    means "key only") and item 2 the OSM element types to query. One
    Overpass query is sent per element type and all non-empty results are
    concatenated into a single table.

    Parameters
    ----------
    data_type : str
        Key into ``dave_data_settings["osm_tags"]``.
    area : polygon
        Search area. It is forwarded to :func:`query_osm` and stored as
        ``polygon`` of the returned object.

    Returns
    -------
    Data
        Object holding the combined query result in ``data``.

    Examples
    --------
    >>> from shapely import box
    >>> streets = osm_request("road", box(13.409, 52.519, 13.41, 52.52))
    >>> len(streets.data) > 0
    True

    """
    data_param = dave_data_settings["osm_tags"][data_type]
    osm_key = data_param[0]
    osm_values = data_param[1]
    osm_types = data_param[2]

    # The tag filter does not depend on the element type, so build it once.
    if isinstance(osm_values, list):
        tags = f'{osm_key}~"{"|".join(osm_values)}"'
    else:
        tags = f"{osm_key}"

    # Collect the result of every element type. Previously only the result
    # of the last query was returned and the accumulated table was dropped.
    frames = []
    for osm_type in osm_types:
        # get data from OSM directly via API query
        data, _ = query_osm(osm_type, area, recurse="down", tags=tags)
        # Empty results carry no geometry information; leaving them out
        # keeps the geometry/CRS of the first real result on the output.
        if not data.empty:
            frames.append(data)

    if frames:
        request_data = concat(frames, ignore_index=True)
    else:
        request_data = GeoDataFrame([])

    meta = MetaData(
        source_license="ODBL", source_date=None, organisation="OpenStreetMap"
    )
    # NOTE(review): name, description and tags are fixed to "roads" for every
    # data_type - confirm whether they should depend on data_type.
    return Data(
        name="OSM roads filtered",
        description="Some description",
        data=request_data,
        meta=meta,
        polygon=area,
        tags=["roads", "osm"],
    )


# --- request directly from OSM via Overpass API and geopandas_osm package

# These functions are based on the geopandas_osm python package, which was
# published under the following license:

# The MIT License (MIT)

# Copyright (c) 2014 Jacob Wasserman

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# Bundle of the intermediate tables parsed from one OSM XML response.
OSMData = namedtuple(
    "OSMData",
    ["nodes", "waynodes", "waytags", "relmembers", "reltags"],
)

# Coordinate reference system assigned to every rendered geometry.
_crs = "epsg:4326"

# Tag keys that are skipped while parsing so they do not clutter the output.
# The list is taken from osmtogeojson's index.js
# (https://github.com/tyrasd/osmtogeojson)
uninteresting_tags = {
    "attribution",
    "created_by",
    "history",
    "source",
    "source:ref",
    "source_ref",
    "tiger:county",
    "tiger:tlid",
    "tiger:upload_uuid",
}


# http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide
def query_osm(
    typ, bbox=None, recurse=None, tags="", raw=False, meta=False, **kwargs
):
    """
    Query the Overpass API to obtain OpenStreetMap data.

    See also:
    http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide

    The OSM XML data is parsed into an intermediate set of DataFrames.
    By passing in 'render=False', this will return these DataFrames stored
    as the OSMData namedtuple. If render is True, then the DataFrames
    are built into their corresponding geometries.

    The request is repeated until it succeeds with HTTP status 200; between
    two attempts the function waits ``dave_data_settings["osm_time_delay"]``
    seconds.

    Parameters
    ----------
    typ : {'node', 'way', 'relation'}
        The type of OSM data to query
    bbox : polygon, optional
        Search area used to restrict the query. Unless the query is
        extremely restricted, you usually want to specify this. The object
        must offer ``exterior.coords`` yielding (lon, lat) pairs, e.g. a
        Shapely polygon; the coordinates are sent as an Overpass "poly"
        filter.
    recurse : {'up', 'down', 'uprel', 'downrel'}
        This is used to get more data than the original query. If 'typ' is
        'way', you'll usually want this set to 'down' which grabs all nodes
        of the matching ways
    tags : string or list of query strings
        See also the OverpassQL (referenced above) for more tag options
        Examples:
            tags='highway'
                Matches objects with a 'highway' tag
            tags='highway=motorway'
                Matches objects where the 'highway' tag is 'motorway'
            tags='name~[Mm]agazine'
                Match if the 'name' tag matches the regular expression

            Specify a list of tag requests to match all of them
            tags=['highway', 'name~"^Magazine"']
                Match tags that have 'highway' and where 'name' starts
                with 'Magazine'
    raw : boolean, default False
        Return the raw XML data returned by the request
    meta : boolean, default False
        Indicates whether to query the metadata with each OSM object. This
        includes the changeset, timestamp, uid, user, and version.
    **kwargs
        Forwarded to ``read_osm`` (e.g. ``render``, ``drop_untagged``).

    Returns
    -------
    tuple
        ``(data, meta_data)``. ``data`` is the raw response content if
        ``raw`` is True, otherwise the result of ``read_osm``. ``meta_data``
        is currently a placeholder ``pandas.Series``.

        Note that there's probably a bit more filtering required to get the
        exact desired data. For example if you only want ways, you may want
        to grab only the linestrings like:

    Raises
    ------
    ValueError
        If the generated URL does not use the http or https scheme.

    Examples
    --------
    >>> #  df = df[df.type == 'LineString']

    """
    url = _build_url(typ, bbox, recurse, tags, meta)

    # Validate the scheme once, before the retry loop: inside the loop the
    # error would be swallowed by the retry handler and retried forever.
    if not url.startswith(("http:", "https:")):
        raise ValueError("URL must start with 'http:' or 'https:'")

    # add time delay because osm doesn't allow more than 1 request per second.
    time_delay = dave_data_settings["osm_time_delay"]

    # TODO: Raise on non-200 (or 400-599)
    while True:
        try:
            with urlopen(url) as response:  # noqa: S310
                content = response.read()
                status = response.getcode()
            if status == 200:
                break
            reason = f"HTTP status {status}"
        except Exception as inst:
            # Deliberately broad: the query is best effort and every failure
            # of the request is answered with another attempt.
            reason = inst
        print(f'\n Retry OSM query because of "{reason}"')
        # Wait before every retry, including after a non-200 response.
        sleep(time_delay)

    # get meta information (placeholder until real metadata is available)
    meta_data = pd.Series({"meta": "coming soon"})

    if raw:
        return content, meta_data
    return read_osm(content, **kwargs), meta_data


def _build_url(typ, bbox=None, recurse=None, tags="", meta=False):
    """
    Build the Overpass API request URL for one query.

    Parameters
    ----------
    typ : str
        OSM element type that starts the query statement.
    bbox : polygon, optional
        Search area. Its ``exterior.coords`` are read as (lon, lat) pairs
        and written as an Overpass "poly" filter. No area filter is added
        if it is None.
    recurse : {'up', 'uprel', 'down', 'downrel'}, optional
        Name of the recursion which is translated to the Overpass operator
        '<', '<<', '>' or '>>'.
    tags : str or iterable of str
        One tag filter or several tag filters; each one is wrapped in
        square brackets.
    meta : bool, default False
        Adds the ``meta`` keyword to the ``out`` statement.

    Returns
    -------
    str
        URL of the Overpass interpreter with the url-encoded query as
        ``data`` parameter.

    Raises
    ------
    ValueError
        If ``recurse`` is not None and not one of the known names.

    """
    recurse_map = {
        "up": "<",
        "uprel": "<<",
        "down": ">",
        "downrel": ">>",
    }
    if recurse is None:
        recursestr = ""
    else:
        try:
            recursestr = recurse_map[recurse]
        except KeyError as k_exception:
            raise ValueError(
                f"Unrecognized recurse value '{recurse}'. "
                f"Must be one of: {', '.join(recurse_map)}."
            ) from k_exception

    # Allow tags to be a single string. The builtin str is sufficient here,
    # the module is Python 3 only.
    if isinstance(tags, str) and tags:
        tags = [tags]
    queries = "".join(f"[{t}]" for t in tags)

    # The area is passed as an Overpass "poly" filter which expects a
    # sequence of "latitude longitude" pairs, so the (lon, lat) order of the
    # polygon coordinates is swapped.
    if bbox is None:
        bboxstr = ""
    else:
        pairs = " ".join(f"{c[1]} {c[0]}" for c in bbox.exterior.coords)
        bboxstr = f'(poly:"{pairs}")'

    metastr = "meta" if meta else ""

    query = f"({typ}{bboxstr}{queries};{recursestr};);out {metastr};"

    return "http://www.overpass-api.de/api/interpreter?" + urlencode(
        {"data": query}
    )


def read_osm(content, render=True, **kwargs):
    """
    Parse OSM XML content into tables and optionally render geometries.

    The document is split into a node table, two way tables (node
    references and tags) and two relation tables (members and tags) which
    are bundled as ``OSMData``. With ``render=True`` the bundle is handed to
    ``render_to_gdf`` together with ``kwargs`` and its result is returned;
    otherwise the ``OSMData`` bundle itself is returned.

    """
    root = fromstring(content)

    node_table = read_nodes(root)
    way_nodes, way_tags = read_ways(root)
    rel_members, rel_tags = read_relations(root)

    parsed = OSMData(
        nodes=node_table,
        waynodes=way_nodes,
        waytags=way_tags,
        relmembers=rel_members,
        reltags=rel_tags,
    )

    if not render:
        return parsed
    return render_to_gdf(parsed, **kwargs)


def read_nodes(doc):
    """
    Collect all ``node`` elements of the document in one DataFrame.

    Every node becomes one row built from its XML attributes and its tags;
    the ``lon`` and ``lat`` columns are converted to float if there is at
    least one node.

    """
    #   Example:
    #   <node id="1705717514" lat="42.3630798" lon="-71.0997601">
    #       <tag k="crossing" v="zebra"/>
    #       <tag k="highway" v="crossing"/>
    #       <tag k="source" v="Bing"/>
    #   </node>
    records = [_element_to_dict(element) for element in doc.findall("node")]
    frame = _dict_to_dataframe(records)
    if frame.empty:
        return frame

    for coordinate in ("lon", "lat"):
        frame[coordinate] = frame[coordinate].astype(float)
    return frame


def _element_to_dict(element):
    """
    Flatten an XML element into a dict of its attributes and tags.

    The result starts as a copy of the element attributes. Each ``tag``
    child whose key is not listed in ``uninteresting_tags`` is then added
    as key/value pair; a tag overwrites an attribute of the same name.

    """
    record = element.attrib.copy()
    record.update(
        (tag.attrib["k"], tag.attrib["v"])
        for tag in element.findall("tag")
        if tag.attrib["k"] not in uninteresting_tags
    )
    return record


def _dict_to_dataframe(d):
    """
    Turn the collected records into a DataFrame.

    If the table has a ``timestamp`` column, that column is converted with
    ``to_datetime``.

    """
    frame = DataFrame.from_dict(d)
    if "timestamp" not in frame:
        return frame

    frame["timestamp"] = to_datetime(frame["timestamp"])
    return frame


def read_ways(doc):
    """
    Collect all ``way`` elements of the document in two DataFrames.

    Returns
    -------
    waynodes : DataFrame
        One row per ``nd`` child holding its attributes, the id of the way
        as ``id`` and its position within the way as ``index``.
    waytags : DataFrame
        One row per way built from its attributes and tags.

    """
    #   Example:
    #   <way id="8614593">
    #       <nd ref="61326730"/>
    #       <nd ref="61326036"/>
    #       <nd ref="61321194"/>
    #       <tag k="attribution" v="Office of Geographic and Environmental
    #           Information (MassGIS)"/>
    #       <tag k="condition" v="fair"/>
    #       <tag k="created_by" v="JOSM"/>
    #       <tag k="highway" v="residential"/>
    #       <tag k="lanes" v="2"/>
    #       <tag k="massgis:way_id" v="171099"/>
    #       <tag k="name" v="Centre Street"/>
    #       <tag k="source" v="massgis_import_v0.1_20071008165629"/>
    #       <tag k="width" v="13.4"/>
    #   </way>
    node_refs = []
    tag_records = []
    for way in doc.findall("way"):
        way_id = way.attrib["id"]
        node_refs.extend(
            {**nd.attrib, "id": way_id, "index": position}
            for position, nd in enumerate(way.findall("nd"))
        )
        tag_records.append(_element_to_dict(way))

    return _dict_to_dataframe(node_refs), _dict_to_dataframe(tag_records)


def read_relations(doc):
    """
    Collect all ``relation`` elements of the document in two DataFrames.

    Returns
    -------
    relmembers : DataFrame
        One row per ``member`` child holding its attributes, the id of the
        relation as ``id`` and its position within the relation as
        ``index``.
    reltags : DataFrame
        One row per relation built from its attributes and tags.

    """
    # Example:
    #   <relation id="1933745">
    #     <member type="way" ref="134055159" role="outer"/>
    #     <member type="way" ref="260533047" role="outer"/>
    #     <member type="way" ref="142867799" role="outer"/>
    #     <member type="way" ref="134063352" role="outer"/>
    #     <member type="way" ref="142803038" role="outer"/>
    #     <member type="way" ref="134056144" role="outer"/>
    #     <member type="way" ref="134056141" role="outer"/>
    #     <tag k="admin_level" v="8"/>
    #     <tag k="boundary" v="administrative"/>
    #     <tag k="name" v="Cambridge"/>
    #     <tag k="type" v="boundary"/>
    #     <tag k="wikipedia" v="en:Cambridge, Massachusetts"/>
    #   </relation>
    member_records = []
    tag_records = []
    for relation in doc.findall("relation"):
        relation_id = relation.attrib["id"]
        member_records.extend(
            {**member.attrib, "id": relation_id, "index": position}
            for position, member in enumerate(relation.findall("member"))
        )
        tag_records.append(_element_to_dict(relation))

    return _dict_to_dataframe(member_records), _dict_to_dataframe(tag_records)


def render_to_gdf(osmdata, drop_untagged=True):
    """
    Build one table with geometries from the parsed OSM tables.

    Nodes are rendered with ``render_nodes`` and ways with ``render_ways``.
    If the ways have a ``landuse`` column and the data contains relation
    members, each way that is a member of a relation gets the id of that
    relation as ``relation_id`` and, if its own ``landuse`` is missing, the
    ``landuse`` of the relation. Rendered ways are appended to the nodes.

    Parameters
    ----------
    osmdata : OSMData
        Parsed tables as returned by ``read_osm(..., render=False)``.
    drop_untagged : bool, default True
        Forwarded to ``render_nodes``.

    Returns
    -------
    Table of rendered nodes followed by rendered ways. If there are no
    ways, the result of ``render_nodes`` is returned unchanged.

    """
    nodes = render_nodes(osmdata.nodes, drop_untagged)
    ways = render_ways(osmdata.nodes, osmdata.waynodes, osmdata.waytags)
    if ways is None:
        return nodes

    relmembers = osmdata.relmembers
    reltags = osmdata.reltags

    # set landuse tag from origin relation at relation members who have no
    # landuse tag
    if "landuse" in ways.columns and not relmembers.empty:
        for i, way in ways.iterrows():
            # A way does not have to belong to a relation; skip it instead
            # of failing on the empty selection.
            memberships = relmembers[relmembers.ref == way.id]
            if memberships.empty:
                continue
            # get and add origin relation id
            rel_id = memberships.iloc[0].id
            ways.at[i, "relation_id"] = rel_id
            # get and add origin relation landuse if needed; the relation
            # may be missing in the tag table, then nothing can be copied
            matching = reltags[reltags.id == rel_id]
            if matching.empty:
                continue
            osm_reltag = matching.iloc[0]
            if "landuse" in osm_reltag.keys() and str(way.landuse) == "nan":
                ways.at[i, "landuse"] = osm_reltag.landuse

    nodes = concat([nodes, ways], ignore_index=True)
    return nodes.set_geometry("geometry", crs=_crs)


def render_nodes(nodes, drop_untagged=True):
    """
    Convert the lon/lat columns of the node table into point geometries.

    An empty table is returned unchanged. Otherwise, if ``drop_untagged``
    is True, rows without a value in any column besides ``id``, ``lon``
    and ``lat`` are removed. The ``lon`` and ``lat`` columns are then
    replaced by a geometry of points.

    """
    # nothing to render without nodes
    if nodes.empty:
        return nodes

    if drop_untagged:
        # every column apart from id and the coordinates holds a tag
        tag_columns = nodes.columns.drop(["id", "lon", "lat"])
        nodes = nodes.dropna(subset=tag_columns, how="all")

    points = [Point(lon, lat) for lon, lat in zip(nodes["lon"], nodes["lat"])]
    without_coords = nodes.drop(columns=["lon", "lat"])
    return without_coords.set_geometry(points, crs=_crs)


def render_ways(nodes, waynodes, waytags):
    """
    Build a line geometry for every way and attach it to the way tags.

    Parameters
    ----------
    nodes : DataFrame
        Node table with the columns ``id``, ``lon`` and ``lat``.
    waynodes : DataFrame or None
        Node references of the ways (``id``, ``ref``, ``index``).
    waytags : DataFrame
        Tag table of the ways with an ``id`` column.

    Returns
    -------
    The way tags with a geometry column, or None if ``waynodes`` is None
    or empty.

    """
    if waynodes is None or waynodes.empty:
        return None

    def wayline(group):
        # Order the nodes of one way; a line needs at least two points.
        coords = group.sort_values(by="index")[["lon", "lat"]]
        if len(coords) > 1:
            return LineString(coords.values)
        return None

    # Attach the coordinates to each node reference, then create one
    # LineString per way. way_lines is a Series where the index is the way
    # id and the value is the LineString. Combine it with the waytags to get
    # a single GeoDataFrame of ways.
    located = waynodes.merge(
        nodes[["id", "lon", "lat"]],
        left_on="ref",
        right_on="id",
        suffixes=("", "_nodes"),
    )
    per_way = located.groupby("id", group_keys=False)
    way_lines = per_way.apply(wayline, include_groups=False)

    ways = waytags.set_index("id").set_geometry(way_lines, crs=_crs)
    return ways.reset_index()


# Script entry point: intentionally does nothing when the module is executed
# directly.
if __name__ == "__main__":
    pass

1	from collections import namedtuple	5✔
2	from time import sleep	5✔
3	from urllib.parse import urlencode	5✔
4	from urllib.request import urlopen	5✔
5
6	import pandas as pd	5✔
7	from defusedxml.ElementTree import fromstring	5✔
8	from geopandas import GeoDataFrame	5✔
9	from pandas import DataFrame	5✔
10	from pandas import concat	5✔
11	from pandas import to_datetime	5✔
12	from shapely.geometry import LineString	5✔
13	from shapely.geometry import Point	5✔
14	from six import string_types	5✔
15
16	from dave_data.core import Data	5✔
17	from dave_data.core import MetaData	5✔
18	from dave_data.settings import dave_data_settings	5✔
19
20
21	def osm_request(data_type, area):	5✔
22	"""
23	This function requests OSM data from OSM
24
25	Examples
26	--------
27	>>> from shapely import box
28	>>> streets = osm_request("road", box(13.409, 52.519, 13.41, 52.52))
29	>>> len(streets.data) > 0
30	True
31
32	"""
33	data_param = dave_data_settings["osm_tags"][data_type]	5✔
34	request_data = GeoDataFrame([])	5✔
35	meta_data = None	5✔
36	data = GeoDataFrame	5✔
37	for osm_type in data_param[2]:	5✔
38	# create tags
39	tags = (	5✔
40	f'{data_param[0]}~"{"\|".join(data_param[1])}"'
41	if isinstance(data_param[1], list)
42	else f"{data_param[0]}"
43	)
44	# get data from OSM directly via API query
45	data, meta_data = query_osm(osm_type, area, recurse="down", tags=tags)	5✔
46	request_data = concat([request_data, data], ignore_index=True)	5✔
47	meta = MetaData(	5✔
48	source_license="ODBL", source_date=None, organisation="OpenStreetMap"
49	)
50	return Data(	5✔
51	name="OSM roads filtered",
52	description="Some description",
53	data=data,
54	meta=meta,
55	polygon=area,
56	tags=["roads", "osm"],
57	)
58
59
60	# --- request directly from OSM via Overpass API and geopandas_osm package
61
62	# This functions are based on the geopandas_osm python package, which was
63	# published under the # following license:
64
65	# The MIT License (MIT)
66
67	# Copyright (c) 2014 Jacob Wasserman
68
69	# Permission is hereby granted, free of charge, to any person obtaining a copy
70	# of this software and associated documentation files (the "Software"), to deal
71	# in the Software without restriction, including without limitation the rights
72	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
73	# copies of the Software, and to permit persons to whom the Software is
74	# furnished to do so, subject to the following conditions:
75
76	# The above copyright notice and this permission notice shall be included in
77	# all copies or substantial portions of the Software.
78
79	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
80	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
81	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
82	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
83	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
84	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
85	# SOFTWARE.
86
87
88	OSMData = namedtuple(	5✔
89	"OSMData", ("nodes", "waynodes", "waytags", "relmembers", "reltags")
90	)
91	_crs = "epsg:4326"	5✔
92
93	# Tags to remove so we don't clobber the output. This list comes from
94	# osmtogeojson's index.js (https://github.com/tyrasd/osmtogeojson)
95	uninteresting_tags = {	5✔
96	"source",
97	"source_ref",
98	"source:ref",
99	"history",
100	"attribution",
101	"created_by",
102	"tiger:county",
103	"tiger:tlid",
104	"tiger:upload_uuid",
105	}
106
107
108	# http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide
109	def query_osm(	5✔
110	typ, bbox=None, recurse=None, tags="", raw=False, meta=False, **kwargs
111	):
112	"""
113	Query the Overpass API to obtain OpenStreetMap data.
114
115	See also:
116	http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide
117
118	The OSM XML data is parsed into an intermediate set of DataFrames.
119	By passing in 'render=False', this will return these DataFrames stored
120	as the OSMData namedtuple. If render is True, then the DataFrames
121	are built into their corresponding geometries.
122
123	Parameters
124	----------
125	typ : {'node', 'way', 'relation'}
126	The type of OSM data to query
127	bbox : (min lon, min lat, max lon, max lat) bounding box
128	Optional bounding box to restrict the query. Unless the query
129	is extremely restricted, you usually want to specify this.
130	It can be retrieved from GeoPandas objects as 'df.total_bounds' or
131	from Shapely objects as 'geom.bounds'
132	recurse : {'up, 'down', 'uprel', 'downrel'}
133	This is used to get more data than the original query. If 'typ' is
134	'way', you'll usually want this set to 'down' which grabs all nodes
135	of the matching ways
136	tags : string or list of query strings
137	See also the OverpassQL (referenced above) for more tag options
138	Examples:
139	tags='highway'
140	Matches objects with a 'highway' tag
141	tags='highway=motorway' <-- Matches ob
142	Matches objects where the 'highway' tag is 'motorway'
143	tags='name~[Mm]agazine'
144	Match if the 'name' tag matches the regular expression
145
146	Specify a list of tag requests to match all of them
147	tags=['highway', 'name~"^Magazine"']
148	Match tags that have 'highway' and where 'name' starts
149	with 'Magazine'
150	raw : boolean, default False
151	Return the raw XML data returned by the request
152	meta : boolean, default False
153	Indicates whether to query the metadata with each OSM object. This
154	includes the changeset, timestamp, uid, user, and version.
155
156	Returns
157	-------
158	df - GeoDataFrame
159	Note that there's probably a bit more filtering required to get the
160	exact desired data. For example if you only want ways, you may want
161	to grab only the linestrings like:
162
163	Examples
164	--------
165	>>> # df = df[df.type == 'LineString']
166
167	"""
168	url = _build_url(typ, bbox, recurse, tags, meta)	5✔
169	# add time delay because osm doesn't alowed more than 1 request per second.
170	time_delay = dave_data_settings["osm_time_delay"]	5✔
171
172	# TODO: Raise on non-200 (or 400-599)
173	# with urlopen(url) as response:
174	# content = response.read()
175	while 1:	5✔
176	try:	5✔
177	if not url.startswith(("http:", "https:")):	5✔
178	raise ValueError("URL must start with 'http:' or 'https:'")	×
179
180	with urlopen(url) as response: # noqa: S310	5✔
181	content = response.read()	5✔
182	if response.getcode() == 200:	5✔
183	break	5✔
184	except Exception as inst:	×
185	print(f'\n Retry OSM query because of "{inst}"')	×
186	# add time delay
187	sleep(time_delay)	×
188
189	# get meta informations
190	meta_data = pd.Series({"meta": "coming soon"})	5✔
191
192	if raw:	5✔
193	return content, meta_data	×
194	return read_osm(content, **kwargs), meta_data	5✔
195
196
197	def _build_url(typ, bbox=None, recurse=None, tags="", meta=False):	5✔
198	recurse_map = {	5✔
199	"up": "<",
200	"uprel": "<<",
201	"down": ">",
202	"downrel": ">>",
203	}
204	if recurse is None:	5✔
205	recursestr = ""	×
206	else:
207	try:	5✔
208	recursestr = recurse_map[recurse]	5✔
209	except KeyError as k_exception:	×
210	raise ValueError(	×
211	"Unrecognized recurse value '{}'. "
212	"Must be one of: {}.".format(
213	recurse, ", ".join(recurse_map.keys())
214	)
215	) from k_exception
216
217	# Allow tags to be a single string
218	if isinstance(tags, string_types) and tags:	5✔
219	tags = [tags]	5✔
220	queries = "".join(f"[{t}]" for t in tags)	5✔
221
222	# Overpass QL takes the bounding box as
223	# (min latitude, min longitude, max latitude, max longitude)
224	if bbox is None:	5✔
225	bboxstr = ""	×
226	else:
227	bboxstr = '(poly:"{}")'.format(	5✔
228	" ".join(f"{c[1]} {c[0]}" for c in bbox.exterior.coords)
229	)
230
231	metastr = "meta" if meta else ""	5✔
232
233	query = f"({typ}{bboxstr}{queries};{recursestr};);out {metastr};"	5✔
234
235	url = "".join(	5✔
236	[
237	"http://www.overpass-api.de/api/interpreter?",
238	urlencode({"data": query}),
239	]
240	)
241
242	return url	5✔
243
244
245	def read_osm(content, render=True, **kwargs):	5✔
246	"""
247	Parse OSM XML data and store as several DataFrames. Optionally "render"
248	the DataFrames to GeoDataFrames.
249
250	"""
251	doc = fromstring(content)	5✔
252
253	nodes = read_nodes(doc)	5✔
254	waynodes, waytags = read_ways(doc)	5✔
255	relmembers, reltags = read_relations(doc)	5✔
256
257	# check if all requested variables are empty
258	# if nodes.empty and waynodes.empty and waytags.empty and relmembers.empty
259	# and reltags.empty:
260
261	data = OSMData(nodes, waynodes, waytags, relmembers, reltags)	5✔
262
263	if render:	5✔
264	data = render_to_gdf(data, **kwargs)	5✔
265	return data	5✔
266
267
268	def read_nodes(doc):	5✔
269	# Example:
270	# <node id="1705717514" lat="42.3630798" lon="-71.0997601">
271	# <tag k="crossing" v="zebra"/>
272	# <tag k="highway" v="crossing"/>
273	# <tag k="source" v="Bing"/>
274	# </node>
275	nodes = [_element_to_dict(xmlnode) for xmlnode in doc.findall("node")]	5✔
276	nodes = _dict_to_dataframe(nodes)	5✔
277	if not nodes.empty:	5✔
278	nodes["lon"] = nodes["lon"].astype(float)	5✔
279	nodes["lat"] = nodes["lat"].astype(float)	5✔
280
281	return nodes	5✔
282
283
284	def _element_to_dict(element):	5✔
285	d = element.attrib.copy()	5✔
286	for t in element.findall("tag"):	5✔
287	k = t.attrib["k"]	5✔
288	if k not in uninteresting_tags:	5✔
289	d[k] = t.attrib["v"]	5✔
290
291	return d	5✔
292
293
294	def _dict_to_dataframe(d):	5✔
295	df = DataFrame.from_dict(d)	5✔
296	if "timestamp" in df:	5✔
297	df["timestamp"] = to_datetime(df["timestamp"])	×
298
299	return df	5✔
300
301
302	def read_ways(doc):	5✔
303	# Example:
304	# <way id="8614593">
305	# <nd ref="61326730"/>
306	# <nd ref="61326036"/>
307	# <nd ref="61321194"/>
308	# <tag k="attribution" v="Office of Geographic and Environmental
309	# Information (MassGIS)"/>
310	# <tag k="condition" v="fair"/>
311	# <tag k="created_by" v="JOSM"/>
312	# <tag k="highway" v="residential"/>
313	# <tag k="lanes" v="2"/>
314	# <tag k="massgis:way_id" v="171099"/>
315	# <tag k="name" v="Centre Street"/>
316	# <tag k="source" v="massgis_import_v0.1_20071008165629"/>
317	# <tag k="width" v="13.4"/>
318	# </way>
319	waytags = []	5✔
320	waynodes = []	5✔
321	for xmlway in doc.findall("way"):	5✔
322	wayid = xmlway.attrib["id"]	5✔
323	for i, xmlnd in enumerate(xmlway.findall("nd")):	5✔
324	d = xmlnd.attrib.copy()	5✔
325	d["id"] = wayid	5✔
326	d["index"] = i	5✔
327	waynodes.append(d)	5✔
328
329	tags = _element_to_dict(xmlway)	5✔
330	waytags.append(tags)	5✔
331
332	waynodes = _dict_to_dataframe(waynodes)	5✔
333	waytags = _dict_to_dataframe(waytags)	5✔
334
335	return waynodes, waytags	5✔
336
337
338	def read_relations(doc):	5✔
339	# Example:
340	# <relation id="1933745">
341	# <member type="way" ref="134055159" role="outer"/>
342	# <member type="way" ref="260533047" role="outer"/>
343	# <member type="way" ref="142867799" role="outer"/>
344	# <member type="way" ref="134063352" role="outer"/>
345	# <member type="way" ref="142803038" role="outer"/>
346	# <member type="way" ref="134056144" role="outer"/>
347	# <member type="way" ref="134056141" role="outer"/>
348	# <tag k="admin_level" v="8"/>
349	# <tag k="boundary" v="administrative"/>
350	# <tag k="name" v="Cambridge"/>
351	# <tag k="type" v="boundary"/>
352	# <tag k="wikipedia" v="en:Cambridge, Massachusetts"/>
353	# </relation>
354	reltags = []	5✔
355	relmembers = []	5✔
356	for xmlrel in doc.findall("relation"):	5✔
357	relid = xmlrel.attrib["id"]	×
358	for i, xmlmember in enumerate(xmlrel.findall("member")):	×
359	d = xmlmember.attrib.copy()	×
360	d["id"] = relid	×
361	d["index"] = i	×
362	relmembers.append(d)	×
363
364	tags = _element_to_dict(xmlrel)	×
365	reltags.append(tags)	×
366
367	relmembers = _dict_to_dataframe(relmembers)	5✔
368	reltags = _dict_to_dataframe(reltags)	5✔
369	return relmembers, reltags	5✔
370
371
372	def render_to_gdf(osmdata, drop_untagged=True):	5✔
373	nodes = render_nodes(osmdata.nodes, drop_untagged)	5✔
374	ways = render_ways(osmdata.nodes, osmdata.waynodes, osmdata.waytags)	5✔
375
376	# set landuse tag from origin relation at relation members who has no
377	# landuse tag
378	if (	5✔
379	(ways is not None)
380	and ("landuse" in ways.keys())
381	and (not osmdata.relmembers.empty)
382	):
383	for i, way in ways.iterrows():	×
384	# get and add origin relation id
385	rel_id = (	×
386	osmdata.relmembers[osmdata.relmembers.ref == way.id].iloc[0].id
387	)
388	ways.at[i, "relation_id"] = rel_id	×
389	# get and add origin relation landuse if needed
390	osm_reltag = osmdata.reltags[osmdata.reltags.id == rel_id].iloc[0]	×
391	if "landuse" in osm_reltag.keys() and str(way.landuse) == "nan":	×
392	ways.at[i, "landuse"] = osm_reltag.landuse	×
393
394	if ways is not None:	5✔
395	nodes = concat([nodes, ways], ignore_index=True)	5✔
396	nodes = nodes.set_geometry("geometry", crs=_crs)	5✔
397
398	return nodes	5✔
399
400
401	def render_nodes(nodes, drop_untagged=True):	5✔
402	# check if their are nodes
403	if not nodes.empty:	5✔
404	# Drop nodes that have no tags, convert lon/lat to points
405	if drop_untagged:	5✔
406	nodes = nodes.dropna(	5✔
407	subset=nodes.columns.drop(["id", "lon", "lat"]), how="all"
408	)
409	points = [Point(x["lon"], x["lat"]) for i, x in nodes.iterrows()]	5✔
410	nodes = nodes.drop(["lon", "lat"], axis=1)	5✔
411	nodes = nodes.set_geometry(points, crs=_crs)	5✔
412
413	return nodes	5✔
414
415
416	def render_ways(nodes, waynodes, waytags):	5✔
417	if waynodes is None or waynodes.empty:	5✔
418	return None	×
419
420	node_points = nodes[["id", "lon", "lat"]]	5✔
421
422	def wayline(df):	5✔
423	df = df.sort_values(by="index")[["lon", "lat"]]	5✔
424	if len(df) > 1:	5✔
425	return LineString(df.values)	5✔
426
427	# Group the ways and create a LineString for each one. way_lines is a
428	# Series where the index is the way id and the value is the LineString.
429	# Merge it with the waytags to get a single GeoDataFrame of ways
430	waynodes = waynodes.merge(	5✔
431	node_points, left_on="ref", right_on="id", suffixes=("", "_nodes")
432	)
433	way_lines = waynodes.groupby("id", group_keys=False).apply(	5✔
434	wayline, include_groups=False
435	)
436	ways = waytags.set_index("id").set_geometry(way_lines, crs=_crs)	5✔
437	ways.reset_index(inplace=True)	5✔
438
439	return ways	5✔
440
441
442	if __name__ == "__main__":	5✔
443	pass	×

DaveFoss / DAVE_data / 10622329495

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous