johntruckenbrodt / pyroSAR, build 16620069336 (push, via github, by johntruckenbrodt)
29 Jul 2025 11:31AM UTC, coverage: 51.339% (-0.03% from 51.368%)
commit: [drivers.ESA.scanMetadata] call function with <key>=<value> declaration
3718 of 7242 relevant lines covered (51.34%), 0.51 hits per line


Source file: /pyroSAR/drivers.py (66.35% of lines covered)

###############################################################################
# Reading and Organizing system for SAR images
# Copyright (c) 2016-2025, the pyroSAR Developers.

# This file is part of the pyroSAR Project. It is subject to the
# license terms in the LICENSE.txt file found in the top-level
# directory of this distribution and at
# https://github.com/johntruckenbrodt/pyroSAR/blob/master/LICENSE.txt.
# No part of the pyroSAR project, including this file, may be
# copied, modified, propagated, or distributed except according
# to the terms contained in the LICENSE.txt file.
###############################################################################
"""
This is the core module of the pyroSAR package.
It contains the drivers for the different SAR image formats and offers
functionality for retrieving metadata, unpacking images, downloading ancillary files such as DEMs
and Orbit State Vector files, as well as archiving scenes in a database.
The :class:`ID` class and its subclasses allow easy and standardized access to the metadata of
images from different SAR sensors.
"""

import sys
import gc

from builtins import str
from io import BytesIO

import abc
import ast
import csv
import inspect
import math
import os
import re
import shutil
import struct
import operator
import tarfile as tf
import xml.etree.ElementTree as ET
import zipfile as zf
from datetime import datetime, timezone
from dateutil.parser import parse as dateparse
from time import strptime, strftime
from statistics import median
from itertools import groupby
from PIL import Image

import progressbar as pb
from osgeo import gdal, osr, ogr
from osgeo.gdalconst import GA_ReadOnly

from . import S1, patterns
from .config import __LOCAL__
from .ERS import passdb_query, get_angles_resolution
from .xml_util import getNamespaces

from spatialist import crsConvert, sqlite3, Vector, bbox
from spatialist.ancillary import parse_literal, finder, multicore

from sqlalchemy import create_engine, Table, MetaData, Column, Integer, String, exc
from sqlalchemy import inspect as sql_inspect
from sqlalchemy.event import listen
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import select, func
from sqlalchemy.engine.url import URL
from sqlalchemy.ext.automap import automap_base
from sqlalchemy_utils import database_exists, create_database, drop_database
from geoalchemy2 import Geometry
import socket
import time
import platform
import logging

log = logging.getLogger(__name__)


def identify(scene):
    """
    identify a SAR scene and return the appropriate metadata handler object

    Parameters
    ----------
    scene: str
        a file or directory name

    Returns
    -------
    pyroSAR.drivers.ID
        a pyroSAR metadata handler

    Examples
    --------

    >>> from pyroSAR import identify
    >>> filename = 'S1A_IW_GRDH_1SDV_20180829T170656_20180829T170721_023464_028DE0_F7BD.zip'
    >>> scene = identify(filename)
    >>> print(scene)
    pyroSAR ID object of type SAFE
    acquisition_mode: IW
    cycleNumber: 148
    frameNumber: 167392
    lines: 16703
    orbit: A
    orbitNumber_abs: 23464
    orbitNumber_rel: 117
    polarizations: ['VV', 'VH']
    product: GRD
    projection: +proj=longlat +datum=WGS84 +no_defs
    samples: 26056
    sensor: S1A
    spacing: (10.0, 10.0)
    start: 20180829T170656
    stop: 20180829T170721
    """
    if not os.path.exists(scene):
        raise OSError("No such file or directory: '{}'".format(scene))
    
    def get_subclasses(c):
        subclasses = c.__subclasses__()
        for subclass in subclasses.copy():
            subclasses.extend(get_subclasses(subclass))
        return list(set(subclasses))
    
    # try all registered format drivers until one accepts the scene
    for handler in get_subclasses(ID):
        try:
            return handler(scene)
        except Exception:
            pass
    raise RuntimeError('data format not supported')


def identify_many(scenes, pbar=False, sortkey=None, cores=1):
    """
    wrapper function for returning metadata handlers of all valid scenes in a list,
    similar to function :func:`~pyroSAR.drivers.identify`.

    Parameters
    ----------
    scenes: list[str or ID]
        the file names of the scenes to be identified
    pbar: bool
        add a progress bar?
    sortkey: str or None
        sort the handler object list by an attribute
    cores: int
        the number of cores used to parallelize the identification

    Returns
    -------
    list[ID]
        a list of pyroSAR metadata handlers

    Examples
    --------
    >>> from pyroSAR import identify_many
    >>> files = finder('/path', ['S1*.zip'])
    >>> ids = identify_many(files, pbar=False, sortkey='start')
    """
    
    def handler(scene):
        if isinstance(scene, ID):
            return scene
        else:
            try:
                id = identify(scene)
                return id
            except RuntimeError:
                return None
            except PermissionError:
                log.warning("Permission denied: '{}'".format(scene))
                return None
    
    if cores == 1:
        idlist = []
        if pbar:
            progress = pb.ProgressBar(max_value=len(scenes)).start()
        else:
            progress = None
        for i, scene in enumerate(scenes):
            id = handler(scene)
            idlist.append(id)
            if progress is not None:
                progress.update(i + 1)
        if progress is not None:
            progress.finish()
    else:
        idlist = multicore(function=handler, multiargs={'scene': scenes},
                           pbar=pbar, cores=cores)
    # remove scenes that could not be identified before sorting
    idlist = list(filter(None, idlist))
    if sortkey is not None:
        idlist.sort(key=operator.attrgetter(sortkey))
    return idlist


def filter_processed(scenelist, outdir, recursive=False):
    """
    Filter a list of pyroSAR objects to those that have not yet been processed and stored in the defined directory.
    The search for processed scenes is done either in the directory alone or recursively in its subdirectories.
    Only scenes processed with pyroSAR are detected, since the search relies on its standardized naming scheme.

    Parameters
    ----------
    scenelist: list[ID]
        a list of pyroSAR objects
    outdir: str
        the processing directory
    recursive: bool
        scan `outdir` recursively into subdirectories?

    Returns
    -------
    list[ID]
        a list of those scenes that have not been processed yet
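
    Examples
    --------
    A hypothetical workflow; the paths below are placeholders:

    >>> from pyroSAR import identify_many, filter_processed
    >>> scenes = identify_many(finder('/data/archive', ['S1*.zip']))
    >>> todo = filter_processed(scenes, outdir='/data/processed', recursive=True)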
    """
    return [x for x in scenelist if not x.is_processed(outdir, recursive)]


class ID(object):
    """
    Abstract class for SAR metadata handlers
    """
    
    def __init__(self, metadict):
        """
        to be called by the __init__ methods of the format drivers.
        Scans a metadata dictionary and registers entries with a standardized name as object attributes;
        see __LOCAL__ for the standard names. It must be ensured that each of these is actually read
        by the individual SAR format driver.

        :param metadict: a dictionary containing the metadata attributes of a SAR scene
        """
        self.locals = __LOCAL__
        for item in self.locals:
            setattr(self, item, metadict[item])
    
    def __getattr__(self, item):
        raise AttributeError("object has no attribute '{}'".format(item))
    
    def __str__(self):
        lines = ['pyroSAR ID object of type {}'.format(self.__class__.__name__)]
        for item in sorted(self.locals):
            value = getattr(self, item)
            if item == 'projection':
                value = crsConvert(value, 'proj4') if value is not None else None
            if value == -1:
                value = '<no global value per product>'
            line = '{0}: {1}'.format(item, value)
            lines.append(line)
        return '\n'.join(lines)
    
    def bbox(self, outname=None, driver=None, overwrite=True):
        """
        get the bounding box of a scene either as a vector object or written to a file

        Parameters
        ----------
        outname: str
            the name of the vector file to be written
        driver: str
            the output file format; needs to be defined if the format cannot
            be auto-detected from the filename extension
        overwrite: bool
            overwrite an existing vector file?

        Returns
        -------
        ~spatialist.vector.Vector or None
            the vector object if `outname` is None, None otherwise
        
        See Also
        --------
        spatialist.vector.Vector.bbox
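
        Examples
        --------
        Illustrative usage; the scene and output file names are hypothetical:

        >>> from pyroSAR import identify
        >>> scene = identify('S1A_IW_GRDH_1SDV_20180829T170656_20180829T170721_023464_028DE0_F7BD.zip')
        >>> box = scene.bbox()  # in-memory vector object
        >>> scene.bbox(outname='scene_bbox.gpkg', driver='GPKG')  # write to file instead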
        """
        if outname is None:
            return bbox(self.getCorners(), self.projection)
        else:
            bbox(self.getCorners(), self.projection, outname=outname, driver=driver,
                 overwrite=overwrite)
    
    def geometry(self, outname=None, driver=None, overwrite=True):
        """
        get the footprint geometry of a scene either as a vector object or written to a file

        Parameters
        ----------
        outname: str
            the name of the vector file to be written
        driver: str
            the output file format; needs to be defined if the format cannot
            be auto-detected from the filename extension
        overwrite: bool
            overwrite an existing vector file?

        Returns
        -------
        ~spatialist.vector.Vector or None
            the vector object if `outname` is None, None otherwise
        
        See Also
        --------
        spatialist.vector.Vector.write
        """
        if 'coordinates' not in self.meta.keys():
            raise NotImplementedError
        srs = crsConvert(self.projection, 'osr')
        points = ogr.Geometry(ogr.wkbMultiPoint)
        for lon, lat in self.meta['coordinates']:
            point = ogr.Geometry(ogr.wkbPoint)
            point.AddPoint(lon, lat)
            points.AddGeometry(point)
        geom = points.ConvexHull()
        geom.FlattenTo2D()
        point = points = None
        # make sure the exterior ring is oriented counter-clockwise
        exterior = geom.GetGeometryRef(0)
        if exterior.IsClockwise():
            points = list(exterior.GetPoints())
            exterior.Empty()
            for x, y in reversed(points):
                exterior.AddPoint(x, y)
            geom.CloseRings()
        exterior = points = None
        
        bbox = Vector(driver='Memory')
        bbox.addlayer('geometry', srs, geom.GetGeometryType())
        bbox.addfield('area', ogr.OFTReal)
        bbox.addfeature(geom, fields={'area': geom.Area()})
        geom = None
        if outname is None:
            return bbox
        else:
            bbox.write(outfile=outname, driver=driver, overwrite=overwrite)
    
    @property
    def compression(self):
        """
        check whether a scene is compressed into a tarfile or zipfile or not at all

        Returns
        -------
        str or None
            either 'zip', 'tar' or None
        """
        if os.path.isdir(self.scene):
            return None
        elif zf.is_zipfile(self.scene):
            return 'zip'
        elif tf.is_tarfile(self.scene):
            return 'tar'
        else:
            return None
    
    def export2dict(self):
        """
        Return the uuid and the metadata that is defined in `self.locals` as a dictionary
        """
        metadata = {item: self.meta[item] for item in self.locals}
        sq_file = os.path.basename(self.file)
        title = os.path.splitext(sq_file)[0]
        metadata['uuid'] = title
        return metadata
    
    def export2sqlite(self, dbfile):
        """
        Export relevant metadata to an SQLite database

        Parameters
        ----------
        dbfile: str
            the database file
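
        Examples
        --------
        Illustrative; the database file name is a placeholder:

        >>> scene.export2sqlite('scenes.db')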

        """
        with Archive(dbfile) as archive:
            archive.insert(self)
    
    def examine(self, include_folders=False):
        """
        check whether any items in the SAR scene structure (i.e. files/folders) match the regular expression pattern
        defined by the class. On success the item is registered in the object as attribute `file`.

        Parameters
        ----------
        include_folders: bool
            also match folders (or just files)?

        Returns
        -------

        Raises
        ------
        RuntimeError
        """
        files = self.findfiles(self.pattern, include_folders=include_folders)
        if len(files) == 1:
            self.file = files[0]
        elif len(files) == 0:
            raise RuntimeError('scene does not match {} naming convention'.format(type(self).__name__))
        else:
            raise RuntimeError('file ambiguity detected:\n{}'.format('\n'.join(files)))
    
    def findfiles(self, pattern, include_folders=False):
        """
        find files in the scene archive that match a pattern.

        Parameters
        ----------
        pattern: str
            the regular expression to match
        include_folders: bool
            also match folders (or just files)?

        Returns
        -------
        list[str]
            the matched file names
        
        See Also
        --------
        :func:`spatialist.ancillary.finder`
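
        Examples
        --------
        Illustrative; list all XML files contained in the scene archive:

        >>> files = scene.findfiles(r'\.xml$')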
        """
        foldermode = 1 if include_folders else 0
        
        try:
            files = finder(target=self.scene, matchlist=[pattern],
                           foldermode=foldermode, regex=True)
        except RuntimeError:
            # the scene is a plain file, not an archive or directory; return it directly
            return self.scene
        
        if os.path.isdir(self.scene) \
                and re.search(pattern, os.path.basename(self.scene)) \
                and include_folders:
            files.append(self.scene)
        
        return files
    
    def gdalinfo(self):
        """
        read metadata directly from the GDAL SAR image drivers

        Returns
        -------
        dict
            the metadata attributes
        """
        files = self.findfiles(r'(?:\.[NE][12]$|DAT_01\.001$|product\.xml|manifest\.safe$)')
        # findfiles may return a single file name as a plain string; wrap it in a list
        if isinstance(files, str):
            files = [files]
        
        if len(files) == 1:
            prefix = {'zip': '/vsizip/', 'tar': '/vsitar/', None: ''}[self.compression]
            header = files[0]
        elif len(files) > 1:
            raise RuntimeError('file ambiguity detected')
        else:
            raise RuntimeError('file type not supported')
        
        meta = {}
        
        ext_lookup = {'.N1': 'ASAR', '.E1': 'ERS1', '.E2': 'ERS2'}
        extension = os.path.splitext(header)[1]
        if extension in ext_lookup:
            meta['sensor'] = ext_lookup[extension]
            info = gdal.Info(prefix + header, options=gdal.InfoOptions(allMetadata=True, format='json'))
            meta['extra'] = info
        
        img = gdal.Open(prefix + header, GA_ReadOnly)
        gdalmeta = img.GetMetadata()
        meta['samples'], meta['lines'], meta['bands'] = img.RasterXSize, img.RasterYSize, img.RasterCount
        meta['projection'] = img.GetGCPProjection()
        meta['gcps'] = [((x.GCPPixel, x.GCPLine), (x.GCPX, x.GCPY, x.GCPZ)) for x in img.GetGCPs()]
        img = None
        
        for item in gdalmeta:
            entry = [item, parse_literal(gdalmeta[item].strip())]
            
            try:
                entry[1] = self.parse_date(str(entry[1]))
            except ValueError:
                pass
            
            if re.search('LAT|LONG', entry[0]):
                entry[1] /= 1000000.
            meta[entry[0]] = entry[1]
        return meta
    
    def getCorners(self):
        """
        Get the bounding box corner coordinates

        Returns
        -------
        dict
            the corner coordinates as a dictionary with keys `xmin`, `ymin`, `xmax`, `ymax`
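
        Examples
        --------
        Illustrative output for a scene with geographic coordinates:

        >>> scene.getCorners()
        {'xmin': 11.3, 'xmax': 11.9, 'ymin': 47.5, 'ymax': 48.2}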
        """
        if 'coordinates' not in self.meta.keys():
            raise NotImplementedError
        coordinates = self.meta['coordinates']
        lat = [x[1] for x in coordinates]
        lon = [x[0] for x in coordinates]
        return {'xmin': min(lon), 'xmax': max(lon), 'ymin': min(lat), 'ymax': max(lat)}
    
    def getFileObj(self, filename):
        """
        Load a file into a readable file object.

        Parameters
        ----------
        filename: str
            the name of a file in the scene archive, easiest to get with method :meth:`~ID.findfiles`

        Returns
        -------
        io.BytesIO
            a file pointer object
        """
        return getFileObj(self.scene, filename)
    
    def getGammaImages(self, directory=None):
        """
        list all files processed by GAMMA

        Parameters
        ----------
        directory: str or None
            the directory to be scanned; if left empty the object attribute `gammadir` is scanned

        Returns
        -------
        list[str]
            the file names of the images processed by GAMMA

        Raises
        ------
        RuntimeError
        """
        if directory is None:
            if hasattr(self, 'gammadir'):
                directory = self.gammadir
            else:
                raise RuntimeError(
                    'directory missing; please provide directory to function or define object attribute "gammadir"')
        return [x for x in finder(directory, [self.outname_base()], regex=True) if
                not re.search(r'\.(?:par|hdr|aux\.xml|swp|sh)$', x)]
    
    def getHGT(self):
        """
        get the names of all SRTM HGT tiles overlapping with the SAR scene

        Returns
        -------
        list[str]
            names of the SRTM HGT tiles
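
        Examples
        --------
        Illustrative output for a scene spanning latitudes 47.5 to 48.2 and longitudes 11.3 to 11.9:

        >>> scene.getHGT()
        ['N47E011.hgt', 'N48E011.hgt']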
        """
        
        corners = self.getCorners()
        
        # generate sequence of integer coordinates marking the tie points of the overlapping hgt tiles
        lat = range(int(float(corners['ymin']) // 1), int(float(corners['ymax']) // 1) + 1)
        lon = range(int(float(corners['xmin']) // 1), int(float(corners['xmax']) // 1) + 1)
        
        # convert the coordinates to strings with leading zeros and hemisphere identification letter
        lat = [str(x).zfill(2 + len(str(x)) - len(str(x).strip('-'))) for x in lat]
        lat = [x.replace('-', 'S') if '-' in x else 'N' + x for x in lat]
        
        lon = [str(x).zfill(3 + len(str(x)) - len(str(x).strip('-'))) for x in lon]
        lon = [x.replace('-', 'W') if '-' in x else 'E' + x for x in lon]
        
        # concatenate all formatted latitudes and longitudes with each other as final product
        return [x + y + '.hgt' for x in lat for y in lon]
    
    def is_processed(self, outdir, recursive=False):
        """
        check whether a scene has already been processed and stored in the defined output directory
        (and its subdirectories if scanned recursively)

        Parameters
        ----------
        outdir: str
            the directory to be checked
        recursive: bool
            scan `outdir` recursively into subdirectories?

        Returns
        -------
        bool
            does an image matching the scene pattern exist?
        """
        if os.path.isdir(outdir):
            # '{}.*tif$'.format(self.outname_base())
            return len(finder(outdir, [self.outname_base()], regex=True, recursive=recursive)) != 0
        else:
            return False
    
    def outname_base(self, extensions=None):
        """
        parse a string containing basic information about the scene in standardized format.
        Currently, this ID contains the sensor (4 characters), acquisition mode (4 characters),
        orbit direction (1 character) and acquisition start time (15 characters),
        e.g. `S1A__IW___A_20150523T122350`.
        
        Parameters
        ----------
        extensions: list[str]
            the names of additional attributes to append to the basename, e.g. ``['orbitNumber_rel']``

        Returns
        -------
        str
            a standardized name unique to the scene
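
        Examples
        --------
        Illustrative output; the values depend on the scene:

        >>> scene.outname_base()
        'S1A__IW___A_20150523T122350'
        >>> scene.outname_base(extensions=['orbitNumber_rel'])
        'S1A__IW___A_20150523T122350_117'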

        """
        
        fields = ('{:_<4}'.format(self.sensor),
                  '{:_<4}'.format(self.acquisition_mode),
                  self.orbit,
                  self.start)
        out = '_'.join(fields)
        if isinstance(extensions, list) and len(extensions) > 0:
            ext = '_'.join([str(getattr(self, key)) for key in extensions])
            out += '_' + ext
        return out
    
    @staticmethod
    def parse_date(x):
        """
        this function gathers known time formats provided in the different SAR products and converts them to a common
        standard of the form YYYYmmddTHHMMSS.

        Parameters
        ----------
        x: str
            the time stamp

        Returns
        -------
        str
            the converted time stamp in format YYYYmmddTHHMMSS
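
        Examples
        --------
        Illustrative, assuming the input format is one of the recognized ones:

        >>> ID.parse_date('2025-07-29 11:31:25.000000')
        '20250729T113125'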
        """
        return parse_date(x)
    
    @abc.abstractmethod
    def quicklook(self, outname, format='kmz'):
        """
        export a quick look image of the scene

        Parameters
        ----------
        outname: str
            the name of the output file
        format: str
            the format of the file to write;
            currently only kmz is supported

        Returns
        -------

        Examples
        --------

        >>> from pyroSAR import identify
        >>> scene = identify('S1A_IW_GRDH_1SDV_20180101T170648_20180101T170713_019964_021FFD_DA78.zip')
        >>> scene.quicklook('S1A__IW___A_20180101T170648.kmz')
        """
        raise NotImplementedError
    
    def summary(self):
        """
        print the set of standardized scene metadata attributes

        Returns
        -------

        """
        print(self.__str__())
    
    @abc.abstractmethod
    def scanMetadata(self):
        """
        scan SAR scenes for metadata attributes.
        The returned dictionary is registered as attribute `meta` by the class upon object initialization.
        This dictionary furthermore needs to contain a set of standardized attribute keys,
        which are directly registered as object attributes.

        Returns
        -------
        dict
            the derived attributes

        """
        raise NotImplementedError
    
    @abc.abstractmethod
    def unpack(self, directory, overwrite=False, exist_ok=False):
        """
        Unpack the SAR scene into a defined directory.

        Parameters
        ----------
        directory: str
            the base directory into which the scene is unpacked
        overwrite: bool
            overwrite an existing unpacked scene?
        exist_ok: bool
            allow existing output files and do not create new ones?

        Returns
        -------

        """
        raise NotImplementedError
    
    def _unpack(self, directory, offset=None, overwrite=False, exist_ok=False):
        """
        general function for unpacking scene archives; to be called by implementations of ID.unpack.
        Will reset the object attributes `scene` and `file` to point to the locations of the unpacked scene.
        
        Parameters
        ----------
        directory: str
            the name of the directory in which the files are written
        offset: str
            an archive directory offset; to be defined if only a subdirectory is to be unpacked (see e.g. TSX.unpack)
        overwrite: bool
            should an existing directory be overwritten?
        exist_ok: bool
            do not attempt unpacking if the target directory already exists? Ignored if ``overwrite==True``
        
        Returns
        -------
        
        """
        do_unpack = True
        if os.path.isdir(directory):
            if overwrite:
                shutil.rmtree(directory)
            else:
                if exist_ok:
                    do_unpack = False
                else:
                    raise RuntimeError('target scene directory already exists: {}'.format(directory))
        os.makedirs(directory, exist_ok=True)
        
        if do_unpack:
            if tf.is_tarfile(self.scene):
                archive = tf.open(self.scene, 'r')
                names = archive.getnames()
                if offset is not None:
                    names = [x for x in names if x.startswith(offset)]
                header = os.path.commonprefix(names)
                
                if header in names:
                    if archive.getmember(header).isdir():
                        for item in sorted(names):
                            if item != header:
                                member = archive.getmember(item)
                                if offset is not None:
                                    member.name = member.name.replace(offset + '/', '')
                                archive.extract(member, directory)
                        archive.close()
                    else:
                        archive.extractall(directory)
                        archive.close()
            
            elif zf.is_zipfile(self.scene):
                archive = zf.ZipFile(self.scene, 'r')
                names = archive.namelist()
                header = os.path.commonprefix(names)
                if header.endswith('/'):
                    # strip the common top-level directory from the archive item names
                    for item in sorted(names):
                        if item != header:
                            repl = item.replace(header, '', 1)
                            outname = os.path.join(directory, repl)
                            outname = outname.replace('/', os.path.sep)
                            if item.endswith('/'):
                                os.makedirs(outname, exist_ok=True)
                            else:
                                os.makedirs(os.path.dirname(outname), exist_ok=True)
                                try:
                                    with open(outname, 'wb') as outfile:
                                        outfile.write(archive.read(item))
                                except zf.BadZipfile:
                                    log.info('corrupt archive, unpacking failed')
                                    continue
                    archive.close()
                else:
                    archive.extractall(directory)
                    archive.close()
            else:
                log.info('unpacking is only supported for TAR and ZIP archives')
                return
        
        self.scene = directory
        main = os.path.join(self.scene, os.path.basename(self.file))
        self.file = main if os.path.isfile(main) else self.scene


class BEAM_DIMAP(ID):
    """
    Handler class for BEAM-DIMAP data

    Sensors:
        * SNAP supported sensors
    """
    
    def __init__(self, scene):
        
        if not scene.lower().endswith('.dim'):
            raise RuntimeError('Scene format is not BEAM-DIMAP')
        
        self.root = None
        self.scene = scene
        self.meta = self.scanMetadata()
        
        super(BEAM_DIMAP, self).__init__(self.meta)
    
    def scanMetadata(self):
        meta = dict()
        
        self.root = ET.parse(self.scene).getroot()
        
        def get_by_name(attr, section='Abstracted_Metadata'):
            element = self.root.find('.//MDElem[@name="{}"]'.format(section))
            out = element.find('.//MDATTR[@name="{}"]'.format(attr))
            if out is None or out.text == '99999.0':
                msg = 'cannot get attribute {} from section {}'
                raise RuntimeError(msg.format(attr, section))
            return out.text
        
        section = 'Abstracted_Metadata'
        meta['acquisition_mode'] = get_by_name('ACQUISITION_MODE', section=section)
        meta['IPF_version'] = get_by_name('Processing_system_identifier', section=section)
        meta['sensor'] = get_by_name('MISSION', section=section).replace('ENTINEL-', '')
        meta['orbit'] = get_by_name('PASS', section=section)[0]
        pols = [x.text for x in self.root.findall('.//MDATTR[@desc="Polarization"]')]
        pols = list(filter(None, pols))
        meta['polarizations'] = list(set([x for x in pols if '-' not in x]))
        meta['spacing'] = (round(float(get_by_name('range_spacing', section=section)), 6),
                           round(float(get_by_name('azimuth_spacing', section=section)), 6))
        meta['samples'] = int(self.root.find('.//BAND_RASTER_WIDTH').text)
        meta['lines'] = int(self.root.find('.//BAND_RASTER_HEIGHT').text)
        meta['bands'] = int(self.root.find('.//NBANDS').text)
        meta['orbitNumber_abs'] = int(get_by_name('ABS_ORBIT', section=section))
        meta['orbitNumber_rel'] = int(get_by_name('REL_ORBIT', section=section))
        meta['cycleNumber'] = int(get_by_name('orbit_cycle', section=section))
        meta['frameNumber'] = int(get_by_name('data_take_id', section=section))
        meta['product'] = self.root.find('.//PRODUCT_TYPE').text
        
        srgr = bool(int(get_by_name('srgr_flag', section=section)))
        meta['image_geometry'] = 'GROUND_RANGE' if srgr else 'SLANT_RANGE'
        
        inc_elements = self.root.findall('.//MDATTR[@name="incidenceAngleMidSwath"]')
        if len(inc_elements) > 0:
            incidence = [float(x.text) for x in inc_elements]
            meta['incidence'] = median(incidence)
        else:
            inc_near = float(self.root.find('.//MDATTR[@name="incidence_near"]').text)
            inc_far = float(self.root.find('.//MDATTR[@name="incidence_far"]').text)
            meta['incidence'] = (inc_near + inc_far) / 2
        
        # Metadata sections that need some parsing to match naming convention with SAFE format
        start = datetime.strptime(self.root.find('.//PRODUCT_SCENE_RASTER_START_TIME').text,
                                  '%d-%b-%Y %H:%M:%S.%f')
        meta['start'] = start.strftime('%Y%m%dT%H%M%S')
        stop = datetime.strptime(self.root.find('.//PRODUCT_SCENE_RASTER_STOP_TIME').text,
                                 '%d-%b-%Y %H:%M:%S.%f')
        meta['stop'] = stop.strftime('%Y%m%dT%H%M%S')
        
        if self.root.find('.//WKT') is not None:
            meta['projection'] = self.root.find('.//WKT').text.lstrip()
        else:
            meta['projection'] = crsConvert(4326, 'wkt')
        
        keys = ['{}_{}_{}'.format(a, b, c)
                for a in ['first', 'last']
                for b in ['far', 'near']
                for c in ['lat', 'long']]
        coords = {key: float(get_by_name(key, section=section))
                  for key in keys}
        
        meta['coordinates'] = [(coords['first_near_long'], coords['first_near_lat']),
                               (coords['last_near_long'], coords['last_near_lat']),
                               (coords['last_far_long'], coords['last_far_lat']),
                               (coords['first_far_long'], coords['first_far_lat'])]
        return meta
    
    def unpack(self, directory, overwrite=False, exist_ok=False):
        raise RuntimeError('unpacking of BEAM-DIMAP products is not supported')


class CEOS_ERS(ID):
    """
    Handler class for ERS data in CEOS format
    
    Sensors:
        * ERS1
        * ERS2
    
    Reference:
        ER-IS-EPO-GS-5902-3: Annex C. ERS SAR.SLC/SLC-I. CCT and EXABYTE
        (`ESA 1998 <https://earth.esa.int/documents/10174/1597298/SAR05E.pdf>`_)
    """
    
    def __init__(self, scene):
        self.pattern = patterns.ceos_ers
        
        self.pattern_pid = r'(?P<sat_id>(?:SAR|ASA))_' \
                           r'(?P<image_mode>(?:IM(?:S|P|G|M|_)|AP(?:S|P|G|M|_)|WV(?:I|S|W|_)|WS(?:M|S|_)))_' \
                           r'(?P<processing_level>[012B][CP])'
        
        self.scene = os.path.realpath(scene)
        
        self.examine()
        
        self.meta = self.scanMetadata()
        
        # register the standardized meta attributes as object attributes
        super(CEOS_ERS, self).__init__(self.meta)
    
    def unpack(self, directory, overwrite=False, exist_ok=False):
        if self.sensor in ['ERS1', 'ERS2']:
            base_file = re.sub(r'\.PS$', '', os.path.basename(self.file))
            base_dir = os.path.basename(directory.strip('/'))
            
            outdir = directory if base_file == base_dir else os.path.join(directory, base_file)
            
            self._unpack(outdir, overwrite=overwrite, exist_ok=exist_ok)
        else:
            raise NotImplementedError('sensor {} not implemented yet'.format(self.sensor))
    
    def scanMetadata(self):
        meta = dict()
        
        match = re.match(re.compile(self.pattern), os.path.basename(self.file))
        match2 = re.match(re.compile(self.pattern_pid), match.group('product_id'))
        
        if re.search('IM__0', match.group('product_id')):
            raise RuntimeError('product level 0 not supported (yet)')
        
        meta['acquisition_mode'] = match2.group('image_mode')
        meta['product'] = 'SLC' if meta['acquisition_mode'] in ['IMS', 'APS', 'WSS'] else 'PRI'
        
        lea_obj = self.getFileObj(self.findfiles('LEA_01.001')[0])
        lea = lea_obj.read()
        lea_obj.close()
        fdr = lea[0:720]  # file descriptor record
        dss = lea[720:(720 + 1886)]  # data set summary record
        mpd = lea[(720 + 1886):(720 + 1886 + 1620)]  # map projection data record
        ppd_start = 720 + 1886 + 1620
        ppd_length = struct.unpack('>i', lea[ppd_start + 8: ppd_start + 12])[0]
        ppd = lea[ppd_start:(ppd_start + ppd_length)]  # platform position data record
        frd_start = 720 + 1886 + 1620 + ppd_length
        frd = lea[frd_start:(frd_start + 12288)]  # facility related data record
        
        meta['sensor'] = dss[396:412].strip().decode()
        meta['start'] = self.parse_date(str(dss[1814:1838].decode('utf-8')))
        meta['stop'] = self.parse_date(str(dss[1862:1886].decode('utf-8')))
        meta['polarizations'] = ['VV']
        looks_range = float(dss[1174:1190])
        looks_azimuth = float(dss[1190:1206])
        meta['looks'] = (looks_range, looks_azimuth)
        meta['heading'] = float(dss[468:476])
        meta['orbit'] = 'D' if meta['heading'] > 180 else 'A'
        orbitNumber, frameNumber = map(int, re.findall('[0-9]+', dss[36:68].decode('utf-8')))
        meta['orbitNumber_abs'] = orbitNumber
        meta['frameNumber'] = frameNumber
        orbitInfo = passdb_query(meta['sensor'], datetime.strptime(meta['start'], '%Y%m%dT%H%M%S'))
        meta['cycleNumber'] = orbitInfo['cycleNumber']
        meta['orbitNumber_rel'] = orbitInfo['orbitNumber_rel']
        spacing_azimuth = float(dss[1686:1702])
        spacing_range = float(dss[1702:1718])
        meta['spacing'] = (spacing_range, spacing_azimuth)
        meta['incidence_angle'] = float(dss[484:492])
        meta['proc_facility'] = dss[1045:1061].strip().decode()
        meta['proc_system'] = dss[1061:1069].strip().decode()
        meta['proc_version'] = dss[1069:1077].strip().decode()
        
        meta['antenna_flag'] = int(frd[658:662])
        meta['k_db'] = -10 * math.log(float(frd[662:678]), 10)
        meta['sc_db'] = {'ERS1': 59.61, 'ERS2': 60}[meta['sensor']]
        
        meta['samples'] = int(mpd[60:76])
        meta['lines'] = int(mpd[76:92])
        ul = (float(mpd[1088:1104]), float(mpd[1072:1088]))
        ur = (float(mpd[1120:1136]), float(mpd[1104:1120]))
        lr = (float(mpd[1152:1168]), float(mpd[1136:1152]))
        ll = (float(mpd[1184:1200]), float(mpd[1168:1184]))
        meta['coordinates'] = [ul, ur, lr, ll]
        meta['projection'] = crsConvert(4326, 'wkt')
        return meta
        
        # def correctAntennaPattern(self):
        # the following section is only relevant for PRI products and can be considered future work
        # select antenna gain correction lookup file from extracted meta information
        # the lookup files are stored in a subfolder CAL which is included in the pythonland software package
        # if sensor == 'ERS1':
        #     if date < 19950717:
        #         antenna = 'antenna_ERS1_x_x_19950716'
        #     else:
        #         if proc_sys == 'VMP':
        #             antenna = 'antenna_ERS2_VMP_v68_x' if proc_vrs >= 6.8 else 'antenna_ERS2_VMP_x_v67'
        #         elif proc_fac == 'UKPAF' and date < 19970121:
        #             antenna = 'antenna_ERS1_UKPAF_19950717_19970120'
        #         else:
        #             antenna = 'antenna_ERS1'
        # else:
        #     if proc_sys == 'VMP':
        #         antenna = 'antenna_ERS2_VMP_v68_x' if proc_vrs >= 6.8 else 'antenna_ERS2_VMP_x_v67'
        #     elif proc_fac == 'UKPAF' and date < 19970121:
        #         antenna = 'antenna_ERS2_UKPAF_x_19970120'
        #     else:
        #         antenna = 'antenna_ERS2'


class CEOS_PSR(ID):
    """
    Handler class for ALOS-PALSAR data in CEOS format
    
    Sensors:
        * PSR1
        * PSR2

    PALSAR-1:
        References:
            * NEB-01006: ALOS/PALSAR Level 1 Product Format Description
              (`JAXA 2006 <https://www.eorc.jaxa.jp/ALOS/en/doc/fdata/PALSAR_L10_J_ENa.zip>`_)
            * NEB-070062B: ALOS/PALSAR Level 1.1/1.5 Product Format Description
              (`JAXA 2009 <https://www.eorc.jaxa.jp/ALOS/en/doc/fdata/PALSAR_x_Format_EL.pdf>`_)
        Products / processing levels:
            * 1.0
            * 1.1
            * 1.5
        Acquisition modes:
            * AB: [SP][HWDPC]
            * A: supplemental remarks of the sensor type:
                * S: Wide observation mode
                * P: all other modes
            * B: observation mode
                * H: Fine mode
                * W: ScanSAR mode
                * D: Direct downlink mode
                * P: Polarimetry mode
                * C: Calibration mode
    
    PALSAR-2:
        Reference:
            ALOS-2/PALSAR-2 Level 1.1/1.5/2.1/3.1 CEOS SAR Product Format Description
            (`JAXA 2014 <https://www.eorc.jaxa.jp/ALOS-2/en/doc/fdata/PALSAR-2_xx_Format_CEOS_E_r.pdf>`_).
        Products / processing levels:
            * 1.0
            * 1.1
            * 1.5
        Acquisition modes:
            * SBS: Spotlight mode
            * UBS: Ultra-fine mode Single polarization
            * UBD: Ultra-fine mode Dual polarization
            * HBS: High-sensitive mode Single polarization
            * HBD: High-sensitive mode Dual polarization
            * HBQ: High-sensitive mode Full (Quad.) polarimetry
            * FBS: Fine mode Single polarization
            * FBD: Fine mode Dual polarization
            * FBQ: Fine mode Full (Quad.) polarimetry
            * WBS: Scan SAR nominal [14MHz] mode Single polarization
            * WBD: Scan SAR nominal [14MHz] mode Dual polarization
            * WWS: Scan SAR nominal [28MHz] mode Single polarization
            * WWD: Scan SAR nominal [28MHz] mode Dual polarization
            * VBS: Scan SAR wide mode Single polarization
            * VBD: Scan SAR wide mode Dual polarization
    """
    
    def __init__(self, scene):
        
        self.scene = os.path.realpath(scene)
        
        candidates = [patterns.ceos_psr1, patterns.ceos_psr2]
        
        for i, pattern in enumerate(candidates):
            self.pattern = pattern
            try:
                self.examine()
                break
            except RuntimeError as e:
                if i + 1 == len(candidates):
                    raise e
        
        self.meta = self.scanMetadata()
        
        # register the standardized meta attributes as object attributes
        super(CEOS_PSR, self).__init__(self.meta)
    
    def _getLeaderfileContent(self):
        led_obj = self.getFileObj(self.led_filename)
        led = led_obj.read()
        led_obj.close()
        return led
    
    def _img_get_coordinates(self):
        img_filename = self.findfiles('IMG')[0]
        img_obj = self.getFileObj(img_filename)
        imageFileDescriptor = img_obj.read(720)
        
        lineRecordLength = int(imageFileDescriptor[186:192])  # bytes per line + 412
        numberOfRecords = int(imageFileDescriptor[180:186])
        
        signalDataDescriptor1 = img_obj.read(412)
        img_obj.seek(720 + lineRecordLength * (numberOfRecords - 1))
        signalDataDescriptor2 = img_obj.read()
        
        img_obj.close()
        
        lat = [signalDataDescriptor1[192:196], signalDataDescriptor1[200:204],
               signalDataDescriptor2[192:196], signalDataDescriptor2[200:204]]
        
        lon = [signalDataDescriptor1[204:208], signalDataDescriptor1[212:216],
               signalDataDescriptor2[204:208], signalDataDescriptor2[212:216]]
        
        lat = [struct.unpack('>i', x)[0] / 1000000. for x in lat]
        lon = [struct.unpack('>i', x)[0] / 1000000. for x in lon]
        
        return list(zip(lon, lat))
    
    def _parseSummary(self):
        try:
            summary_file = self.getFileObj(self.findfiles('summary|workreport')[0])
        except IndexError:
            return {}
        text = summary_file.getvalue().decode('utf-8').strip()
        summary_file.close()
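        # convert the 'key="value"' lines of the summary file into a dictionary literal and parse it,
        # e.g. 'A="1"\nB="2"' -> '{"A":"1","B":"2"}' -> {'A': '1', 'B': '2'}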
        summary = ast.literal_eval('{"' + re.sub(r'\s*=', '":', text).replace('\n', ',"') + '}')
        for x, y in summary.items():
            summary[x] = parse_literal(y)
        return summary
    
    @property
    def led_filename(self):
        return self.findfiles(self.pattern)[0]
    
    def scanMetadata(self):
        ################################################################################################################
        # read leader (LED) file
        led = self._getLeaderfileContent()
        
        # read summary text file
        meta = self._parseSummary()
        
        # read polarizations from image file names
        meta['polarizations'] = [re.search('[HV]{2}', os.path.basename(x)).group(0) for x in self.findfiles('^IMG-')]
        ################################################################################################################
        # read start and stop time
        
        try:
            meta['start'] = self.parse_date(meta['Img_SceneStartDateTime'])
            meta['stop'] = self.parse_date(meta['Img_SceneEndDateTime'])
        except (AttributeError, KeyError):
            try:
                start_string = re.search('Img_SceneStartDateTime[ ="0-9:.]*', led).group()
                stop_string = re.search('Img_SceneEndDateTime[ ="0-9:.]*', led).group()
                meta['start'] = self.parse_date(re.search(r'\d+\s[\d:.]+', start_string).group())
                meta['stop'] = self.parse_date(re.search(r'\d+\s[\d:.]+', stop_string).group())
            except AttributeError:
                raise IndexError('start and stop time stamps cannot be extracted; see file {}'
                                 .format(self.led_filename))
        ################################################################################################################
        # read file descriptor record
        p0 = 0
        p1 = struct.unpack('>i', led[8:12])[0]
        fileDescriptor = led[p0:p1]
        # dataSetSummary
        dss_n = int(fileDescriptor[180:186])
        dss_l = int(fileDescriptor[186:192])
        # mapProjectionData
        mpd_n = int(fileDescriptor[192:198])
        mpd_l = int(fileDescriptor[198:204])
        # platformPositionData
        ppd_n = int(fileDescriptor[204:210])
        ppd_l = int(fileDescriptor[210:216])
        # attitudeData
        adr_n = int(fileDescriptor[216:222])
        adr_l = int(fileDescriptor[222:228])
        # radiometricData
        rdr_n = int(fileDescriptor[228:234])
        rdr_l = int(fileDescriptor[234:240])
        # dataQualitySummary
        dqs_n = int(fileDescriptor[252:258])
        dqs_l = int(fileDescriptor[258:264])
        meta['sensor'] = {'AL1': 'PSR1', 'AL2': 'PSR2'}[fileDescriptor[48:51].decode('utf-8')]
        ################################################################################################################
        # read leader file name information
        
        match = re.match(re.compile(self.pattern), os.path.basename(self.led_filename))
        
        if meta['sensor'] == 'PSR1':
            meta['acquisition_mode'] = match.group('sub') + match.group('mode')
        else:
            meta['acquisition_mode'] = match.group('mode')
        meta['product'] = match.group('level')
        ################################################################################################################
        # read led records
        p0 = p1
        p1 += dss_l * dss_n
        dataSetSummary = led[p0:p1]
        
        if mpd_n > 0:
            p0 = p1
            p1 += mpd_l * mpd_n
            mapProjectionData = led[p0:p1]
        else:
            mapProjectionData = None
        
        p0 = p1
        p1 += ppd_l * ppd_n
        platformPositionData = led[p0:p1]
        
        p0 = p1
        p1 += adr_l * adr_n
        attitudeData = led[p0:p1]
        
        p0 = p1
        p1 += rdr_l * rdr_n
        radiometricData = led[p0:p1]
        
        p0 = p1
        p1 += dqs_l * dqs_n
        dataQualitySummary = led[p0:p1]
        
        facilityRelatedData = []
        while p1 < len(led):
            p0 = p1
            length = struct.unpack('>i', led[(p0 + 8):(p0 + 12)])[0]
            p1 += length
            facilityRelatedData.append(led[p0:p1])
        ################################################################################################################
        # read map projection data record
        
        if mapProjectionData is not None:
            lat = list(map(float, [mapProjectionData[1072:1088],
                                   mapProjectionData[1104:1120],
                                   mapProjectionData[1136:1152],
                                   mapProjectionData[1168:1184]]))
            lon = list(map(float, [mapProjectionData[1088:1104],
                                   mapProjectionData[1120:1136],
                                   mapProjectionData[1152:1168],
                                   mapProjectionData[1184:1200]]))
            meta['coordinates'] = list(zip(lon, lat))
            
            # https://github.com/datalyze-solutions/LandsatProcessingPlugin/blob/master/src/metageta/formats/alos.py
            
            src_srs = osr.SpatialReference()
            # src_srs.SetGeogCS('GRS 1980','GRS 1980','GRS 1980',6378137.00000,298.2572220972)
            src_srs.SetWellKnownGeogCS('WGS84')
            # Proj CS
            projdesc = mapProjectionData[412:444].strip().decode()  # decode for comparison with str
            epsg = 0  # default
            if projdesc == 'UTM-PROJECTION':
                nZone = int(mapProjectionData[476:480])
                dfFalseNorthing = float(mapProjectionData[496:512])
                if dfFalseNorthing > 0.0:
                    bNorth = False
                    epsg = 32700 + nZone
                else:
                    bNorth = True
                    epsg = 32600 + nZone
                src_srs.ImportFromEPSG(epsg)
                # src_srs.SetUTM(nZone,bNorth) #generates WKT that osr.SpatialReference.AutoIdentifyEPSG() doesn't return an EPSG for
            elif projdesc == 'UPS-PROJECTION':
                dfCenterLon = float(mapProjectionData[624:640])
                dfCenterLat = float(mapProjectionData[640:656])
                dfScale = float(mapProjectionData[656:672])
                src_srs.SetPS(dfCenterLat, dfCenterLon, dfScale, 0.0, 0.0)
            elif projdesc == 'MER-PROJECTION':
                dfCenterLon = float(mapProjectionData[736:752])
                dfCenterLat = float(mapProjectionData[752:768])
                src_srs.SetMercator(dfCenterLat, dfCenterLon, 0, 0, 0)
            elif projdesc == 'LCC-PROJECTION':
                dfCenterLon = float(mapProjectionData[736:752])
                dfCenterLat = float(mapProjectionData[752:768])
                dfStdP1 = float(mapProjectionData[768:784])
                dfStdP2 = float(mapProjectionData[784:800])
                src_srs.SetLCC(dfStdP1, dfStdP2, dfCenterLat, dfCenterLon, 0, 0)
            meta['projection'] = src_srs.ExportToWkt()
        
        else:
            coordinates = self._img_get_coordinates()
            if all([x == (0, 0) for x in coordinates]):
                meta['projection'] = None
            else:
                meta['coordinates'] = coordinates
                meta['projection'] = crsConvert(4326, 'wkt')
        ################################################################################################################
        # read data set summary record
        
        scene_id = dataSetSummary[20:52].decode('ascii')
        
        if meta['sensor'] == 'PSR1':
            pattern = r'(?P<sat_id>[A-Z]{2})' \
                      r'(?P<sensor_id>[A-Z]{3})' \
                      r'(?P<sensor_id_sub>[A-Z]{1})' \
                      r'(?P<orbitNumber>[0-9]{5})' \
                      r'(?P<frameNumber>[0-9]{4})'
        elif meta['sensor'] == 'PSR2':
            pattern = r'(?P<sat_id>[A-Z0-9]{5})' \
                      r'(?P<orbitNumber>[0-9]{5})' \
                      r'(?P<frameNumber>[0-9]{4})-' \
                      r'(?P<obs_day>[0-9]{6})[ ]{11}'
        else:
            raise ValueError('sensor must be either PSR1 or PSR2; is: {}'.format(meta['sensor']))
        
        match = re.match(re.compile(pattern), scene_id)
        
        orbitsPerCycle = {'PSR1': 671, 'PSR2': 207}[meta['sensor']]
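
        # worked example (PSR2, 207 orbits per cycle): absolute orbit 4321
        # -> relative orbit 4321 % 207 = 181, cycle number 4321 // 207 + 1 = 21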
1✔
1304
        
1305
        meta['orbitNumber_abs'] = int(match.group('orbitNumber'))
1✔
1306
        meta['orbitNumber_rel'] = meta['orbitNumber_abs'] % orbitsPerCycle
1✔
1307
        meta['cycleNumber'] = meta['orbitNumber_abs'] // orbitsPerCycle + 1
1✔
1308
        meta['frameNumber'] = int(match.group('frameNumber'))
1✔
1309
        
1310
        try:
1✔
1311
            meta['lines'] = int(dataSetSummary[324:332]) * 2
1✔
1312
        except ValueError:
×
1313
            if 'Pdi_NoOfLines' in meta.keys():
×
1314
                meta['lines'] = meta['Pdi_NoOfLines']
×
1315
            else:
1316
                meta['lines'] = None
×
1317
        try:
1✔
1318
            meta['samples'] = int(dataSetSummary[332:340]) * 2
1✔
1319
        except ValueError:
×
1320
            if 'Pdi_NoOfPixels' in meta.keys():
×
1321
                meta['samples'] = meta['Pdi_NoOfPixels']
×
1322
            else:
1323
                meta['samples'] = None
×
1324
        meta['incidence'] = float(dataSetSummary[484:492])
1✔
1325
        meta['wavelength'] = float(dataSetSummary[500:516]) * 100  # in cm
1✔
1326
        meta['proc_facility'] = dataSetSummary[1046:1062].strip().decode('ascii')
1✔
1327
        meta['proc_system'] = dataSetSummary[1062:1070].strip().decode('ascii')
1✔
1328
        meta['proc_version'] = dataSetSummary[1070:1078].strip().decode('ascii')
1✔
1329
        
1330
        try:
1✔
1331
            azlks = float(dataSetSummary[1174:1190])
1✔
1332
            rlks = float(dataSetSummary[1190:1206])
1✔
1333
            meta['looks'] = (rlks, azlks)
1✔
1334
        except ValueError:
×
1335
            meta['looks'] = (None, None)
×
1336
        
1337
        meta['orbit'] = dataSetSummary[1534:1542].decode('utf-8').strip()[0]
1✔
1338
        
1339
        try:
1✔
1340
            spacing_azimuth = float(dataSetSummary[1686:1702])
1✔
1341
            spacing_range = float(dataSetSummary[1702:1718])
1✔
1342
            meta['spacing'] = (spacing_range, spacing_azimuth)
1✔
1343
        except ValueError:
×
1344
            meta['spacing'] = (None, None)
×
1345
        ################################################################################################################
1346
        # read radiometric data record
1347
        if len(radiometricData) > 0:
1✔
1348
            meta['k_dB'] = float(radiometricData[20:36])
1✔
1349
        else:
1350
            meta['k_dB'] = None
×
1351
        ################################################################################################################
1352
        # additional notes
1353
        
1354
        # the following can be used to read platform position time from the led file
1355
        # this covers a larger time frame than the actual scene sensing time
1356
        # y, m, d, nd, s = platformPositionData[144:182].split()
1357
        # start = datetime(int(y), int(m), int(d)) + timedelta(seconds=float(s))
1358
        # npoints = int(platformPositionData[140:144])
1359
        # interval = float(platformPositionData[182:204])
1360
        # stop = start + timedelta(seconds=(npoints - 1) * interval)
1361
        # parse_date(start)
1362
        # parse_date(stop)
1363
        
1364
        return meta
1✔
1365
    
1366
    def unpack(self, directory, overwrite=False, exist_ok=False):
1✔
1367
        outdir = os.path.join(directory, os.path.basename(self.file).replace('LED-', ''))
×
1368
        self._unpack(outdir, overwrite=overwrite, exist_ok=exist_ok)
×
1369

1370

1371
class EORC_PSR(ID):
1✔
1372
    """
1373
    Handler class for ALOS-2/PALSAR-2 data in EORC (Earth Observation Research Center) Path format
1374
    
1375
    Sensors:
1376
        * PALSAR-2
1377

1378
    PALSAR-2:
1379
        Reference: 
1380
            NDX-150019: ALOS-2/PALSAR-2 EORC Path Product Format Description (JAXA 2016)
1381
        Products / processing levels:
1382
            * 1.5
1383
        Acquisition modes:
1384
            * FBD: Fine mode Dual polarization
1385
            * WBD: Scan SAR nominal [14MHz] mode Dual polarization
1386
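
    Examples
    ----------
    A minimal usage sketch; the directory name is hypothetical:

    >>> from pyroSAR import identify
    >>> scene = identify('/.../PSR2_EORC_path_product')
    >>> print(scene.sensor, scene.acquisition_mode)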
    """
1387
    
1388
    def __init__(self, scene):
1✔
1389
        
1390
        self.scene = os.path.realpath(scene)
1✔
1391
        
1392
        self.pattern = patterns.eorc_psr
1✔
1393
        
1394
        self.examine()
1✔
1395
        
1396
        self.meta = self.scanMetadata()
×
1397
        
1398
        # register the standardized meta attributes as object attributes
1399
        super(EORC_PSR, self).__init__(self.meta)
×
1400
    
1401
    def _getHeaderfileContent(self):
1✔
1402
        head_obj = self.getFileObj(self.header_filename)
×
1403
        head = head_obj.read().decode('utf-8')
×
1404
        head = head.split('\n')
×
1405
        head_obj.close()
×
1406
        return head
×
1407
    
1408
    def _img_get_coordinates(self):
1✔
1409
        img_filename = self.findfiles('IMG')[0]
×
1410
        img_obj = self.getFileObj(img_filename)
×
1411
        imageFileDescriptor = img_obj.read(720)
×
1412
        
1413
        lineRecordLength = int(imageFileDescriptor[186:192])  # bytes per line + 412
×
1414
        numberOfRecords = int(imageFileDescriptor[180:186])
×
1415
        
1416
        signalDataDescriptor1 = img_obj.read(412)
×
1417
        img_obj.seek(720 + lineRecordLength * (numberOfRecords - 1))
×
1418
        signalDataDescriptor2 = img_obj.read()
×
1419
        
1420
        img_obj.close()
×
1421
        
1422
        lat = [signalDataDescriptor1[192:196], signalDataDescriptor1[200:204],
×
1423
               signalDataDescriptor2[192:196], signalDataDescriptor2[200:204]]
1424
        
1425
        lon = [signalDataDescriptor1[204:208], signalDataDescriptor1[212:216],
×
1426
               signalDataDescriptor2[204:208], signalDataDescriptor2[212:216]]
1427
        
1428
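        # the corner fields hold big-endian signed 32-bit integers ('>i') of
        # latitude/longitude in millionths of a degree; decode and scale to degrees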
        lat = [struct.unpack('>i', x)[0] / 1000000. for x in lat]
×
1429
        lon = [struct.unpack('>i', x)[0] / 1000000. for x in lon]
×
1430
        
1431
        return list(zip(lon, lat))
×
1432
    
1433
    def _parseFacter_m(self):
1✔
1434
        try:
×
1435
            facter_file = self.findfiles('facter_m.dat')[0]
×
1436
        except IndexError:
×
1437
            return {}
×
1438
        facter_obj = self.getFileObj(facter_file)
×
1439
        facter_m = facter_obj.read().decode('utf-8')
×
1440
        facter_m = facter_m.split('\n')
×
1441
        facter_obj.close()
×
1442
        return facter_m
×
1443
    
1444
    @property
1✔
1445
    def header_filename(self):
1✔
1446
        return self.findfiles(self.pattern)[0]
×
1447
    
1448
    def scanMetadata(self):
1✔
1449
        ################################################################################################################
1450
        # read header (HDR) file
1451
        header = self._getHeaderfileContent()
×
1452
        header = [head.replace(" ", "") for head in header]
×
1453
        
1454
        # read summary text file
1455
        facter_m = self._parseFacter_m()
×
1456
        facter_m = [fact.replace(" ", "") for fact in facter_m]
×
1457
        
1458
        meta = {}
×
1459
        
1460
        # read polarizations from image file names
1461
        meta['polarizations'] = [re.search('[HV]{2}', os.path.basename(x)).group(0) for x in self.findfiles('^sar.')]
×
1462
        meta['product'] = header[3]
×
1463
        ################################################################################################################
1464
        # read start and stop time --> TODO: in what format are the start and stop times?
1465
        
1466
        try:
×
1467
            start_time = facter_m[168].split('.')[0].zfill(2) + facter_m[168].split('.')[1][:4]
×
1468
            stop_time = facter_m[170].split('.')[0].zfill(2) + facter_m[170].split('.')[1][:4]
×
1469
        except (AttributeError, IndexError):
×
1470
            raise IndexError('start and stop time stamps cannot be extracted; see file facter_m.dat')
×
1471
        
1472
        meta['start'] = str(header[6])  # +'T'+start_time
×
1473
        meta['stop'] = str(header[6])  # +'T'+stop_time
×
1474
        ################################################################################################################
1475
        # read file metadata
1476
        meta['sensor'] = header[2]
×
1477
        ################################################################################################################
1478
        # read leader file name information
1479
        meta['acquisition_mode'] = header[12]
×
1480
        # ##############################################################################################################
1481
        # read map projection data 
1482
        
1483
        lat = list(map(float, [header[33], header[35], header[37], header[39]]))
×
1484
        lon = list(map(float, [header[34], header[36], header[38], header[40]]))
×
1485
        
1486
        if len(lat) == 0 or len(lon) == 0:
×
1487
            meta['coordinates'] = self._img_get_coordinates()
×
1488
        else:
1489
            meta['coordinates'] = list(zip(lon, lat))
×
1490
        
1491
        meta['projection'] = crsConvert(4918, 'wkt')  # EPSG: 4918: ITRF97, GRS80
×
1492
        ################################################################################################################
1493
        # read data set summary record
1494
        
1495
        orbitsPerCycle = 207
×
1496
        
1497
        meta['orbitNumber_rel'] = int(header[7])
×
1498
        meta['cycleNumber'] = int(header[5])
×
1499
        meta['frameNumber'] = ''
×
1500
        meta['orbitNumber_abs'] = int(orbitsPerCycle * (meta['cycleNumber'] - 1) + meta['orbitNumber_rel'])
×
1501
        
1502
        meta['lines'] = int(float(facter_m[51]))
×
1503
        meta['samples'] = int(float(facter_m[50]))
×
1504
        meta['incidence'] = float(facter_m[119])
×
1505
        meta['proc_facility'] = header[73]
×
1506
        
1507
        meta['spacing'] = (float(header[51]), float(header[52]))
×
1508
        
1509
        meta['orbit'] = header[9]
×
1510
        ################################################################################################################
1511
        # read radiometric data record
1512
        
1513
        meta['k_dB'] = float(header[64])
×
1514
        
1515
        return meta
×
1516
    
1517
    def unpack(self, directory, overwrite=False, exist_ok=False):
1✔
1518
        outdir = os.path.join(directory, os.path.basename(self.file).replace('LED-', ''))
×
1519
        self._unpack(outdir, overwrite=overwrite, exist_ok=exist_ok)
×
1520

1521

1522
class ESA(ID):
1✔
1523
    """
1524
    Handler class for SAR data in ESA format (Envisat ASAR, ERS-1/2)
1525
    
1526
    Sensors:
1527
        * ASAR
1528
        * ERS1
1529
        * ERS2
1530
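
    Examples
    ----------
    A minimal usage sketch; the file name is hypothetical:

    >>> from pyroSAR import identify
    >>> scene = identify('/.../ASA_IMP_1P....N1')
    >>> print(scene.sensor, scene.product, scene.acquisition_mode)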
    """
1531
    
1532
    def __init__(self, scene):
1✔
1533
        
1534
        self.pattern = patterns.esa
1✔
1535
        self.pattern_pid = r'(?P<sat_id>(?:SAR|ASA))_' \
1✔
1536
                           r'(?P<image_mode>(?:IM(?:S|P|G|M|_)|AP(?:S|P|G|M|_)|WV(?:I|S|W|_)|WS(?:M|S|_)))_' \
1537
                           r'(?P<processing_level>[012B][CP])'
1538
        
1539
        self.scene = os.path.realpath(scene)
1✔
1540
        
1541
        if re.search(r'\.[EN][12]$', self.scene):
1✔
1542
            self.file = self.scene
1✔
1543
        else:
1544
            self.examine()
1✔
1545
        
1546
        self.meta = self.scanMetadata()
1✔
1547
        
1548
        # register the standardized meta attributes as object attributes
1549
        super(ESA, self).__init__(self.meta)
1✔
1550
    
1551
    def scanMetadata(self):
1✔
1552
        match = re.match(re.compile(self.pattern), os.path.basename(self.file))
1✔
1553
        match2 = re.match(re.compile(self.pattern_pid), match.group('product_id'))
1✔
1554
        
1555
        if re.search('IM__0', match.group('product_id')):
1✔
1556
            raise RuntimeError('product level 0 not supported (yet)')
×
1557
        
1558
        meta = dict()
1✔
1559
        sensor_lookup = {'N1': 'ASAR', 'E1': 'ERS1', 'E2': 'ERS2'}
1✔
1560
        meta['sensor'] = sensor_lookup[match.group('satellite_ID')]
1✔
1561
        meta['acquisition_mode'] = match2.group('image_mode')
1✔
1562
        meta['product'] = 'SLC' if meta['acquisition_mode'] in ['IMS', 'APS', 'WSS'] else 'PRI'
1✔
1563
        meta['frameNumber'] = int(match.group('counter'))
1✔
1564
        
1565
        if meta['acquisition_mode'] in ['APS', 'IMS', 'WSS']:
1✔
1566
            meta['image_geometry'] = 'SLANT_RANGE'
1✔
1567
        elif meta['acquisition_mode'] in ['APP', 'IMP', 'WSM']:
1✔
1568
            meta['image_geometry'] = 'GROUND_RANGE'
1✔
1569
        else:
1570
            raise RuntimeError(f"unsupported acquisition mode: '{meta['acquisition_mode']}'")
×
1571
        
1572
        with self.getFileObj(self.file) as obj:
1✔
1573
            mph = obj.read(1247).decode('ascii')
1✔
1574
            sph = obj.read(1059).decode('ascii')
1✔
1575
            dsd = obj.read(5040).decode('ascii')
1✔
1576
            
1577
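            # capture KEY=value pairs; a value is either double-quoted or
            # terminated by a '<units>' suffix or the end of the line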
            pattern = r'(?P<key>[A-Z0-9_]+)\=(")?(?P<value>.*?)("|<|$)'
1✔
1578
            origin = {'MPH': {}, 'SPH': {}, 'DSD': {}}
1✔
1579
            for section, content in {'MPH': mph, 'SPH': sph}.items():
1✔
1580
                lines = content.split('\n')
1✔
1581
                for line in lines:
1✔
1582
                    match = re.match(pattern, line)
1✔
1583
                    if match:
1✔
1584
                        matchdict = match.groupdict()
1✔
1585
                        val = str(matchdict['value']).strip()
1✔
1586
                        origin[section][matchdict['key']] = val
1✔
1587
            
1588
            raw = []
1✔
1589
            lines = dsd.split('\n')
1✔
1590
            for line in lines:
1✔
1591
                match = re.match(pattern, line)
1✔
1592
                if match:
1✔
1593
                    matchdict = match.groupdict()
1✔
1594
                    raw.append((matchdict['key'], str(matchdict['value']).strip()))
1✔
1595
            raw = [raw[i:i + 7] for i in range(0, len(raw), 7)]
1✔
1596
            datasets = {x.pop(0)[1]: {y[0]: y[1] for y in x} for x in raw}
1✔
1597
            origin['DSD'] = datasets
1✔
1598
            
1599
            meta['origin'] = origin
1✔
1600
            
1601
            key = 'GEOLOCATION GRID ADS'
1✔
1602
            ds_offset = int(origin['DSD'][key]['DS_OFFSET'])
1✔
1603
            ds_size = int(origin['DSD'][key]['DS_SIZE'])
1✔
1604
            dsr_size = int(origin['DSD'][key]['DSR_SIZE'])
1✔
1605
            obj.seek(ds_offset)
1✔
1606
            geo = obj.read(ds_size)
1✔
1607
        
1608
        geo = [geo[i:i + dsr_size] for i in range(0, len(geo), dsr_size)]
1✔
1609
        
1610
        lat = []
1✔
1611
        lon = []
1✔
1612
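        # each geolocation record holds tie points for the first and last line
        # of a granule: 44 bytes per slice, i.e. eleven big-endian 32-bit
        # integers of latitude/longitude in millionths of a degree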
        for segment in geo:
1✔
1613
            lat_seg = segment[157:(157 + 44)] + segment[411:(411 + 44)]
1✔
1614
            lon_seg = segment[201:(201 + 44)] + segment[455:(455 + 44)]
1✔
1615
            
1616
            lat_seg = [lat_seg[i:i + 4] for i in range(0, len(lat_seg), 4)]
1✔
1617
            lon_seg = [lon_seg[i:i + 4] for i in range(0, len(lon_seg), 4)]
1✔
1618
            
1619
            lat_seg = [struct.unpack('>i', x)[0] / 1000000. for x in lat_seg]
1✔
1620
            lon_seg = [struct.unpack('>i', x)[0] / 1000000. for x in lon_seg]
1✔
1621
            
1622
            lat.extend(lat_seg)
1✔
1623
            lon.extend(lon_seg)
1✔
1624
        
1625
        meta['coordinates'] = list(zip(lon, lat))
1✔
1626
        
1627
        if meta['sensor'] == 'ASAR':
1✔
1628
            pols = [y for x, y in origin['SPH'].items() if 'TX_RX_POLAR' in x]
1✔
1629
            pols = [x.replace('/', '') for x in pols if len(x) == 3]
1✔
1630
            meta['polarizations'] = sorted(pols)
1✔
1631
        elif meta['sensor'] in ['ERS1', 'ERS2']:
1✔
1632
            meta['polarizations'] = ['VV']
1✔
1633
        
1634
        meta['orbit'] = origin['SPH']['PASS'][0]
1✔
1635
        start = datetime.strptime(origin['MPH']['SENSING_START'], '%d-%b-%Y %H:%M:%S.%f')
1✔
1636
        meta['start'] = start.strftime('%Y%m%dT%H%M%S')
1✔
1637
        stop = datetime.strptime(origin['MPH']['SENSING_STOP'], '%d-%b-%Y %H:%M:%S.%f')
1✔
1638
        meta['stop'] = stop.strftime('%Y%m%dT%H%M%S')
1✔
1639
        meta['spacing'] = (float(origin['SPH']['RANGE_SPACING']), float(origin['SPH']['AZIMUTH_SPACING']))
1✔
1640
        meta['looks'] = (float(origin['SPH']['RANGE_LOOKS']), float(origin['SPH']['AZIMUTH_LOOKS']))
1✔
1641
        meta['samples'] = int(origin['SPH']['LINE_LENGTH'])
1✔
1642
        meta['lines'] = int(origin['DSD']['MDS1']['NUM_DSR'])
1✔
1643
        
1644
        meta['orbitNumber_abs'] = int(origin['MPH']['ABS_ORBIT'])
1✔
1645
        meta['orbitNumber_rel'] = int(origin['MPH']['REL_ORBIT'])
1✔
1646
        meta['cycleNumber'] = int(origin['MPH']['CYCLE'])
1✔
1647
        
1648
        meta['incidenceAngleMin'], meta['incidenceAngleMax'], \
1✔
1649
            meta['rangeResolution'], meta['azimuthResolution'], \
1650
            meta['neszNear'], meta['neszFar'] = \
1651
            get_angles_resolution(sensor=meta['sensor'], mode=meta['acquisition_mode'],
1652
                                  swath_id=origin['SPH']['SWATH'], date=meta['start'])
1653
        meta['incidence'] = median([meta['incidenceAngleMin'], meta['incidenceAngleMax']])
1✔
1654
        
1655
        meta['projection'] = crsConvert(4326, 'wkt')
1✔
1656
        
1657
        return meta
1✔
1658
    
1659
    def unpack(self, directory, overwrite=False, exist_ok=False):
1✔
1660
        base_file = re.sub(r'\.(?:zip|tar(?:\.gz)?)$', '', os.path.basename(self.file))
×
1661
        base_dir = os.path.basename(directory.strip('/'))
×
1662
        
1663
        outdir = directory if base_file == base_dir else os.path.join(directory, base_file)
×
1664
        
1665
        self._unpack(outdir, overwrite=overwrite, exist_ok=exist_ok)
×
1666

1667

1668
class SAFE(ID):
1✔
1669
    """
1670
    Handler class for Sentinel-1 data
1671
    
1672
    Sensors:
1673
        * S1A
1674
        * S1B
1675
        * S1C
1676
        * S1D
1677

1678
    References:
1679
        * S1-RS-MDA-52-7443 Sentinel-1 IPF Auxiliary Product Specification
1680
        * MPC-0243 Masking "No-value" Pixels on GRD Products generated by the Sentinel-1 ESA IPF
1681
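
    Examples
    ----------
    A minimal usage sketch, reusing the scene name from the :class:`Archive` examples:

    >>> from pyroSAR import identify
    >>> scene = identify('S1A_IW_SLC__1SDV_20150330T170734_20150330T170801_005264_006A6C_DA69.zip')
    >>> print(scene.sensor, scene.product, scene.acquisition_mode)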
    """
1682
    
1683
    def __init__(self, scene):
1✔
1684
        
1685
        self.scene = os.path.realpath(scene)
1✔
1686
        
1687
        self.pattern = patterns.safe
1✔
1688
        
1689
        self.pattern_ds = r'^s1[abcd]-' \
1✔
1690
                          r'(?P<swath>s[1-6]|iw[1-3]?|ew[1-5]?|wv[1-2]|n[1-6])-' \
1691
                          r'(?P<product>slc|grd|ocn)-' \
1692
                          r'(?P<pol>hh|hv|vv|vh)-' \
1693
                          r'(?P<start>[0-9]{8}t[0-9]{6})-' \
1694
                          r'(?P<stop>[0-9]{8}t[0-9]{6})-' \
1695
                          r'(?:[0-9]{6})-(?:[0-9a-f]{6})-' \
1696
                          r'(?P<id>[0-9]{3})' \
1697
                          r'\.xml$'
1698
        
1699
        self.examine(include_folders=True)
1✔
1700
        
1701
        if not re.match(re.compile(self.pattern), os.path.basename(self.file)):
1✔
1702
            raise RuntimeError('folder does not match S1 scene naming convention')
×
1703
        
1704
        # scan the metadata XML file and add selected attributes to a meta dictionary
1705
        self.meta = self.scanMetadata()
1✔
1706
        self.meta['projection'] = crsConvert(4326, 'wkt')
1✔
1707
        
1708
        # register the standardized meta attributes as object attributes
1709
        super(SAFE, self).__init__(self.meta)
1✔
1710
        
1711
        self.gammafiles = {'slc': [], 'pri': [], 'grd': []}
1✔
1712
    
1713
    def removeGRDBorderNoise(self, method='pyroSAR'):
1✔
1714
        """
1715
        mask out Sentinel-1 image border noise.
1716
        
1717
        Parameters
1718
        ----------
1719
        method: str
1720
            the border noise removal method to be applied; one of the following:
1721
            
1722
             - 'ESA': the pure implementation as described by ESA
1723
             - 'pyroSAR': the ESA method plus the custom pyroSAR refinement
1724

1725
        Returns
1726
        -------
1727
        
1728
        See Also
1729
        --------
1730
        :func:`~pyroSAR.S1.removeGRDBorderNoise`
1731
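
        Examples
        --------
        A minimal sketch, assuming `scene` is a :class:`SAFE` object of an unpacked GRD product:

        >>> scene.removeGRDBorderNoise(method='pyroSAR')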
        """
1732
        S1.removeGRDBorderNoise(self, method=method)
×
1733
    
1734
    def geo_grid(self, outname=None, driver=None, overwrite=True):
1✔
1735
        """
1736
        get the geo grid as vector geometry
1737

1738
        Parameters
1739
        ----------
1740
        outname: str
1741
            the name of the vector file to be written
1742
        driver: str
1743
            the output file format; needs to be defined if the format cannot
1744
            be auto-detected from the filename extension
1745
        overwrite: bool
1746
            overwrite an existing vector file?
1747

1748
        Returns
1749
        -------
1750
        ~spatialist.vector.Vector or None
1751
            the vector object if `outname` is None, None otherwise
1752
        
1753
        See also
1754
        --------
1755
        spatialist.vector.Vector.write
1756
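
        Examples
        --------
        A minimal sketch; the output file name is hypothetical and the driver
        is auto-detected from the extension:

        >>> scene.geo_grid(outname='/.../geogrid.gpkg')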
        """
1757
        annotations = self.findfiles(self.pattern_ds)
1✔
1758
        key = lambda x: re.search('-[vh]{2}-', x).group()
1✔
1759
        groups = groupby(sorted(annotations, key=key), key=key)
1✔
1760
        annotations = [list(value) for key, value in groups][0]
1✔
1761
        
1762
        vec = Vector(driver='Memory')
1✔
1763
        vec.addlayer('geogrid', 4326, ogr.wkbPoint25D)
1✔
1764
        field_defs = [
1✔
1765
            ("swath", ogr.OFTString),
1766
            ("azimuthTime", ogr.OFTDateTime),
1767
            ("slantRangeTime", ogr.OFTReal),
1768
            ("line", ogr.OFTInteger),
1769
            ("pixel", ogr.OFTInteger),
1770
            ("incidenceAngle", ogr.OFTReal),
1771
            ("elevationAngle", ogr.OFTReal),
1772
        ]
1773
        for name, ftype in field_defs:
1✔
1774
            field = ogr.FieldDefn(name, ftype)
1✔
1775
            vec.layer.CreateField(field)
1✔
1776
        
1777
        for ann in annotations:
1✔
1778
            with self.getFileObj(ann) as ann_xml:
1✔
1779
                tree = ET.fromstring(ann_xml.read())
1✔
1780
            swath = tree.find(".//adsHeader/swath").text
1✔
1781
            points = tree.findall(".//geolocationGridPoint")
1✔
1782
            for point in points:
1✔
1783
                meta = {child.tag: child.text for child in point}
1✔
1784
                meta["swath"] = swath
1✔
1785
                x = float(meta.pop("longitude"))
1✔
1786
                y = float(meta.pop("latitude"))
1✔
1787
                z = float(meta.pop("height"))
1✔
1788
                geom = ogr.Geometry(ogr.wkbPoint25D)
1✔
1789
                geom.AddPoint(x, y, z)
1✔
1790
                az_time = dateparse(meta["azimuthTime"])
1✔
1791
                meta["azimuthTime"] = az_time.replace(tzinfo=timezone.utc)
1✔
1792
                for key in ["slantRangeTime", "incidenceAngle", "elevationAngle"]:
1✔
1793
                    meta[key] = float(meta[key])
1✔
1794
                for key in ["line", "pixel"]:
1✔
1795
                    meta[key] = int(meta[key])
1✔
1796
                vec.addfeature(geom, fields=meta)
1✔
1797
        geom = None
1✔
1798
        if outname is None:
1✔
1799
            return vec
1✔
1800
        else:
1801
            vec.write(outfile=outname, driver=driver, overwrite=overwrite)
1✔
1802
    
1803
    def getOSV(self, osvdir=None, osvType='POE', returnMatch=False, useLocal=True, timeout=300, url_option=1):
1✔
1804
        """
1805
        download Orbit State Vector files for the scene
1806

1807
        Parameters
1808
        ----------
1809
        osvdir: str
1810
            the directory of OSV files; subdirectories POEORB and RESORB are created automatically;
1811
            if no directory is defined, the standard SNAP auxdata location is used
1812
        osvType: str or list[str]
1813
            the type of orbit file either 'POE', 'RES' or a list of both;
1814
            if both are selected, the best matching file will be retrieved. I.e., POE if available and RES otherwise
1815
        returnMatch: bool
1816
            return the best matching orbit file?
1817
        useLocal: bool
1818
            use locally existing files and do not search for files online if the right file has been found?
1819
        timeout: int or tuple or None
1820
            the timeout in seconds for downloading OSV files as provided to :func:`requests.get`
1821
        url_option: int
1822
            the OSV download URL option; see :meth:`pyroSAR.S1.OSV.catch` for options
1823

1824
        Returns
1825
        -------
1826
        str or None
1827
            the best matching OSV file if `returnMatch` is True or None otherwise
1828
        
1829
        See Also
1830
        --------
1831
        :class:`pyroSAR.S1.OSV`
1832
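
        Examples
        --------
        A minimal sketch, preferring POE files and falling back to RES;
        the directory name is hypothetical:

        >>> match = scene.getOSV(osvdir='/.../osv', osvType=['POE', 'RES'], returnMatch=True)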
        """
1833
        with S1.OSV(osvdir, timeout=timeout) as osv:
1✔
1834
            if useLocal:
1✔
1835
                match = osv.match(sensor=self.sensor, timestamp=self.start,
1✔
1836
                                  osvtype=osvType)
1837
                if match is not None:
1✔
1838
                    return match if returnMatch else None
1✔
1839
            
1840
            if osvType in ['POE', 'RES']:
1✔
1841
                files = osv.catch(sensor=self.sensor, osvtype=osvType,
1✔
1842
                                  start=self.start, stop=self.stop,
1843
                                  url_option=url_option)
1844
            elif sorted(osvType) == ['POE', 'RES']:
×
1845
                files = osv.catch(sensor=self.sensor, osvtype='POE',
×
1846
                                  start=self.start, stop=self.stop,
1847
                                  url_option=url_option)
1848
                if len(files) == 0:
×
1849
                    files = osv.catch(sensor=self.sensor, osvtype='RES',
×
1850
                                      start=self.start, stop=self.stop,
1851
                                      url_option=url_option)
1852
            else:
1853
                msg = "osvType must either be 'POE', 'RES' or a list of both"
×
1854
                raise TypeError(msg)
×
1855
            
1856
            osv.retrieve(files)
1✔
1857
            
1858
            if returnMatch:
1✔
1859
                match = osv.match(sensor=self.sensor, timestamp=self.start,
1✔
1860
                                  osvtype=osvType)
1861
                return match
1✔
1862
    
1863
    def quicklook(self, outname, format='kmz', na_transparent=True):
1✔
1864
        """
1865
        Write a quicklook file for the scene.
1866
        
1867
        Parameters
1868
        ----------
1869
        outname: str
1870
            the file to write
1871
        format: str
1872
            the quicklook format. Currently supported options:
1873
            
1874
             - kmz
1875
        na_transparent: bool
1876
            make NA values transparent?
1877

1878
        Returns
1879
        -------
1880

1881
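
        Examples
        --------
        A minimal sketch; the output file name is hypothetical:

        >>> scene.quicklook('/.../quicklook.kmz')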
        """
1882
        if self.product not in ['GRD', 'SLC']:
×
1883
            msg = 'this method has only been implemented for GRD and SLC, not {}'
×
1884
            raise RuntimeError(msg.format(self.product))
×
1885
        
1886
        if format != 'kmz':
×
1887
            raise RuntimeError('currently only kmz is supported as format')
×
1888
        kml_name = self.findfiles('map-overlay.kml')[0]
×
1889
        png_name = self.findfiles('quick-look.png')[0]
×
1890
        with zf.ZipFile(outname, 'w') as out:
×
1891
            with self.getFileObj(kml_name) as kml_in:
×
1892
                kml = kml_in.getvalue().decode('utf-8')
×
1893
                kml = kml.replace('Sentinel-1 Map Overlay', self.outname_base())
×
1894
                out.writestr('doc.kml', data=kml)
×
1895
            with self.getFileObj(png_name) as png_in:
×
1896
                if na_transparent:
×
1897
                    img = Image.open(png_in)
×
1898
                    img = img.convert('RGBA')
×
1899
                    datas = img.getdata()
×
1900
                    newData = []
×
1901
                    for item in datas:
×
1902
                        if item[0] == 0 and item[1] == 0 and item[2] == 0:
×
1903
                            newData.append((0, 0, 0, 0))
×
1904
                        else:
1905
                            newData.append(item)
×
1906
                    img.putdata(newData)
×
1907
                    buf = BytesIO()
×
1908
                    img.save(buf, format='png')
×
1909
                    out.writestr('quick-look.png', buf.getvalue())
×
1910
                else:
1911
                    out.writestr('quick-look.png', data=png_in.getvalue())
×
1912
    
1913
    def resolution(self):
1✔
1914
        """
1915
        Compute the mid-swath resolution of the Sentinel-1 product. For GRD products the resolution is expressed in
1916
        ground range, for all other products in slant range.
1917
        
1918
        References:
1919
            * https://sentinel.esa.int/web/sentinel/user-guides/sentinel-1-sar/resolutions/level-1-single-look-complex
1920
            * https://sentinel.esa.int/web/sentinel/user-guides/sentinel-1-sar/resolutions/level-1-ground-range-detected
1921
            * https://sentinel.esa.int/web/sentinel/user-guides/sentinel-1-sar/document-library/-/asset_publisher/1dO7RF5fJMbd/content/sentinel-1-product-definition
1922
        
1923
        Returns
1924
        -------
1925
        tuple[float]
1926
            the resolution as (range, azimuth)
1927
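
        Examples
        --------
        A minimal sketch, assuming `scene` is a :class:`SAFE` object of a GRD or SLC product:

        >>> resolution_rg, resolution_az = scene.resolution()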
        """
1928
        if 'resolution' in self.meta.keys():
×
1929
            return self.meta['resolution']
×
1930
        if self.product not in ['GRD', 'SLC']:
×
1931
            msg = 'this method has only been implemented for GRD and SLC, not {}'
×
1932
            raise RuntimeError(msg.format(self.product))
×
1933
        
1934
        annotations = self.findfiles(self.pattern_ds)
×
1935
        key = lambda x: re.search('-[vh]{2}-', x).group()
×
1936
        groups = groupby(sorted(annotations, key=key), key=key)
×
1937
        annotations = [list(value) for key, value in groups][0]
×
1938
        proc_pars = []  # processing parameters per sub-swath
×
1939
        sp_az = []  # azimuth pixel spacings per sub-swath
×
1940
        ti_az = []  # azimuth time intervals per sub-swath
×
1941
        for ann in annotations:
×
1942
            with self.getFileObj(ann) as ann_xml:
×
1943
                tree = ET.fromstring(ann_xml.read())
×
1944
                par = tree.findall('.//swathProcParams')
×
1945
                proc_pars.extend(par)
×
1946
                for i in range(len(par)):
×
1947
                    sp_az.append(float(tree.find('.//azimuthPixelSpacing').text))
×
1948
                    ti_az.append(float(tree.find('.//azimuthTimeInterval').text))
×
1949
        c = 299792458.0  # speed of light
×
1950
        # see Sentinel-1 product definition for Hamming window coefficients
1951
        # and Impulse Response Width (IRW) broadening factors:
1952
        coefficients = [0.52, 0.6, 0.61, 0.62, 0.63, 0.65, 0.70, 0.72, 0.73, 0.75]
×
1953
        b_factors = [1.54, 1.32, 1.3, 1.28, 1.27, 1.24, 1.18, 1.16, 1.15, 1.13]
×
1954
        resolutions_rg = []
×
1955
        resolutions_az = []
×
1956
        for i, par in enumerate(proc_pars):
×
1957
            # computation of slant range resolution
1958
            rg_proc = par.find('rangeProcessing')
×
1959
            wrg = float(rg_proc.find('windowCoefficient').text)
×
1960
            brg = float(rg_proc.find('processingBandwidth').text)
×
1961
            lbrg = float(rg_proc.find('lookBandwidth').text)
×
1962
            lrg = brg / lbrg
×
1963
            kbrg = b_factors[coefficients.index(wrg)]
×
1964
            resolutions_rg.append(0.886 * c / (2 * brg) * kbrg * lrg)
×
1965
            
1966
            # computation of azimuth resolution; yet to be checked for correctness
1967
            az_proc = par.find('azimuthProcessing')
×
1968
            waz = float(az_proc.find('windowCoefficient').text)
×
1969
            baz = float(az_proc.find('processingBandwidth').text)
×
1970
            lbaz = float(az_proc.find('lookBandwidth').text)
×
1971
            laz = baz / lbaz
×
1972
            kbaz = b_factors[coefficients.index(waz)]
×
1973
            vsat = sp_az[i] / ti_az[i]
×
1974
            resolutions_az.append(0.886 * vsat / baz * kbaz * laz)
×
1975
        
1976
        resolution_rg = median(resolutions_rg)
×
1977
        resolution_az = median(resolutions_az)
×
1978
        
1979
        if self.meta['image_geometry'] == 'GROUND_RANGE':
×
1980
            resolution_rg /= math.sin(math.radians(self.meta['incidence']))
×
1981
        
1982
        self.meta['resolution'] = resolution_rg, resolution_az
×
1983
        return self.meta['resolution']
×
1984
    
1985
    def scanMetadata(self):
1✔
1986
        with self.getFileObj(self.findfiles('manifest.safe')[0]) as obj:
1✔
1986
            manifest = obj.getvalue()
1✔
1988
        namespaces = getNamespaces(manifest)
1✔
1989
        tree = ET.fromstring(manifest)
1✔
1990
        
1991
        meta = dict()
1✔
1992
        key = 's1sarl1'
1✔
1993
        obj_prod = tree.find('.//{}:productType'.format(key), namespaces)
1✔
1994
        if obj_prod is None:
1✔
1995
            key = 's1sarl2'
×
1996
            obj_prod = tree.find('.//{}:productType'.format(key), namespaces)
×
1997
        
1998
        meta['product'] = obj_prod.text
1✔
1999
        
2000
        acqmode = tree.find('.//{}:mode'.format(key), namespaces).text
1✔
2001
        if acqmode == 'SM':
1✔
2002
            meta['acquisition_mode'] = tree.find('.//{}:swath'.format(key), namespaces).text
×
2003
        else:
2004
            meta['acquisition_mode'] = acqmode
1✔
2005
        meta['acquisition_time'] = dict(
1✔
2006
            [(x, tree.find('.//safe:{}Time'.format(x), namespaces).text) for x in ['start', 'stop']])
2007
        meta['start'], meta['stop'] = (self.parse_date(meta['acquisition_time'][x]) for x in ['start', 'stop'])
1✔
2008
        meta['coordinates'] = [tuple([float(y) for y in x.split(',')][::-1]) for x in
1✔
2009
                               tree.find('.//gml:coordinates', namespaces).text.split()]
2010
        meta['orbit'] = tree.find('.//s1:pass', namespaces).text[0]
1✔
2011
        
2012
        meta['orbitNumber_abs'] = int(tree.find('.//safe:orbitNumber[@type="start"]', namespaces).text)
1✔
2013
        meta['orbitNumber_rel'] = int(tree.find('.//safe:relativeOrbitNumber[@type="start"]', namespaces).text)
1✔
2014
        meta['cycleNumber'] = int(tree.find('.//safe:cycleNumber', namespaces).text)
1✔
2015
        meta['frameNumber'] = int(tree.find('.//{}:missionDataTakeID'.format(key), namespaces).text)
1✔
2016
        
2017
        meta['orbitNumbers_abs'] = dict(
1✔
2018
            [(x, int(tree.find('.//safe:orbitNumber[@type="{0}"]'.format(x), namespaces).text)) for x in
2019
             ['start', 'stop']])
2020
        meta['orbitNumbers_rel'] = dict(
1✔
2021
            [(x, int(tree.find('.//safe:relativeOrbitNumber[@type="{0}"]'.format(x), namespaces).text)) for x in
2022
             ['start', 'stop']])
2023
        key_pol = './/{}:transmitterReceiverPolarisation'.format(key)
1✔
2024
        meta['polarizations'] = [x.text for x in tree.findall(key_pol, namespaces)]
1✔
2025
        meta['category'] = tree.find('.//{}:productClass'.format(key), namespaces).text
1✔
2026
        family = tree.find('.//safe:familyName', namespaces).text.replace('ENTINEL-', '')
1✔
2027
        number = tree.find('.//safe:number', namespaces).text
1✔
2028
        meta['sensor'] = family + number
1✔
2029
        meta['IPF_version'] = float(tree.find('.//safe:software', namespaces).attrib['version'])
1✔
2030
        sliced = tree.find('.//{}:sliceProductFlag'.format(key), namespaces).text == 'true'
1✔
2031
        if sliced:
1✔
2032
            meta['sliceNumber'] = int(tree.find('.//{}:sliceNumber'.format(key), namespaces).text)
1✔
2033
            meta['totalSlices'] = int(tree.find('.//{}:totalSlices'.format(key), namespaces).text)
1✔
2034
        else:
2035
            meta['sliceNumber'] = None
×
2036
            meta['totalSlices'] = None
×
2037
        
2038
        if meta['product'] == 'OCN':
1✔
2039
            meta['spacing'] = -1
×
2040
            meta['samples'] = -1
×
2041
            meta['lines'] = -1
×
2042
        else:
2043
            annotations = self.findfiles(self.pattern_ds)
1✔
2044
            key = lambda x: re.search('-[vh]{2}-', x).group()
1✔
2045
            groups = groupby(sorted(annotations, key=key), key=key)
1✔
2046
            annotations = [list(value) for key, value in groups][0]
1✔
2047
            ann_trees = []
1✔
2048
            for ann in annotations:
1✔
2049
                with self.getFileObj(ann) as ann_xml:
1✔
2050
                    ann_trees.append(ET.fromstring(ann_xml.read()))
1✔
2051
            
2052
            sp_rg = [float(x.find('.//rangePixelSpacing').text) for x in ann_trees]
1✔
2053
            sp_az = [float(x.find('.//azimuthPixelSpacing').text) for x in ann_trees]
1✔
2054
            meta['spacing'] = (median(sp_rg), median(sp_az))
1✔
2055
            samples = [x.find('.//imageAnnotation/imageInformation/numberOfSamples').text for x in ann_trees]
1✔
2056
            meta['samples'] = sum([int(x) for x in samples])
1✔
2057
            lines = [x.find('.//imageAnnotation/imageInformation/numberOfLines').text for x in ann_trees]
1✔
2058
            meta['lines'] = sum([int(x) for x in lines])
1✔
2059
            heading = median(float(x.find('.//platformHeading').text) for x in ann_trees)
1✔
2060
            meta['heading'] = heading if heading > 0 else heading + 360
1✔
2061
            incidence = [float(x.find('.//incidenceAngleMidSwath').text) for x in ann_trees]
1✔
2062
            meta['incidence'] = median(incidence)
1✔
2063
            meta['image_geometry'] = ann_trees[0].find('.//projection').text.replace(' ', '_').upper()
1✔
2064
        
2065
        return meta
1✔
2066
    
2067
    def unpack(self, directory, overwrite=False, exist_ok=False):
1✔
2068
        outdir = os.path.join(directory, os.path.basename(self.file))
1✔
2069
        self._unpack(outdir, overwrite=overwrite, exist_ok=exist_ok)
1✔
2070

2071

2072
class TSX(ID):
1✔
2073
    """
2074
    Handler class for TerraSAR-X and TanDEM-X data
2075
    
2076
    Sensors:
2077
        * TSX1
2078
        * TDX1
2079

2080
    References:
2081
        * TX-GS-DD-3302  TerraSAR-X Basic Product Specification Document
2082
        * TX-GS-DD-3303  TerraSAR-X Experimental Product Description
2083
        * TD-GS-PS-3028  TanDEM-X Experimental Product Description
2084
        * TerraSAR-X Image Product Guide (Airbus Defence and Space)
2085
    
2086
    Acquisition modes:
2087
        * ST:    Staring Spotlight
2088
        * HS:    High Resolution SpotLight
2089
        * HS300: High Resolution SpotLight 300 MHz
2090
        * SL:    SpotLight
2091
        * SM:    StripMap
2092
        * SC:    ScanSAR
2093
        * WS:    Wide ScanSAR
2094
    
2095
    Polarisation modes:
2096
        * Single (S): all acquisition modes
2097
        * Dual   (D): High Resolution SpotLight (HS), SpotLight (SL) and StripMap (SM)
2098
        * Twin   (T): StripMap (SM) (experimental)
2099
        * Quad   (Q): StripMap (SM) (experimental)
2100
    
2101
    Products:
2102
        * SSC: Single Look Slant Range Complex
2103
        * MGD: Multi Look Ground Range Detected
2104
        * GEC: Geocoded Ellipsoid Corrected
2105
        * EEC: Enhanced Ellipsoid Corrected
2106
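
    Examples
    ----------
    A minimal usage sketch; the directory name is hypothetical:

    >>> from pyroSAR import identify
    >>> scene = identify('/.../TSX1_SAR__MGD_scene')
    >>> print(scene.sensor, scene.product)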
    """
2107
    
2108
    def __init__(self, scene):
1✔
2109
        if isinstance(scene, str):
1✔
2110
            self.scene = os.path.realpath(scene)
1✔
2111
            
2112
            self.pattern = patterns.tsx
1✔
2113
            
2114
            self.pattern_ds = r'^IMAGE_(?P<pol>HH|HV|VH|VV)_(?:SRA|FWD|AFT)_(?P<beam>[^\.]+)\.(cos|tif)$'
1✔
2115
            self.examine(include_folders=False)
1✔
2116
            
2117
            if not re.match(re.compile(self.pattern), os.path.basename(self.file)):
×
2118
                raise RuntimeError('folder does not match TSX scene naming convention')
×
2119
            
2120
            self.meta = self.scanMetadata()
×
2121
            self.meta['projection'] = crsConvert(4326, 'wkt')
×
2122
        
2123
        super(TSX, self).__init__(self.meta)
×
2124
    
2125
    def scanMetadata(self):
1✔
2126
        annotation = self.getFileObj(self.file).getvalue()
×
2127
        namespaces = getNamespaces(annotation)
×
2128
        tree = ET.fromstring(annotation)
×
2129
        meta = dict()
×
2130
        meta['sensor'] = tree.find('.//generalHeader/mission', namespaces).text.replace('-', '')
×
2131
        meta['product'] = tree.find('.//orderInfo/productVariant', namespaces).text
×
2132
        meta['orbit'] = tree.find('.//missionInfo/orbitDirection', namespaces).text[0]
×
2133
        meta['polarizations'] = [x.text for x in
×
2134
                                 tree.findall('.//acquisitionInfo/polarisationList/polLayer', namespaces)]
2135
        
2136
        meta['orbitNumber_abs'] = int(tree.find('.//missionInfo/absOrbit', namespaces).text)
×
2137
        meta['orbitNumber_rel'] = int(tree.find('.//missionInfo/relOrbit', namespaces).text)
×
2138
        meta['cycleNumber'] = int(tree.find('.//missionInfo/orbitCycle', namespaces).text)
×
2139
        meta['frameNumber'] = int(tree.find('.//inputData/uniqueDataTakeID', namespaces).text)
×
2140
        
2141
        meta['acquisition_mode'] = tree.find('.//acquisitionInfo/imagingMode', namespaces).text
×
2142
        meta['start'] = self.parse_date(tree.find('.//sceneInfo/start/timeUTC', namespaces).text)
×
2143
        meta['stop'] = self.parse_date(tree.find('.//sceneInfo/stop/timeUTC', namespaces).text)
×
2144
        spacing_row = float(tree.find('.//imageDataInfo/imageRaster/rowSpacing', namespaces).text)
×
2145
        spacing_col = float(tree.find('.//imageDataInfo/imageRaster/columnSpacing', namespaces).text)
×
2146
        meta['spacing'] = (spacing_col, spacing_row)
×
2147
        meta['samples'] = int(tree.find('.//imageDataInfo/imageRaster/numberOfColumns', namespaces).text)
×
2148
        meta['lines'] = int(tree.find('.//imageDataInfo/imageRaster/numberOfRows', namespaces).text)
×
2149
        rlks = float(tree.find('.//imageDataInfo/imageRaster/rangeLooks', namespaces).text)
×
2150
        azlks = float(tree.find('.//imageDataInfo/imageRaster/azimuthLooks', namespaces).text)
×
2151
        meta['looks'] = (rlks, azlks)
×
2152
        meta['incidence'] = float(tree.find('.//sceneInfo/sceneCenterCoord/incidenceAngle', namespaces).text)
×
2153
        
2154
        geocs = self.getFileObj(self.findfiles('GEOREF.xml')[0]).getvalue()
×
2155
        tree = ET.fromstring(geocs)
×
2156
        pts = tree.findall('.//gridPoint')
×
2157
        lat = [float(x.find('lat').text) for x in pts]
×
2158
        lon = [float(x.find('lon').text) for x in pts]
×
2159
        # shift longitudes from (180, 360] to (-180, 0] for scenes crossing the antimeridian
2160
        lon = [x - 360 if x > 180 else x for x in lon]
×
2161
        meta['coordinates'] = list(zip(lon, lat))
×
2162
        
2163
        return meta
×
2164
    
2165
    def unpack(self, directory, overwrite=False, exist_ok=False):
1✔
2166
        match = self.findfiles(self.pattern, True)
×
2167
        header = [x for x in match if not x.endswith('xml') and 'iif' not in x][0].replace(self.scene, '').strip('/')
×
2168
        outdir = os.path.join(directory, os.path.basename(header))
×
2169
        self._unpack(outdir, offset=header, overwrite=overwrite, exist_ok=exist_ok)
×
2170

2171

2172
class TDM(TSX):
1✔
2173
    """
2174
    Handler class for TerraSAR-X and TanDEM-X experimental data
2175
    
2176
    Sensors:
2177
        * TDM1
2178

2179
    References:
2180
        * TD-GS-PS-3028  TanDEM-X Experimental Product Description
2181
    
2182
    Acquisition modes:
2183
        * HS:    High Resolution SpotLight
2184
        * SL:    SpotLight
2185
        * SM:    StripMap
2186
    
2187
    Polarisation modes:
2188
        * Single (S): all acquisition modes
2189
        * Dual   (D): High Resolution SpotLight (HS), SpotLight (SL) and StripMap (SM)
2190
        * Twin   (T): StripMap (SM) (experimental)
2191
        * Quad   (Q): StripMap (SM) (experimental)
2192
    
2193
    Products:
2194
        * CoSSC: (bi-static) SAR co-registered single look slant range complex products
2195

2196

2197
    Examples
2198
    ----------
2199
    Ingest all TanDEM-X bistatic scenes in a directory and its sub-directories into the database:
2200

2201
    >>> from pyroSAR import Archive, identify
2202
    >>> from spatialist.ancillary import finder
2203
    >>> dbfile = '/.../scenelist.db'
2204
    >>> archive_tdm = '/.../TDM/'
2205
    >>> scenes_tdm = finder(archive_tdm, [r'^TDM1.*'], foldermode=2, regex=True, recursive=True)
2206
    >>> with Archive(dbfile) as archive:
2207
    >>>     archive.insert(scenes_tdm)
2208
    """
2209
    
2210
    def __init__(self, scene):
1✔
2211
        self.scene = os.path.realpath(scene)
1✔
2212
        
2213
        self.pattern = patterns.tdm
1✔
2214
        
2215
        self.pattern_ds = r'^IMAGE_(?P<pol>HH|HV|VH|VV)_(?:SRA|FWD|AFT)_(?P<beam>[^\.]+)\.(cos|tif)$'
1✔
2216
        self.examine(include_folders=False)
1✔
2217
        
2218
        if not re.match(re.compile(self.pattern), os.path.basename(self.file)):
×
2219
            raise RuntimeError('folder does not match TDM scene naming convention')
×
2220
        
2221
        self.meta = self.scanMetadata()
×
2222
        self.meta['projection'] = crsConvert(4326, 'wkt')
×
2223
        
2224
        super(TDM, self).__init__(self.meta)
×
2225
    
2226
    def scanMetadata(self):
1✔
2227
        annotation = self.getFileObj(self.file).getvalue()
×
2228
        namespaces = getNamespaces(annotation)
×
2229
        tree = ET.fromstring(annotation)
×
2230
        meta = dict()
×
2231
        meta['sensor'] = tree.find('.//commonAcquisitionInfo/missionID', namespaces).text.replace('-', '')
×
2232
        meta['product'] = tree.find('.//productInfo/productType', namespaces).text
×
2233
        meta['SAT1'] = tree.find('.//commonAcquisitionInfo/satelliteIDsat1', namespaces).text
×
2234
        meta['SAT2'] = tree.find('.//commonAcquisitionInfo/satelliteIDsat2', namespaces).text
×
2235
        meta['inSARmasterID'] = tree.find('.//commonAcquisitionInfo/inSARmasterID', namespaces).text
×
2236
        pattern = './/commonAcquisitionInfo/satelliteID{}'.format(meta['inSARmasterID'].lower())
×
2237
        meta['inSARmaster'] = tree.find(pattern, namespaces).text.replace('-', '')
×
2238
        
2239
        pattern = './/commonAcquisitionInfo/operationsInfo/acquisitionItemID'
×
2240
        meta['acquisitionItemID'] = int(tree.find(pattern, namespaces).text)
×
2241
        
2242
        meta['effectiveBaseline'] = float(tree.find('.//acquisitionGeometry/effectiveBaseline', namespaces).text)
×
2243
        meta['heightOfAmbiguity'] = float(tree.find('.//acquisitionGeometry/heightOfAmbiguity', namespaces).text)
×
2244
        meta['distanceActivePos'] = float(tree.find('.//acquisitionGeometry/distanceActivePos', namespaces).text)
×
2245
        meta['distanceTracks'] = float(tree.find('.//acquisitionGeometry/distanceTracks', namespaces).text)
×
2246
        
2247
        meta['cooperativeMode'] = tree.find('.//commonAcquisitionInfo/cooperativeMode', namespaces).text
×
2248
        
2249
        if meta['cooperativeMode'].lower() == "bistatic":
×
2250
            meta['bistatic'] = True
×
2251
        else:
2252
            meta['bistatic'] = False
×
2253
        
2254
        meta['orbit'] = tree.find('.//acquisitionGeometry/orbitDirection', namespaces).text[0]
×
2255
        
2256
        pattern = ".//productComponents/component[@componentClass='imageData']/file/location/name"
×
2257
        elements = tree.findall(pattern)
×
2258
        self.primary_scene = os.path.join(self.scene, elements[0].text)
×
2259
        self.secondary_scene = os.path.join(self.scene, elements[1].text)
×
2260
        meta["SAT1"] = TSX(self.primary_scene).scanMetadata()
×
2261
        meta["SAT2"] = TSX(self.secondary_scene).scanMetadata()
×
2262
        
2263
        meta['start'] = self.parse_date(tree.find('.//orbitHeader/firstStateTime/firstStateTimeUTC', namespaces).text)
×
2264
        meta['stop'] = self.parse_date(tree.find('.//orbitHeader/lastStateTime/lastStateTimeUTC', namespaces).text)
×
2265
        meta['samples'] = int(tree.find('.//coregistration/coregRaster/samples', namespaces).text)
×
2266
        meta['lines'] = int(tree.find('.//coregistration/coregRaster/lines', namespaces).text)
×
2267
        rlks = float(tree.find('.//processingInfo/inSARProcessing/looks/range', namespaces).text)
×
2268
        azlks = float(tree.find('.//processingInfo/inSARProcessing/looks/azimuth', namespaces).text)
×
2269
        meta['looks'] = (rlks, azlks)
×
2270
        meta['incidence'] = float(tree.find('.//commonSceneInfo/sceneCenterCoord/incidenceAngle', namespaces).text)
×
2271
        
2272
        meta['orbit'] = meta[meta['inSARmasterID']]['orbit']
×
2273
        meta['polarizations'] = meta[meta['inSARmasterID']]['polarizations']
×
2274
        
2275
        meta['orbitNumber_abs'] = meta[meta['inSARmasterID']]['orbitNumber_abs']
×
2276
        meta['orbitNumber_rel'] = meta[meta['inSARmasterID']]['orbitNumber_rel']
×
2277
        meta['cycleNumber'] = meta[meta['inSARmasterID']]['cycleNumber']
×
2278
        meta['frameNumber'] = meta[meta['inSARmasterID']]['frameNumber']
×
2279
        
2280
        meta['acquisition_mode'] = meta[meta['inSARmasterID']]['acquisition_mode']
×
2281
        meta['start'] = meta[meta['inSARmasterID']]['start']
×
2282
        meta['stop'] = meta[meta['inSARmasterID']]['stop']
×
2283
        meta['spacing'] = meta[meta['inSARmasterID']]['spacing']
×
2284
        meta['samples'] = meta[meta['inSARmasterID']]['samples']
×
2285
        meta['lines'] = meta[meta['inSARmasterID']]['lines']
×
2286
        meta['looks'] = meta[meta['inSARmasterID']]['looks']
×
2287
        meta['incidence'] = meta[meta['inSARmasterID']]['incidence']
×
2288
        
2289
        pts = tree.findall('.//sceneCornerCoord')
×
2290
        lat = [float(x.find('lat').text) for x in pts]
×
2291
        lon = [float(x.find('lon').text) for x in pts]
×
2292
        # shift longitudes from (180, 360] to (-180, 0] for scenes crossing the antimeridian
2293
        lon = [x - 360 if x > 180 else x for x in lon]
×
2294
        meta['coordinates'] = list(zip(lon, lat))
×
2295
        
2296
        return meta
×
2297

2298

2299
class Archive(object):
1✔
2300
    """
2301
    Utility for storing SAR image metadata in a database
2302

2303
    Parameters
2304
    ----------
2305
    dbfile: str
2306
        the filename for the SpatiaLite database. This may point to an existing database; otherwise a new one will be created.
2307
        If postgres is set to True, this will be the name for the PostgreSQL database.
2308
    custom_fields: dict or None
2309
        a dictionary containing additional non-standard database column names and data types;
2310
        the names must be attributes of the SAR scenes to be inserted (i.e. id.attr) or keys in their meta attribute
2311
        (i.e. id.meta['attr'])
2312
    postgres: bool
2313
        enable postgres driver for the database. Default: False
2314
    user: str
2315
        required for postgres driver: username to access the database. Default: 'postgres'
2316
    password: str
2317
        required for postgres driver: password to access the database. Default: '1234'
2318
    host: str
2319
        required for postgres driver: host where the database is hosted. Default: 'localhost'
2320
    port: int
2321
        required for postgres driver: port number to the database. Default: 5432
2322
    cleanup: bool
2323
        check whether all registered scenes exist and remove missing entries?
2324
    legacy: bool
2325
        open an outdated database in legacy mode to import into a new database.
2326
        Opening an outdated database without legacy mode will throw a RuntimeError.
2327

2328
    Examples
2329
    ----------
2330
    Ingest all Sentinel-1 scenes in a directory and its sub-directories into the database:
2331

2332
    >>> from pyroSAR import Archive, identify
2333
    >>> from spatialist.ancillary import finder
2334
    >>> dbfile = '/.../scenelist.db'
2335
    >>> archive_s1 = '/.../sentinel1/GRD'
2336
    >>> scenes_s1 = finder(archive_s1, [r'^S1[AB].*\.zip'], regex=True, recursive=True)
2337
    >>> with Archive(dbfile) as archive:
2338
    >>>     archive.insert(scenes_s1)
2339

2340
    select all Sentinel-1 A/B scenes stored in the database, which
2341
    
2342
     * overlap with a test site
2343
     * were acquired in Ground-Range-Detected (GRD) Interferometric Wide Swath (IW) mode before 2018
2344
     * contain a VV polarization image
2345
     * have not been processed to directory `outdir` before
2346

2347
    >>> from pyroSAR import Archive
2348
    >>> from spatialist import Vector
2349
    >>> archive = Archive('/.../scenelist.db')
2350
    >>> site = Vector('/path/to/site.shp')
2351
    >>> outdir = '/path/to/processed/results'
2352
    >>> maxdate = '20171231T235959'
2353
    >>> selection_proc = archive.select(vectorobject=site, processdir=outdir,
2354
    >>>                                 maxdate=maxdate, sensor=('S1A', 'S1B'),
2355
    >>>                                 product='GRD', acquisition_mode='IW', vv=1)
2356
    >>> archive.close()
2357

2358
    Alternatively, the `with` statement can be used.
2359
    In this case to just check whether one particular scene is already registered in the database:
2360

2361
    >>> from pyroSAR import identify, Archive
2362
    >>> scene = identify('S1A_IW_SLC__1SDV_20150330T170734_20150330T170801_005264_006A6C_DA69.zip')
2363
    >>> with Archive('/.../scenelist.db') as archive:
2364
    >>>     print(archive.is_registered(scene.scene))
2365

2366
    When setting postgres=True, a PostgreSQL database will be created at a given host.
2367
    Additional arguments are required.
2368

2369
    >>> from pyroSAR import Archive, identify
2370
    >>> from spatialist.ancillary import finder
2371
    >>> dbfile = 'scenelist_db'
2372
    >>> archive_s1 = '/.../sentinel1/GRD'
2373
    >>> scenes_s1 = finder(archive_s1, [r'^S1[AB].*\.zip'], regex=True, recursive=True)
2374
    >>> with Archive(dbfile, postgres=True, user='user', password='password', host='host', port=5432) as archive:
2375
    >>>     archive.insert(scenes_s1)
2376
    
2377
    Importing an old database:
2378
    
2379
    >>> from pyroSAR import Archive
2380
    >>> db_new = 'scenes.db'
2381
    >>> db_old = 'scenes_old.db'
2382
    >>> with Archive(db_new) as db:
2383
    >>>     with Archive(db_old, legacy=True) as db_old:
2384
    >>>         db.import_outdated(db_old)
2385
    """
2386
    
2387
    def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres',
1✔
2388
                 password='1234', host='localhost', port=5432, cleanup=True,
2389
                 legacy=False):
2390
        
2391
        if dbfile.endswith('.csv'):
1✔
2392
            raise RuntimeError("Please create a new Archive database and import the"
1✔
2393
                               "CSV file using db.import_outdated('<file>.csv').")
2394
        # check for driver, if postgres then check if server is reachable
2395
        if not postgres:
1✔
2396
            self.driver = 'sqlite'
1✔
2397
            dirname = os.path.dirname(os.path.abspath(dbfile))
1✔
2398
            w_ok = os.access(dirname, os.W_OK)
1✔
2399
            if not w_ok:
1✔
2400
                raise RuntimeError('cannot write to directory {}'.format(dirname))
×
2401
            # catch if .db extension is missing
2402
            root, ext = os.path.splitext(dbfile)
1✔
2403
            if len(ext) == 0:
1✔
2404
                dbfile = root + '.db'
×
2405
        else:
2406
            self.driver = 'postgresql'
1✔
2407
            if not self.__check_host(host, port):
1✔
2408
                sys.exit('Server not found!')
1✔
2409
        
2410
        connect_args = {}
1✔
2411
        
2412
        # create dict, with which a URL to the db is created
2413
        if self.driver == 'sqlite':
1✔
2414
            self.url_dict = {'drivername': self.driver,
1✔
2415
                             'database': dbfile,
2416
                             'query': {'charset': 'utf8'}}
2417
        if self.driver == 'postgresql':
1✔
2418
            self.url_dict = {'drivername': self.driver,
1✔
2419
                             'username': user,
2420
                             'password': password,
2421
                             'host': host,
2422
                             'port': port,
2423
                             'database': dbfile}
2424
            connect_args = {
1✔
2425
                'keepalives': 1,
2426
                'keepalives_idle': 30,
2427
                'keepalives_interval': 10,
2428
                'keepalives_count': 5}
2429
        
2430
        # create engine, containing URL and driver
2431
        log.debug('starting DB engine for {}'.format(URL.create(**self.url_dict)))
1✔
2432
        self.url = URL.create(**self.url_dict)
1✔
2433
        # https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS
2434
        self.engine = create_engine(url=self.url, echo=False,
1✔
2435
                                    connect_args=connect_args)
2436
        
2437
        # call to __load_spatialite() for sqlite, to load mod_spatialite via event handler listen()
2438
        if self.driver == 'sqlite':
1✔
2439
            log.debug('loading spatialite extension')
1✔
2440
            listen(target=self.engine, identifier='connect', fn=self.__load_spatialite)
1✔
2441
            # check if loading was successful
2442
            try:
1✔
2443
                conn = self.engine.connect()
1✔
2444
                version = conn.execute('SELECT spatialite_version();')
1✔
2445
                conn.close()
1✔
2446
            except exc.OperationalError:
×
2447
                raise RuntimeError('could not load spatialite extension')
×
2448
        
2449
        # if database is new, (create postgres-db and) enable spatial extension
2450
        if not database_exists(self.engine.url):
1✔
2451
            if self.driver == 'postgresql':
1✔
2452
                log.debug('creating new PostgreSQL database')
1✔
2453
                create_database(self.engine.url)
1✔
2454
            log.debug('enabling spatial extension for new database')
1✔
2455
            self.conn = self.engine.connect()
1✔
2456
            if self.driver == 'sqlite':
1✔
2457
                self.conn.execute(select([func.InitSpatialMetaData(1)]))
1✔
2458
            elif self.driver == 'postgresql':
1✔
2459
                self.conn.execute('CREATE EXTENSION postgis;')
1✔
2460
        else:
2461
            self.conn = self.engine.connect()
1✔
2462
        # create Session (ORM) and get metadata
2463
        self.Session = sessionmaker(bind=self.engine)
1✔
2464
        self.meta = MetaData(self.engine)
1✔
2465
        self.custom_fields = custom_fields
1✔
2466
        
2467
        # load or create tables
2468
        self.__init_data_table()
1✔
2469
        self.__init_duplicates_table()
1✔
2470
        
2471
        msg = ("the 'data' table is missing {}. Please create a new database "
1✔
2472
               "and import the old one opened in legacy mode using "
2473
               "Archive.import_outdated.")
2474
        pk = sql_inspect(self.data_schema).primary_key
1✔
2475
        if 'product' not in pk.columns.keys() and not legacy:
1✔
2476
            raise RuntimeError(msg.format("a primary key 'product'"))
×
2477
        
2478
        if 'geometry' not in self.get_colnames() and not legacy:
1✔
2479
            raise RuntimeError(msg.format("the 'geometry' column"))
1✔
2480
        
2481
        self.Base = automap_base(metadata=self.meta)
1✔
2482
        self.Base.prepare(self.engine, reflect=True)
1✔
2483
        self.Data = self.Base.classes.data
1✔
2484
        self.Duplicates = self.Base.classes.duplicates
1✔
2485
        self.dbfile = dbfile
1✔
2486
        
2487
        if cleanup:
1✔
2488
            log.info('checking for missing scenes')
1✔
2489
            self.cleanup()
1✔
2490
            sys.stdout.flush()
1✔
2491
    
2492
    def add_tables(self, tables):
1✔
2493
        """
2494
        Add tables to the database per :class:`sqlalchemy.schema.Table`
2495
        Tables provided here will be added to the database.
2496
        
2497
        .. note::
2498
        
2499
            Geometry columns must be created with ``management=True`` for SQLite,
2500
            for example: ``geometry = Column(Geometry('POLYGON', management=True, srid=4326))``
2501
        
2502
        Parameters
2503
        ----------
2504
        tables: :class:`sqlalchemy.schema.Table` or list[:class:`sqlalchemy.schema.Table`]
2505
            The table(s) to be added to the database.
2506
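        
        Examples
        --------
        A minimal sketch of adding a custom table, assuming an open :class:`Archive` instance `archive`;
        the table name 'mytable' and its columns are illustrative:
        
        >>> from sqlalchemy import Table, MetaData, Column, Integer, String
        >>> from geoalchemy2 import Geometry
        >>> table = Table('mytable', MetaData(),
        >>>               Column('scene', String, primary_key=True),
        >>>               Column('quality', Integer),
        >>>               Column('geometry', Geometry('POLYGON', management=True, srid=4326)))
        >>> archive.add_tables(table)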
        """
2507
        created = []
1✔
2508
        if isinstance(tables, list):
1✔
2509
            for table in tables:
×
2510
                table.metadata = self.meta
×
2511
                if not sql_inspect(self.engine).has_table(str(table)):
×
2512
                    table.create(self.engine)
×
2513
                    created.append(str(table))
×
2514
        else:
2515
            table = tables
1✔
2516
            table.metadata = self.meta
1✔
2517
            if not sql_inspect(self.engine).has_table(str(table)):
1✔
2518
                table.create(self.engine)
1✔
2519
                created.append(str(table))
1✔
2520
        log.info('created table(s) {}.'.format(', '.join(created)))
1✔
2521
        self.Base = automap_base(metadata=self.meta)
1✔
2522
        self.Base.prepare(self.engine, reflect=True)
1✔
2523
    
2524
    def __init_data_table(self):
1✔
2525
        if sql_inspect(self.engine).has_table('data'):
1✔
2526
            self.data_schema = Table('data', self.meta, autoload_with=self.engine)
1✔
2527
            return
1✔
2528
        
2529
        log.debug("creating DB table 'data'")
1✔
2530
        
2531
        self.data_schema = Table('data', self.meta,
1✔
2532
                                 Column('sensor', String),
2533
                                 Column('orbit', String),
2534
                                 Column('orbitNumber_abs', Integer),
2535
                                 Column('orbitNumber_rel', Integer),
2536
                                 Column('cycleNumber', Integer),
2537
                                 Column('frameNumber', Integer),
2538
                                 Column('acquisition_mode', String),
2539
                                 Column('start', String),
2540
                                 Column('stop', String),
2541
                                 Column('product', String, primary_key=True),
2542
                                 Column('samples', Integer),
2543
                                 Column('lines', Integer),
2544
                                 Column('outname_base', String, primary_key=True),
2545
                                 Column('scene', String),
2546
                                 Column('hh', Integer),
2547
                                 Column('vv', Integer),
2548
                                 Column('hv', Integer),
2549
                                 Column('vh', Integer),
2550
                                 Column('geometry', Geometry(geometry_type='POLYGON',
2551
                                                             management=True, srid=4326)))
2552
        # add custom fields
2553
        if self.custom_fields is not None:
1✔
2554
            for key, val in self.custom_fields.items():
×
2555
                if val in ['Integer', 'integer', 'int']:
×
2556
                    self.data_schema.append_column(Column(key, Integer))
×
2557
                elif val in ['String', 'string', 'str']:
×
2558
                    self.data_schema.append_column(Column(key, String))
×
2559
                else:
2560
                    log.info('Value in dict custom_fields must be "integer" or "string"!')
×
2561
        
2562
        self.data_schema.create(self.engine)
1✔
2563
    
2564
    def __init_duplicates_table(self):
1✔
2565
        # create tables if not existing
2566
        if sql_inspect(self.engine).has_table('duplicates'):
1✔
2567
            self.duplicates_schema = Table('duplicates', self.meta, autoload_with=self.engine)
1✔
2568
            return
1✔
2569
        
2570
        log.debug("creating DB table 'duplicates'")
1✔
2571
        
2572
        self.duplicates_schema = Table('duplicates', self.meta,
1✔
2573
                                       Column('outname_base', String, primary_key=True),
2574
                                       Column('scene', String, primary_key=True))
2575
        self.duplicates_schema.create(self.engine)
1✔
2576
    
2577
    @staticmethod
1✔
2578
    def __load_spatialite(dbapi_conn, connection_record):
1✔
2579
        """
2580
        load the spatialite extension for SQLite; not intended to be used outside of __init__
2581
        
2582
        Parameters
2583
        ----------
2584
        dbapi_conn:
2585
            the raw DBAPI connection
2586
        connection_record:
2587
            the connection record; unused here, but required by the :func:`sqlalchemy.event.listen` callback signature
2588
        """
2589
        dbapi_conn.enable_load_extension(True)
1✔
2590
        # check which platform and use according mod_spatialite
2591
        if platform.system() == 'Linux':
1✔
2592
            for option in ['mod_spatialite', 'mod_spatialite.so']:
1✔
2593
                try:
1✔
2594
                    dbapi_conn.load_extension(option)
1✔
2595
                except sqlite3.OperationalError:
×
2596
                    continue
×
2597
        elif platform.system() == 'Darwin':
×
2598
            for option in ['mod_spatialite.so', 'mod_spatialite.7.dylib',
×
2599
                           'mod_spatialite.dylib']:
2600
                try:
×
2601
                    dbapi_conn.load_extension(option)
×
2602
                except sqlite3.OperationalError:
×
2603
                    continue
×
2604
        else:
2605
            dbapi_conn.load_extension('mod_spatialite')
×
2606
    
2607
    def __prepare_insertion(self, scene):
1✔
2608
        """
2609
        read scene metadata and parse a string for inserting it into the database
2610

2611
        Parameters
2612
        ----------
2613
        scene: str or ID
2614
            a SAR scene
2615

2616
        Returns
2617
        -------
2618
        an object of ORM class Data, ready for insertion into the 'data' table
2619
        """
2620
        id = scene if isinstance(scene, ID) else identify(scene)
1✔
2621
        pols = [x.lower() for x in id.polarizations]
1✔
2622
        # insertion as an object of Class Data (reflected in the init())
2623
        insertion = self.Data()
1✔
2624
        colnames = self.get_colnames()
1✔
2625
        for attribute in colnames:
1✔
2626
            if attribute == 'geometry':
1✔
2627
                geom = id.geometry()
1✔
2628
                geom.reproject(4326)
1✔
2629
                geom = geom.convert2wkt(set3D=False)[0]
1✔
2630
                geom = 'SRID=4326;' + str(geom)
1✔
2631
                # set attributes of the Data object according to input
2632
                setattr(insertion, 'geometry', geom)
1✔
2633
            elif attribute in ['hh', 'vv', 'hv', 'vh']:
1✔
2634
                setattr(insertion, attribute, int(attribute in pols))
1✔
2635
            else:
2636
                if hasattr(id, attribute):
1✔
2637
                    attr = getattr(id, attribute)
1✔
2638
                elif attribute in id.meta.keys():
×
2639
                    attr = id.meta[attribute]
×
2640
                else:
2641
                    raise AttributeError('could not find attribute {}'.format(attribute))
×
2642
                value = attr() if inspect.ismethod(attr) else attr
1✔
2643
                setattr(insertion, str(attribute), value)
1✔
2644
        
2645
        return insertion  # return the Data object
1✔
2646
    
2647
    def __select_missing(self, table):
1✔
2648
        """
2649

2650
        Returns
2651
        -------
2652
        list[str]
2653
            the names of all scenes that are no longer stored in their registered location
2654
        """
2655
        if table == 'data':
1✔
2656
            # using ORM query to get all scenes locations
2657
            scenes = self.Session().query(self.Data.scene)
1✔
2658
        elif table == 'duplicates':
×
2659
            scenes = self.Session().query(self.Duplicates.scene)
×
2660
        else:
2661
            raise ValueError("parameter 'table' must either be 'data' or 'duplicates'")
×
2662
        files = [self.encode(x[0]) for x in scenes]
1✔
2663
        return [x for x in files if not os.path.isfile(x)]
1✔
2664
    
2665
    def insert(self, scene_in, pbar=False, test=False):
1✔
2666
        """
2667
        Insert one or many scenes into the database
2668

2669
        Parameters
2670
        ----------
2671
        scene_in: str or ID or list[str or ID]
2672
            a SAR scene or a list of scenes to be inserted
2673
        pbar: bool
2674
            show a progress bar?
2675
        test: bool
2676
            should the insertion only be tested or directly be committed to the database?
2677
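        
        Examples
        --------
        A minimal sketch, assuming an open :class:`Archive` instance `archive` and a
        directory of zipped Sentinel-1 scenes; the path is illustrative:
        
        >>> from spatialist.ancillary import finder
        >>> scenes = finder('/path/to/scenes', [r'^S1[AB].*\.zip$'], regex=True)
        >>> archive.insert(scenes, pbar=True)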
        """
2678
        
2679
        if isinstance(scene_in, (ID, str)):
1✔
2680
            scene_in = [scene_in]
1✔
2681
        if not isinstance(scene_in, list):
1✔
2682
            raise RuntimeError('scene_in must either be a string pointing to a file, a pyroSAR.ID object '
×
2683
                               'or a list containing several of either')
2684
        
2685
        log.info('filtering scenes by name')
1✔
2686
        scenes = self.filter_scenelist(scene_in)
1✔
2687
        if len(scenes) == 0:
1✔
2688
            log.info('...nothing to be done')
1✔
2689
            return
1✔
2690
        log.info('identifying scenes and extracting metadata')
1✔
2691
        scenes = identify_many(scenes, pbar=pbar)
1✔
2692
        
2693
        if len(scenes) == 0:
1✔
2694
            log.info('all scenes are already registered')
×
2695
            return
×
2696
        
2697
        counter_regulars = 0
1✔
2698
        counter_duplicates = 0
1✔
2699
        list_duplicates = []
1✔
2700
        
2701
        message = 'inserting {0} scene{1} into database'
1✔
2702
        log.info(message.format(len(scenes), '' if len(scenes) == 1 else 's'))
1✔
2703
        log.debug('testing changes in temporary database')
1✔
2704
        if pbar:
1✔
2705
            progress = pb.ProgressBar(max_value=len(scenes))
×
2706
        else:
2707
            progress = None
1✔
2708
        insertions = []
1✔
2709
        session = self.Session()
1✔
2710
        for i, id in enumerate(scenes):
1✔
2711
            basename = id.outname_base()
1✔
2712
            if not self.is_registered(id):
1✔
2713
                insertion = self.__prepare_insertion(id)
1✔
2714
                insertions.append(insertion)
1✔
2715
                counter_regulars += 1
1✔
2716
                log.debug('regular:   {}'.format(id.scene))
1✔
2717
            elif not self.__is_registered_in_duplicates(id):
1✔
2718
                insertion = self.Duplicates(outname_base=basename, scene=id.scene)
1✔
2719
                insertions.append(insertion)
1✔
2720
                counter_duplicates += 1
1✔
2721
                log.debug('duplicate: {}'.format(id.scene))
1✔
2722
            else:
2723
                list_duplicates.append(id.outname_base())
×
2724
            
2725
            if progress is not None:
1✔
2726
                progress.update(i + 1)
×
2727
        
2728
        if progress is not None:
1✔
2729
            progress.finish()
×
2730
        
2731
        session.add_all(insertions)
1✔
2732
        
2733
        if not test:
1✔
2734
            log.debug('committing transactions to permanent database')
1✔
2735
            # commit changes of the session
2736
            session.commit()
1✔
2737
        else:
2738
            log.info('rolling back temporary database changes')
×
2739
            # roll back changes of the session
2740
            session.rollback()
×
2741
        
2742
        message = '{0} scene{1} registered regularly'
1✔
2743
        log.info(message.format(counter_regulars, '' if counter_regulars == 1 else 's'))
1✔
2744
        message = '{0} duplicate{1} registered'
1✔
2745
        log.info(message.format(counter_duplicates, '' if counter_duplicates == 1 else 's'))
1✔
2746
    
2747
    def is_registered(self, scene):
1✔
2748
        """
2749
        Simple check if a scene is already registered in the database.
2750

2751
        Parameters
2752
        ----------
2753
        scene: str or ID
2754
            the SAR scene
2755

2756
        Returns
2757
        -------
2758
        bool
2759
            is the scene already registered?
2760
        """
2761
        id = scene if isinstance(scene, ID) else identify(scene)
1✔
2762
        # ORM query, where scene equals id.scene, return first
2763
        exists_data = self.Session().query(self.Data.outname_base).filter_by(
1✔
2764
            outname_base=id.outname_base(), product=id.product).first()
2765
        exists_duplicates = self.Session().query(self.Duplicates.outname_base).filter(
1✔
2766
            self.Duplicates.outname_base == id.outname_base()).first()
2767
        in_data = False
1✔
2768
        in_dup = False
1✔
2769
        if exists_data:
1✔
2770
            in_data = len(exists_data) != 0
1✔
2771
        if exists_duplicates:
1✔
2772
            in_dup = len(exists_duplicates) != 0
×
2773
        return in_data or in_dup
1✔
2774
    
2775
    def __is_registered_in_duplicates(self, scene):
1✔
2776
        """
2777
        Simple check if a scene is already registered in the duplicates table.
2778

2779
        Parameters
2780
        ----------
2781
        scene: str or ID
2782
            the SAR scene
2783

2784
        Returns
2785
        -------
2786
        bool
2787
            is the scene already registered?
2788
        """
2789
        id = scene if isinstance(scene, ID) else identify(scene)
1✔
2790
        # ORM query as in is registered
2791
        exists_duplicates = self.Session().query(self.Duplicates.outname_base).filter(
1✔
2792
            self.Duplicates.outname_base == id.outname_base()).first()
2793
        in_dup = False
1✔
2794
        if exists_duplicates:
1✔
2795
            in_dup = len(exists_duplicates) != 0
×
2796
        return in_dup
1✔
2797
    
2798
    def cleanup(self):
1✔
2799
        """
2800
        Remove all scenes from the database, which are no longer stored in their registered location
2801

2802
        Returns
2803
        -------
2804

2805
        """
2806
        missing = self.__select_missing('data')
1✔
2807
        for scene in missing:
1✔
2808
            log.info('Removing missing scene from database tables: {}'.format(scene))
×
2809
            self.drop_element(scene, with_duplicates=True)
×
2810
    
2811
    @staticmethod
1✔
2812
    def encode(string, encoding='utf-8'):
1✔
2813
        if not isinstance(string, str) and hasattr(string, 'encode'):
1✔
2814
            return string.encode(encoding)
×
2815
        else:
2816
            return string
1✔
2817
    
2818
    def export2shp(self, path, table='data'):
1✔
2819
        """
2820
        export the database to a shapefile
2821

2822
        Parameters
2823
        ----------
2824
        path: str
2825
            the path of the shapefile to be written.
2826
            This will overwrite other files with the same name.
2827
            If a folder is given in the path, it is created if not existing.
2828
            If the file extension is missing, '.shp' is added.
2829
        table: str
2830
            the table to write to the shapefile; either 'data' (default) or 'duplicates'
2831
        
2832
        Returns
2833
        -------
2834
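        
        Examples
        --------
        A minimal sketch, assuming an open :class:`Archive` instance `archive`; the output path is illustrative:
        
        >>> archive.export2shp('/path/to/archive_export.shp', table='data')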
        """
2835
        if table not in self.get_tablenames():
1✔
2836
            log.warning('Only data and duplicates can be exported!')
×
2837
            return
×
2838
        
2839
        # add the .shp extension if missing
2840
        if not path.endswith('.shp'):
1✔
2841
            path += '.shp'
×
2842
        
2843
        # creates folder if not present, adds .shp if not within the path
2844
        dirname = os.path.dirname(path)
1✔
2845
        os.makedirs(dirname, exist_ok=True)
1✔
2846
        
2847
        launder_names = {'acquisition_mode': 'acq_mode',
1✔
2848
                         'orbitNumber_abs': 'orbit_abs',
2849
                         'orbitNumber_rel': 'orbit_rel',
2850
                         'cycleNumber': 'cycleNr',
2851
                         'frameNumber': 'frameNr',
2852
                         'outname_base': 'outname'}
2853
        
2854
        sel_tables = ', '.join([f'"{s}" as {launder_names[s]}' if s in launder_names else s
1✔
2855
                                for s in self.get_colnames(table)])
2856
        
2857
        if self.driver == 'sqlite':
1✔
2858
            srcDS = self.dbfile
1✔
2859
        elif self.driver == 'postgresql':
1✔
2860
            srcDS = """PG:host={host} port={port} user={username}
1✔
2861
            dbname={database} password={password} active_schema=public""".format(**self.url_dict)
2862
        else:
2863
            raise RuntimeError('unknown archive driver')
×
2864
        
2865
        gdal.VectorTranslate(destNameOrDestDS=path, srcDS=srcDS,
1✔
2866
                             format='ESRI Shapefile',
2867
                             SQLStatement=f'SELECT {sel_tables} FROM {table}',
2868
                             SQLDialect=self.driver)
2869
    
2870
    def filter_scenelist(self, scenelist):
1✔
2871
        """
2872
        Filter a list of scenes by file names already registered in the database.
2873

2874
        Parameters
2875
        ----------
2876
        scenelist: list[str or ID]
2877
            the scenes to be filtered
2878

2879
        Returns
2880
        -------
2881
        list[str or ID]
2882
            the scenes whose basename is not yet registered in the database
2883

2884
        """
2885
        for item in scenelist:
1✔
2886
            if not isinstance(item, (ID, str)):
1✔
2887
                raise TypeError("items in scenelist must be of type 'str' or 'pyroSAR.ID'")
1✔
2888
        
2889
        # ORM query, get all scenes locations
2890
        scenes_data = self.Session().query(self.Data.scene)
1✔
2891
        registered = [os.path.basename(self.encode(x[0])) for x in scenes_data]
1✔
2892
        scenes_duplicates = self.Session().query(self.Duplicates.scene)
1✔
2893
        duplicates = [os.path.basename(self.encode(x[0])) for x in scenes_duplicates]
1✔
2894
        names = [item.scene if isinstance(item, ID) else item for item in scenelist]
1✔
2895
        filtered = [x for x, y in zip(scenelist, names) if os.path.basename(y) not in registered + duplicates]
1✔
2896
        return filtered
1✔
2897
    
2898
    def get_colnames(self, table='data'):
1✔
2899
        """
2900
        Return the names of all columns of a table.
2901

2902
        Returns
2903
        -------
2904
        list[str]
2905
            the column names of the chosen table
2906
        """
2907
        # get all columns of one table; note that geometry columns are not displayed correctly
2908
        table_info = Table(table, self.meta, autoload=True, autoload_with=self.engine)
1✔
2909
        col_names = table_info.c.keys()
1✔
2910
        
2911
        return sorted([self.encode(x) for x in col_names])
1✔
2912
    
2913
    def get_tablenames(self, return_all=False):
1✔
2914
        """
2915
        Return the names of all tables in the database
2916
        
2917
        Parameters
2918
        ----------
2919
        return_all: bool
2920
            by default, only the tables 'data' and 'duplicates' are returned.
2921
            Set to True to also return all other tables and views created automatically.
2922

2923
        Returns
2924
        -------
2925
        list[str]
2926
            the table names
2927
        """
2928
        #  TODO: make this dynamic
2929
        #  the method was intended to only return user generated tables by default, as well as data and duplicates
2930
        all_tables = ['ElementaryGeometries', 'SpatialIndex', 'geometry_columns', 'geometry_columns_auth',
1✔
2931
                      'geometry_columns_field_infos', 'geometry_columns_statistics', 'geometry_columns_time',
2932
                      'spatial_ref_sys', 'spatial_ref_sys_aux', 'spatialite_history', 'sql_statements_log',
2933
                      'sqlite_sequence', 'views_geometry_columns', 'views_geometry_columns_auth',
2934
                      'views_geometry_columns_field_infos', 'views_geometry_columns_statistics',
2935
                      'virts_geometry_columns', 'virts_geometry_columns_auth', 'virts_geometry_columns_field_infos',
2936
                      'virts_geometry_columns_statistics', 'data_licenses', 'KNN']
2937
        # get tablenames from metadata
2938
        tables = sorted([self.encode(x) for x in self.meta.tables.keys()])
1✔
2939
        if return_all:
1✔
2940
            return tables
×
2941
        else:
2942
            ret = []
1✔
2943
            for i in tables:
1✔
2944
                if i not in all_tables and 'idx_' not in i:
1✔
2945
                    ret.append(i)
1✔
2946
            return ret
1✔
2947
    
2948
    def get_unique_directories(self):
1✔
2949
        """
2950
        Get a list of directories containing registered scenes
2951

2952
        Returns
2953
        -------
2954
        list[str]
2955
            the directory names
2956
        """
2957
        # ORM query, get all directories
2958
        scenes = self.Session().query(self.Data.scene)
1✔
2959
        registered = [os.path.dirname(self.encode(x[0])) for x in scenes]
1✔
2960
        return list(set(registered))
1✔
2961
    
2962
    def import_outdated(self, dbfile):
1✔
2963
        """
2964
        import an older database
2965

2966
        Parameters
2967
        ----------
2968
        dbfile: str or Archive
2969
            the old database. If this is a string, the name of a CSV file is expected.
2970

2971
        Returns
2972
        -------
2973

2974
        """
2975
        if isinstance(dbfile, str) and dbfile.endswith('csv'):
1✔
2976
            with open(dbfile) as csvfile:
1✔
2977
                text = csvfile.read()
1✔
2978
                csvfile.seek(0)
1✔
2979
                dialect = csv.Sniffer().sniff(text)
1✔
2980
                reader = csv.DictReader(csvfile, dialect=dialect)
1✔
2981
                scenes = []
1✔
2982
                for row in reader:
1✔
2983
                    scenes.append(row['scene'])
1✔
2984
                self.insert(scenes)
1✔
2985
        elif isinstance(dbfile, Archive):
1✔
2986
            scenes = dbfile.conn.execute('SELECT scene from data')
1✔
2987
            scenes = [s.scene for s in scenes]
1✔
2988
            self.insert(scenes)
1✔
2989
            reinsert = dbfile.select_duplicates(value='scene')
1✔
2990
            if reinsert is not None:
1✔
2991
                self.insert(reinsert)
1✔
2992
        else:
2993
            raise RuntimeError("'dbfile' must either be a CSV file name or an Archive object")
1✔
2994
    
2995
    def move(self, scenelist, directory, pbar=False):
1✔
2996
        """
2997
        Move a list of files while keeping the database entries up to date.
2998
        If a scene is registered in the database (in either the data or duplicates table),
2999
        the scene entry is directly changed to the new location.
3000

3001
        Parameters
3002
        ----------
3003
        scenelist: list[str]
3004
            the file locations
3005
        directory: str
3006
            a folder to which the files are moved
3007
        pbar: bool
3008
            show a progress bar?
3009

3010
        Returns
3011
        -------
3012
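        
        Examples
        --------
        A minimal sketch, assuming an open :class:`Archive` instance `archive`; the target directory is illustrative:
        
        >>> scenes = archive.select(sensor='S1A')
        >>> archive.move(scenes, '/path/to/new/location')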
        """
3013
        if not os.path.isdir(directory):
×
3014
            os.mkdir(directory)
×
3015
        if not os.access(directory, os.W_OK):
×
3016
            raise RuntimeError('directory cannot be written to')
×
3017
        failed = []
×
3018
        double = []
×
3019
        if pbar:
×
3020
            progress = pb.ProgressBar(max_value=len(scenelist)).start()
×
3021
        else:
3022
            progress = None
×
3023
        
3024
        for i, scene in enumerate(scenelist):
×
3025
            new = os.path.join(directory, os.path.basename(scene))
×
3026
            if os.path.isfile(new):
×
3027
                double.append(new)
×
3028
                continue
×
3029
            try:
×
3030
                shutil.move(scene, directory)
×
3031
            except shutil.Error:
×
3032
                failed.append(scene)
×
3033
                continue
×
3034
            finally:
3035
                if progress is not None:
×
3036
                    progress.update(i + 1)
×
3037
            if self.select(scene=scene) != 0:
×
3038
                table = 'data'
×
3039
            else:
3040
                # using core connection to execute SQL syntax (as was before)
3041
                query_duplicates = self.conn.execute(
×
3042
                    '''SELECT scene FROM duplicates WHERE scene='{0}' '''.format(scene)).fetchall()
3043
                if len(query_duplicates) != 0:
×
3044
                    table = 'duplicates'
×
3045
                else:
3046
                    table = None
×
3047
            if table:
×
3048
                # using core connection to execute SQL syntax (as was before)
3049
                self.conn.execute('''UPDATE {0} SET scene= '{1}' WHERE scene='{2}' '''.format(table, new, scene))
×
3050
        if progress is not None:
×
3051
            progress.finish()
×
3052
        
3053
        if len(failed) > 0:
×
3054
            log.info('The following scenes could not be moved:\n{}'.format('\n'.join(failed)))
×
3055
        if len(double) > 0:
×
3056
            log.info('The following scenes already exist at the target location:\n{}'.format('\n'.join(double)))
×
3057
    
3058
    def select(self, vectorobject=None, mindate=None, maxdate=None, date_strict=True,
1✔
3059
               processdir=None, recursive=False, polarizations=None, return_value="scene", **args):
3060
        """
3061
        select scenes from the database
3062

3063
        Parameters
3064
        ----------
3065
        vectorobject: :class:`~spatialist.vector.Vector` or None
3066
            a geometry with which the scenes need to overlap. The object may only contain one feature.
3067
        mindate: str or datetime.datetime or None
3068
            the minimum acquisition date; strings must be in format YYYYmmddTHHMMSS; default: None
3069
        maxdate: str or datetime.datetime or None
3070
            the maximum acquisition date; strings must be in format YYYYmmddTHHMMSS; default: None
3071
        date_strict: bool
3072
            treat dates as strict limits or also allow flexible limits to incorporate scenes
3073
            whose acquisition period overlaps with the defined limit?
3074

3075
            - strict: start >= mindate & stop <= maxdate
3076
            - not strict: stop >= mindate & start <= maxdate
3077
        processdir: str or None
3078
            A directory to be scanned for already processed scenes;
3079
            the selected scenes will be filtered to those that have not yet been processed. Default: None
3080
        recursive: bool
3081
            (only if `processdir` is not None) should also the subdirectories of the `processdir` be scanned?
3082
        polarizations: list[str] or None
3083
            a list of polarization strings, e.g. ['HH', 'VV']
3084
        return_value: str or List[str]
3085
            the query return value(s). Options:
3086
            
3087
            - `geometry_wkb`: the scene's footprint geometry formatted as WKB
3088
            - `geometry_wkt`: the scene's footprint geometry formatted as WKT
3089
            - `mindate`: the acquisition start datetime in UTC formatted as YYYYmmddTHHMMSS
3090
            - `maxdate`: the acquisition end datetime in UTC formatted as YYYYmmddTHHMMSS
3091
            - all further database column names (see :meth:`~Archive.get_colnames()`)
3092
        **args:
3093
            any further arguments (columns), which are registered in the database. See :meth:`~Archive.get_colnames()`
3094

3095
        Returns
3096
        -------
3097
        list[str] or list[tuple[str]]
3098
            If a single return_value is specified: list of values for that attribute
3099
            If multiple return_values are specified: list of tuples containing the requested attributes
3100
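        
        Examples
        --------
        A minimal sketch, assuming an open :class:`Archive` instance `archive`; the dates are illustrative:
        
        >>> selection = archive.select(sensor='S1A', product='GRD',
        >>>                            mindate='20230101T000000', maxdate='20231231T235959',
        >>>                            return_value=['scene', 'geometry_wkt'])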
        """
3101
        # Convert return_value to list if it's a string
3102
        if isinstance(return_value, str):
1✔
3103
            return_values = [return_value]
1✔
3104
        else:
3105
            return_values = return_value
1✔
3106
        
3107
        return_values_sql = []
1✔
3108
        for val in return_values:
1✔
3109
            if val == 'mindate':
1✔
3110
                return_values_sql.append('start')
1✔
3111
            elif val == 'maxdate':
1✔
3112
                return_values_sql.append('stop')
×
3113
            elif val == 'geometry_wkt':
1✔
3114
                prefix = 'ST_' if self.driver == 'postgresql' else ''
1✔
3115
                return_values_sql.append(f'{prefix}AsText(geometry) as geometry_wkt')
1✔
3116
            elif val == 'geometry_wkb':
1✔
3117
                prefix = 'ST_' if self.driver == 'postgresql' else ''
1✔
3118
                return_values_sql.append(f'{prefix}AsBinary(geometry) as geometry_wkb')
1✔
3119
            else:
3120
                return_values_sql.append(val)
1✔
3121
        
3122
        # Validate that all requested return values exist in the database
3123
        valid_columns = self.get_colnames()
1✔
3124
        extra = ['mindate', 'maxdate', 'geometry_wkt', 'geometry_wkb']
1✔
3125
        normal_returns = [x for x in return_values if x not in extra]
1✔
3126
        invalid_returns = [x for x in normal_returns if x not in valid_columns]
1✔
3127
        if invalid_returns:
1✔
3128
            invalid_str = ', '.join(invalid_returns)
1✔
3129
            msg = (f"The following options are not supported as "
1✔
3130
                   f"return values: {invalid_str}")
3131
            raise ValueError(msg)
1✔
3132
        
3133
        arg_valid = [x for x in args.keys() if x in self.get_colnames()]
1✔
3134
        arg_invalid = [x for x in args.keys() if x not in self.get_colnames()]
1✔
3135
        if len(arg_invalid) > 0:
1✔
3136
            log.info('the following arguments will be ignored as they are not registered in the data base: {}'.format(
1✔
3137
                ', '.join(arg_invalid)))
3138
        arg_format = []
1✔
3139
        vals = []
1✔
3140
        for key in arg_valid:
1✔
3141
            if key == 'scene':
1✔
3142
                arg_format.append('''scene LIKE '%%{0}%%' '''.format(os.path.basename(args[key])))
×
3143
            else:
3144
                if isinstance(args[key], (float, int, str)):
1✔
3145
                    arg_format.append("""{0}='{1}'""".format(key, args[key]))
1✔
3146
                elif isinstance(args[key], (tuple, list)):
1✔
3147
                    arg_format.append("""{0} IN ('{1}')""".format(key, "', '".join(map(str, args[key]))))
1✔
3148
        
3149
        if mindate:
1✔
3150
            if isinstance(mindate, datetime):
1✔
3151
                mindate = mindate.strftime('%Y%m%dT%H%M%S')
×
3152
            if re.search('[0-9]{8}T[0-9]{6}', mindate):
1✔
3153
                if date_strict:
1✔
3154
                    arg_format.append('start>=?')
1✔
3155
                else:
3156
                    arg_format.append('stop>=?')
×
3157
                vals.append(mindate)
1✔
3158
            else:
3159
                log.warning('argument mindate is ignored, must be in format YYYYmmddTHHMMSS')
1✔
3160
        
3161
        if maxdate:
1✔
3162
            if isinstance(maxdate, datetime):
1✔
3163
                maxdate = maxdate.strftime('%Y%m%dT%H%M%S')
×
3164
            if re.search('[0-9]{8}T[0-9]{6}', maxdate):
1✔
3165
                if date_strict:
1✔
3166
                    arg_format.append('stop<=?')
1✔
3167
                else:
3168
                    arg_format.append('start<=?')
×
3169
                vals.append(maxdate)
1✔
3170
            else:
3171
                log.warning('argument maxdate is ignored, must be in format YYYYmmddTHHMMSS')
1✔
3172
        
3173
        if polarizations:
1✔
3174
            for pol in polarizations:
1✔
3175
                if pol in ['HH', 'VV', 'HV', 'VH']:
1✔
3176
                    arg_format.append('{}=1'.format(pol.lower()))
1✔
3177
        
3178
        if vectorobject:
1✔
3179
            if isinstance(vectorobject, Vector):
1✔
3180
                if vectorobject.nfeatures > 1:
1✔
3181
                    raise RuntimeError("'vectorobject' contains more than one feature.")
×
3182
                with vectorobject.clone() as vec:
1✔
3183
                    vec.reproject(4326)
1✔
3184
                    site_geom = vec.convert2wkt(set3D=False)[0]
1✔
3185
                # postgres has a different way to store geometries
3186
                if self.driver == 'postgresql':
1✔
3187
                    statement = f"st_intersects(geometry, 'SRID=4326; {site_geom}')"
1✔
3188
                    arg_format.append(statement)
1✔
3189
                else:
3190
                    arg_format.append('st_intersects(GeomFromText(?, 4326), geometry) = 1')
1✔
3191
                    vals.append(site_geom)
1✔
3192
            else:
3193
                log.warning('argument vectorobject is ignored, must be of type spatialist.vector.Vector')
1✔
3194
        
3195
        if len(arg_format) > 0:
1✔
3196
            subquery = ' WHERE {}'.format(' AND '.join(arg_format))
1✔
3197
        else:
3198
            subquery = ''
×
3199
        
3200
        # Modify the query to select the requested return values
3201
        query = 'SELECT {}, outname_base FROM data{}'.format(', '.join(return_values_sql), subquery)
1✔
3202
        
3203
        # the query gets assembled stepwise here
3204
        for val in vals:
1✔
3205
            query = query.replace('?', """'{0}'""", 1).format(val)
1✔
3206
        log.debug(query)
1✔
3207
        
3208
        # core SQL execution
3209
        query_rs = self.conn.execute(query)
1✔
3210
        
3211
        if processdir and os.path.isdir(processdir):
1✔
3212
            scenes = [x for x in query_rs
1✔
3213
                      if len(finder(processdir, [x[-1]], regex=True, recursive=recursive)) == 0]
3214
        else:
3215
            scenes = query_rs
1✔
3216
        
3217
        ret = []
1✔
3218
        for x in scenes:
1✔
3219
            # If only one return value was requested, append just that value
3220
            if len(return_values) == 1:
1✔
3221
                ret.append(self.encode(x[0]))
1✔
3222
            else:
3223
                # If multiple return values were requested, append a tuple of all values
3224
                ret.append(tuple(self.encode(val) for val in x[:-1]))  # Exclude outname_base
1✔
3225
        
3226
        return ret
1✔
3227
    
3228
    def select_duplicates(self, outname_base=None, scene=None, value='id'):
1✔
3229
        """
3230
        Select scenes from the duplicates table. If both `outname_base` and `scene` are None, all scenes in
3231
        the table are returned; otherwise only those that match the given `outname_base` and/or `scene`.
3232

3233
        Parameters
3234
        ----------
3235
        outname_base: str
3236
            the basename of the scene
3237
        scene: str
3238
            the scene name
3239
        value: str
3240
            the return value; either 'id' or 'scene'
3241

3242
        Returns
3243
        -------
3244
        list[str]
3245
            the selected scene(s)
3246
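        
        Examples
        --------
        List the file names of all scenes currently registered as duplicates, assuming
        an open :class:`Archive` instance `archive`:
        
        >>> duplicates = archive.select_duplicates(value='scene')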
        """
3247
        if value == 'id':
1✔
3248
            key = 0
1✔
3249
        elif value == 'scene':
1✔
3250
            key = 1
1✔
3251
        else:
3252
            raise ValueError("argument 'value' must be either 0 or 1")
×
3253
        
3254
        if not outname_base and not scene:
1✔
3255
            # core SQL execution
3256
            scenes = self.conn.execute('SELECT * from duplicates')
1✔
3257
        else:
3258
            cond = []
1✔
3259
            arg = []
1✔
3260
            if outname_base:
1✔
3261
                cond.append('outname_base=?')
1✔
3262
                arg.append(outname_base)
1✔
3263
            if scene:
1✔
3264
                cond.append('scene=?')
1✔
3265
                arg.append(scene)
1✔
3266
            query = 'SELECT * from duplicates WHERE {}'.format(' AND '.join(cond))
1✔
3267
            for a in arg:
1✔
3268
                query = query.replace('?', ''' '{0}' ''', 1).format(a)
1✔
3269
            # core SQL execution
3270
            scenes = self.conn.execute(query)
1✔
3271
        
3272
        ret = []
1✔
3273
        for x in scenes:
1✔
3274
            ret.append(self.encode(x[key]))
×
3275
        
3276
        return ret
1✔
3277
    
3278
    @property
1✔
3279
    def size(self):
1✔
3280
        """
3281
        get the number of scenes registered in the database
3282

3283
        Returns
3284
        -------
3285
        tuple[int]
3286
            the number of scenes in (1) the main table and (2) the duplicates table
3287
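        
        Examples
        --------
        Retrieve the number of scenes in both tables, assuming an open :class:`Archive` instance `archive`:
        
        >>> n_data, n_duplicates = archive.size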
        """
3288
        # ORM query
3289
        session = self.Session()
1✔
3290
        r1 = session.query(self.Data.outname_base).count()
1✔
3291
        r2 = session.query(self.Duplicates.outname_base).count()
1✔
3292
        session.close()
1✔
3293
        return r1, r2
1✔
3294
    
3295
    def __enter__(self):
1✔
3296
        return self
1✔
3297
    
3298
    def close(self):
1✔
3299
        """
3300
        close the database connection
3301
        """
3302
        self.Session().close()
1✔
3303
        self.conn.close()
1✔
3304
        self.engine.dispose()
1✔
3305
        gc.collect(generation=2)  # this was added as a fix for win PermissionError when deleting sqlite.db files.
1✔
3306
    
3307
    def __exit__(self, exc_type, exc_val, exc_tb):
1✔
3308
        self.close()
1✔
3309
    
3310
    def drop_element(self, scene, with_duplicates=False):
1✔
3311
        """
3312
        Drop a scene from the data table.
3313
        If duplicates table contains matching entry, it will be moved to the data table.
3314

3315
        Parameters
3316
        ----------
3317
        scene: str
3318
            a SAR scene
3319
        with_duplicates: bool
3320
            True: delete matching entry in duplicates table
3321
            False: move matching entry from duplicates into data table
3322

3323
        Returns
3324
        -------
3325
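        
        Examples
        --------
        A minimal sketch, assuming an open :class:`Archive` instance `archive`; the scene path is illustrative:
        
        >>> archive.drop_element('/path/to/scene.zip', with_duplicates=False)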
        """
3326
        # save outname_base from to be deleted entry
3327
        search = self.data_schema.select().where(self.data_schema.c.scene == scene)
1✔
3328
        entry_data_outname_base = []
1✔
3329
        for rowproxy in self.conn.execute(search):
1✔
3330
            entry_data_outname_base.append((rowproxy[12]))
1✔
3331
        # log.info(entry_data_outname_base)
3332
        
3333
        # delete entry in data table
3334
        delete_statement = self.data_schema.delete().where(self.data_schema.c.scene == scene)
1✔
3335
        self.conn.execute(delete_statement)
1✔
3336
        
3337
        return_sentence = 'Entry with scene-id: \n{} \nwas dropped from data'.format(scene)
1✔
3338
        
3339
        # with_duplicates == True, delete entry from duplicates
3340
        if with_duplicates:
1✔
3341
            delete_statement_dup = self.duplicates_schema.delete().where(
×
3342
                self.duplicates_schema.c.outname_base == entry_data_outname_base[0])
3343
            self.conn.execute(delete_statement_dup)
×
3344
            
3345
            log.info(return_sentence + ' and duplicates!')
×
3346
            return
×
3347
        
3348
        # else select scene info matching outname_base from duplicates
3349
        select_in_duplicates_statement = self.duplicates_schema.select().where(
1✔
3350
            self.duplicates_schema.c.outname_base == entry_data_outname_base[0])
3351
        entry_duplicates_scene = []
1✔
3352
        for rowproxy in self.conn.execute(select_in_duplicates_statement):
1✔
3353
            entry_duplicates_scene.append((rowproxy[1]))
1✔
3354
        
3355
        # check if there is a duplicate
3356
        if len(entry_duplicates_scene) == 1:
1✔
3357
            # remove entry from duplicates
3358
            delete_statement_dup = self.duplicates_schema.delete().where(
1✔
3359
                self.duplicates_schema.c.outname_base == entry_data_outname_base[0])
3360
            self.conn.execute(delete_statement_dup)
1✔
3361
            
3362
            # insert scene from duplicates into data
3363
            self.insert(entry_duplicates_scene[0])
1✔
3364
            
3365
            return_sentence += ' and entry with outname_base \n{} \nand scene \n{} \n' \
1✔
3366
                               'was moved from duplicates into data table'.format(
3367
                entry_data_outname_base[0], entry_duplicates_scene[0])
3368
        
3369
        log.info(return_sentence + '!')
1✔
3370
    
3371
    def drop_table(self, table):
1✔
3372
        """
3373
        Drop a table from the database.
3374

3375
        Parameters
3376
        ----------
3377
        table: str
3378
            the table name
3379

3380
        Returns
3381
        -------
3382
        """
3383
        if table in self.get_tablenames(return_all=True):
×
3384
            # this removes the idx tables and entries in geometry_columns for sqlite databases
3385
            if self.driver == 'sqlite':
×
3386
                tab_with_geom = [rowproxy[0] for rowproxy
×
3387
                                 in self.conn.execute("SELECT f_table_name FROM geometry_columns")]
3388
                if table in tab_with_geom:
×
3389
                    self.conn.execute("SELECT DropGeoTable('" + table + "')")
×
3390
            else:
3391
                table_info = Table(table, self.meta, autoload=True, autoload_with=self.engine)
×
3392
                table_info.drop(self.engine)
×
3393
            log.info('table {} dropped from database.'.format(table))
×
3394
        else:
3395
            raise ValueError("table {} is not registered in the database!".format(table))
×
3396
        self.Base = automap_base(metadata=self.meta)
×
3397
        self.Base.prepare(self.engine, reflect=True)
×
3398
    
3399
    @staticmethod
1✔
3400
    def __is_open(ip, port):
1✔
3401
        """
3402
        Checks server connection, from Ben Curtis (github: Fmstrat)
3403

3404
        Parameters
3405
        ----------
3406
        ip: str
3407
            ip of the server
3408
        port: str or int
3409
            port of the server
3410

3411
        Returns
3412
        -------
3413
        bool:
3414
            is the server reachable?
3415
            
3416
        """
3417
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1✔
3418
        s.settimeout(3)
1✔
3419
        try:
1✔
3420
            s.connect((ip, int(port)))
1✔
3421
            s.shutdown(socket.SHUT_RDWR)
1✔
3422
            return True
1✔
3423
        except OSError:
1✔
3424
            return False
1✔
3425
        finally:
3426
            s.close()
1✔
3427
    
3428
    def __check_host(self, ip, port):
1✔
3429
        """
3430
        Calls __is_open() on ip and port, from Ben Curtis (github: Fmstrat)
3431

3432
        Parameters
3433
        ----------
3434
        ip: str
3435
            ip of the server
3436
        port: str or int
3437
            port of the server
3438

3439
        Returns
3440
        -------
3441
        bool:
3442
            is the server reachable?
3443
        """
3444
        ipup = False
1✔
3445
        for i in range(2):
1✔
3446
            if self.__is_open(ip, port):
1✔
3447
                ipup = True
1✔
3448
                break
1✔
3449
            else:
3450
                time.sleep(5)
1✔
3451
        return ipup
1✔
3452

3453

3454
def drop_archive(archive):
1✔
3455
    """
3456
    drop (delete) a scene database
3457
    
3458
    Parameters
3459
    ----------
3460
    archive: pyroSAR.drivers.Archive
3461
        the database to be deleted
3462

3463
    Returns
3464
    -------
3465
    
3466
    See Also
3467
    --------
3468
    :func:`sqlalchemy_utils.functions.drop_database()`
3469
    
3470
    Examples
3471
    --------
3472
    >>> pguser = os.environ.get('PGUSER')
3473
    >>> pgpassword = os.environ.get('PGPASSWORD')
3474
    
3475
    >>> db = Archive('test', postgres=True, port=5432, user=pguser, password=pgpassword)
3476
    >>> drop_archive(db)
3477
    """
3478
    if archive.driver == 'postgresql':
1✔
3479
        url = archive.url
1✔
3480
        archive.close()
1✔
3481
        drop_database(url)
1✔
3482
    else:
3483
        raise RuntimeError('this function only works for PostgreSQL databases. '
×
3484
                           'For SQLite databases it is recommended to just delete the DB file.')
3485

3486

3487
def getFileObj(scene, filename):
1✔
3488
    """
3489
    Load a file in a SAR scene archive into a readable file object.
3490

3491
    Parameters
3492
    ----------
3493
    scene: str
3494
        the scene archive. Can be either a directory or a compressed archive of type `zip` or `tar.gz`.
3495
    filename: str
3496
        the name of a file in the scene archive, easiest to get with method :meth:`~ID.findfiles`
3497

3498
    Returns
3499
    -------
3500
    ~io.BytesIO
3501
        a file object
3502
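    
    Examples
    --------
    A minimal sketch reading the manifest of a zipped Sentinel-1 scene, assuming the
    archive exists locally; the scene name is illustrative:
    
    >>> from pyroSAR import identify
    >>> scene = identify('S1A_IW_SLC__1SDV_20150330T170734_20150330T170801_005264_006A6C_DA69.zip')
    >>> name = scene.findfiles('manifest.safe')[0]
    >>> obj = getFileObj(scene.scene, name)
    >>> content = obj.read()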
    """
3503
    membername = filename.replace(scene, '').strip(r'\/')
1✔
3504
    
3505
    if not os.path.exists(scene):
1✔
3506
        raise RuntimeError('scene does not exist')
1✔
3507
    
3508
    if os.path.isdir(scene):
1✔
3509
        obj = BytesIO()
1✔
3510
        with open(filename, 'rb') as infile:
1✔
3511
            obj.write(infile.read())
1✔
3512
        obj.seek(0)
1✔
3513
    
3514
    # the scene consists of a single file
3515
    elif os.path.isfile(scene) and scene == filename:
1✔
3516
        obj = BytesIO()
1✔
3517
        with open(filename, 'rb') as infile:
1✔
3518
            obj.write(infile.read())
1✔
3519
        obj.seek(0)
1✔
3520
    
3521
    elif zf.is_zipfile(scene):
1✔
3522
        obj = BytesIO()
1✔
3523
        with zf.ZipFile(scene, 'r') as zip:
1✔
3524
            obj.write(zip.open(membername).read())
1✔
3525
        obj.seek(0)
1✔
3526
    
3527
    elif tf.is_tarfile(scene):
1✔
3528
        obj = BytesIO()
1✔
3529
        tar = tf.open(scene, 'r:gz')
1✔
3530
        obj.write(tar.extractfile(membername).read())
1✔
3531
        tar.close()
1✔
3532
        obj.seek(0)
1✔
3533
    else:
3534
        raise RuntimeError('input must be either a file name or a location in a zip or tar archive')
1✔
3535
    return obj
1✔
3536

3537

3538
def parse_date(x):
1✔
3539
    """
3540
    this function gathers known time formats provided in the different SAR products and converts them to a common
3541
    standard of the form YYYYMMDDTHHMMSS
3542

3543
    Parameters
3544
    ----------
3545
    x: str or ~datetime.datetime
3546
        the time stamp to be converted
3547

3548
    Returns
3549
    -------
3550
    str
3551
        the converted time stamp in format YYYYmmddTHHMMSS
3552
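    
    Examples
    --------
    Conversion of one of the supported input formats:
    
    >>> parse_date('30-Mar-2015 17:07:34.000000')
    '20150330T170734'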
    """
3553
    if isinstance(x, datetime):
1✔
3554
        return x.strftime('%Y%m%dT%H%M%S')
1✔
3555
    elif isinstance(x, str):
1✔
3556
        for timeformat in ['%d-%b-%Y %H:%M:%S.%f',
1✔
3557
                           '%Y%m%d%H%M%S%f',
3558
                           '%Y-%m-%dT%H:%M:%S.%f',
3559
                           '%Y-%m-%dT%H:%M:%S.%fZ',
3560
                           '%Y%m%d %H:%M:%S.%f']:
3561
            try:
1✔
3562
                return strftime('%Y%m%dT%H%M%S', strptime(x, timeformat))
1✔
3563
            except (TypeError, ValueError):
1✔
3564
                continue
1✔
3565
        raise ValueError('unknown time format; check function parse_date')
1✔
3566
    else:
3567
        raise ValueError('input must be either a string or a datetime object')
1✔