• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

johntruckenbrodt / spatialist / 14925397507

09 May 2025 08:37AM UTC coverage: 80.579%. First build
14925397507

push

github

johntruckenbrodt
Merge branch 'main' of https://github.com/johntruckenbrodt/spatialist into docs/restructure

# Conflicts:
#	docs/source/spatialist.rst

14 of 20 new or added lines in 2 files covered. (70.0%)

1975 of 2451 relevant lines covered (80.58%)

0.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.4
/spatialist/ancillary.py
1
##############################################################
2
# core routines for software spatialist
3
# John Truckenbrodt 2014-2025
4
##############################################################
5
"""
6
This script gathers central functions and classes for general applications
7
"""
8
import dill
1✔
9
import string
1✔
10
import shutil
1✔
11
import tempfile
1✔
12
import platform
1✔
13
import tblib.pickling_support
1✔
14
from io import StringIO
1✔
15
from urllib.parse import urlparse, urlunparse, urlencode
1✔
16
from builtins import str
1✔
17
import re
1✔
18
import sys
1✔
19
import fnmatch
1✔
20
import inspect
1✔
21
import itertools
1✔
22
import os
1✔
23
import subprocess as sp
1✔
24
import tarfile as tf
1✔
25
import zipfile as zf
1✔
26
from typing import Iterable, List
1✔
27
import numpy as np
1✔
28
import pandas as pd
1✔
29
import progressbar as pb
1✔
30

31
try:
1✔
32
    import pathos.multiprocessing as mp
1✔
33
except ImportError:
×
34
    pass
×
35

36

37
class HiddenPrints:
    """
    | Suppress console stdout prints, i.e. redirect them to a temporary string object.
    | Adapted from https://stackoverflow.com/questions/8391411/suppress-calls-to-print-python

    Examples
    --------
    >>> with HiddenPrints():
    >>>     print('foobar')
    >>> print('foobar')
    """
    
    def __enter__(self):
        # swap the real stdout for an in-memory buffer, keeping a reference
        # so it can be restored on exit
        self._original_stdout, sys.stdout = sys.stdout, StringIO()
    
    def __exit__(self, exc_type, exc_val, exc_tb):
        # restore the real stdout regardless of whether the body raised
        sys.stdout = self._original_stdout
55

56

57
def decode_filter(text, encoding='utf-8'):
    """
    decode a binary object to str and filter out non-printable characters
    
    Parameters
    ----------
    text: bytes or None
        the binary object to be decoded
    encoding: str
        the encoding to be used
    
    Returns
    -------
    str or None
        the decoded and filtered string; None if `text` was None
    """
    if text is None:
        return None
    # undecodable bytes are silently dropped
    decoded = text.decode(encoding, errors='ignore')
    printable = set(string.printable)
    return ''.join(char for char in decoded if char in printable)
80

81

82
def dictmerge(x, y):
    """
    merge two dictionaries into a new one;
    on key collisions the values from `y` win
    """
    # .copy() (rather than dict(x)) preserves the concrete mapping type of x
    merged = x.copy()
    merged.update(y)
    return merged
89

90

91
# todo consider using itertools.chain like in function finder
def dissolve(inlist):
    """
    list and tuple flattening
    
    Parameters
    ----------
    inlist: list
        the list with sub-lists or tuples to be flattened
    
    Returns
    -------
    list
        the flattened result
    
    Examples
    --------
    >>> dissolve([[1, 2], [3, 4]])
    [1, 2, 3, 4]
    
    >>> dissolve([(1, 2, (3, 4)), [5, (6, 7)]])
    [1, 2, 3, 4, 5, 6, 7]
    """
    flattened = []
    for item in inlist:
        # tuples are treated exactly like lists
        if isinstance(item, tuple):
            item = list(item)
        if isinstance(item, list):
            # recurse into nested sequences
            flattened.extend(dissolve(item))
        else:
            flattened.append(item)
    return flattened
119

120

121
def parent_dirs(path: str) -> Iterable[str]:
    """
    generator that yields all parent directories of a zipfile path

    Parameters
    ----------
    path: str
        a path to get parent directories from

    Yields
    ------
    Iterable[str]
        generator of parent directories, outermost first,
        each with a trailing '/'
    """
    parent = os.path.dirname(path)
    if parent:
        # bugfix: the recursive generator was previously created but never
        # consumed ('parent_dirs(parent)' without 'yield from'), so only the
        # immediate parent was ever yielded; 'yield from' propagates all
        # grandparent directories as well
        yield from parent_dirs(parent)
        yield parent + "/"
139

140

141
def namelist_with_implicit_dirs(root: zf.ZipFile) -> List[str]:
    """
    returns a list of files in zipfile archive, including implicit directories

    Parameters
    ----------
    root: ZipFile
        zipfile archive to get the namelist from

    Returns
    -------
    List[str]
        list of zipfile folders and files in the archive
    """
    # a set removes duplicate parent directories shared by several entries
    names = set()
    for entry in root.namelist():
        names.add(entry)
        for parent in parent_dirs(entry):
            names.add(parent)
    return list(names)
161

162

163
def finder(target, matchlist, foldermode=0, regex=False, recursive=True):
    """
    function for finding files/folders in folders and their subdirectories

    Parameters
    ----------
    target: str or list[str]
        a directory, zip- or tar-archive or a list of them to be searched
    matchlist: list[str]
        a list of search patterns
    foldermode: int
        * 0: only files
        * 1: files and folders
        * 2: only folders
    regex: bool
        are the search patterns in matchlist regular expressions or unix shell standard (default)?
    recursive: bool
        search target recursively into all subdirectories or only in the top level?
        This is currently only implemented for parameter `target` being a directory.

    Returns
    -------
    list[str]
        the absolute names of files/folders matching the patterns

    Raises
    ------
    ValueError
        if `foldermode` is not 0, 1 or 2
    RuntimeError
        if `target` is a file that is neither a zip nor a tar archive,
        or a string that is neither a directory nor a file
    TypeError
        if `target` is neither a string nor a list
    """
    if foldermode not in [0, 1, 2]:
        raise ValueError("'foldermode' must be either 0, 1 or 2")
    
    # match patterns
    if isinstance(target, str):
        
        # combine all patterns into one alternation; shell wildcards are
        # first translated to regular expressions
        pattern = r'|'.join(matchlist if regex else [fnmatch.translate(x) for x in matchlist])
        
        if os.path.isdir(target):
            if recursive:
                out = dissolve([[os.path.join(root, x)
                                 for x in dirs + files
                                 if re.search(pattern, x)]
                                for root, dirs, files in os.walk(target)])
            else:
                out = [os.path.join(target, x)
                       for x in os.listdir(target)
                       if re.search(pattern, x)]
            
            if foldermode == 0:
                out = [x for x in out if not os.path.isdir(x)]
            if foldermode == 2:
                out = [x for x in out if os.path.isdir(x)]
            
            return sorted(out)
        
        elif os.path.isfile(target):
            if zf.is_zipfile(target):
                # fix: renamed context variable so the builtin 'zip' is not shadowed
                with zf.ZipFile(target, 'r') as archive:
                    out = [os.path.join(target, name)
                           for name in namelist_with_implicit_dirs(archive)
                           if re.search(pattern, os.path.basename(name.strip('/')))]
                
                # directory entries carry a trailing '/'
                if foldermode == 0:
                    out = [x for x in out if not x.endswith('/')]
                elif foldermode == 1:
                    out = [x.strip('/') for x in out]
                elif foldermode == 2:
                    out = [x.strip('/') for x in out if x.endswith('/')]
                
                return sorted(out)
            
            elif tf.is_tarfile(target):
                # fix: context manager ensures the archive is closed even if
                # pattern matching or member inspection raises
                with tf.open(target) as tar:
                    out = [name for name in tar.getnames()
                           if re.search(pattern, os.path.basename(name.strip('/')))]
                    
                    if foldermode == 0:
                        out = [x for x in out if not tar.getmember(x).isdir()]
                    elif foldermode == 2:
                        out = [x for x in out if tar.getmember(x).isdir()]
                
                out = [os.path.join(target, x) for x in out]
                
                return sorted(out)
            
            else:
                raise RuntimeError("if parameter 'target' is a file, "
                                   "it must be a zip or tar archive:\n    {}"
                                   .format(target))
        else:
            raise RuntimeError("if parameter 'target' is of type str, "
                               "it must be a directory or a file:\n    {}"
                               .format(target))
    
    elif isinstance(target, list):
        # search each target individually and concatenate the results
        groups = [finder(x, matchlist, foldermode, regex, recursive) for x in target]
        return list(itertools.chain(*groups))
    
    else:
        raise TypeError("parameter 'target' must be of type str or list")
261

262

263
def multicore(function, cores, multiargs, pbar=False, **singleargs):
    """
    wrapper for multicore process execution

    Parameters
    ----------
    function
        individual function to be applied to each process item
    cores: int
        the number of subprocesses started/CPUs used;
        this value is reduced in case the number of subprocesses is smaller
    multiargs: dict
        a dictionary containing sub-function argument names as keys and lists of arguments to be
        distributed among the processes as values
    pbar: bool
        add a progress bar? Does not yet work on Windows.
    singleargs
        all remaining arguments which are invariant among the subprocesses

    Returns
    -------
    None or list
        the return of the function for all subprocesses

    Notes
    -----
    - all `multiargs` value lists must be of same length, i.e. all argument keys must be explicitly defined for each
      subprocess
    - all function arguments passed via `singleargs` must be provided with the full argument name and its value
      (i.e. argname=argval); default function args are not accepted
    - if the processes return anything else than None, this function will return a list of results
    - if all processes return None, this function will be of type void

    Examples
    --------
    >>> def add(x, y, z):
    >>>     return x + y + z
    >>> multicore(add, cores=2, multiargs={'x': [1, 2]}, y=5, z=9)
    [15, 16]
    >>> multicore(add, cores=2, multiargs={'x': [1, 2], 'y': [5, 6]}, z=9)
    [15, 17]

    See Also
    --------
    :mod:`pathos.multiprocessing`
    """
    # enable pickling of tracebacks so that exceptions raised in worker
    # processes can be transported back and re-raised in the parent
    tblib.pickling_support.install()
    
    # compare the function arguments with the multi and single arguments and raise errors if mismatches occur
    check = inspect.getfullargspec(function)
    varkw = check.varkw
    
    # the check is skipped for functions taking *args/**kwargs, since those
    # accept arbitrary argument names
    if not check.varargs and not varkw:
        multiargs_check = [x for x in multiargs if x not in check.args]
        singleargs_check = [x for x in singleargs if x not in check.args]
        if len(multiargs_check) > 0:
            raise AttributeError('incompatible multi arguments: {0}'.format(', '.join(multiargs_check)))
        if len(singleargs_check) > 0:
            raise AttributeError('incompatible single arguments: {0}'.format(', '.join(singleargs_check)))
    
    # compare the list lengths of the multi arguments and raise errors if they are of different length
    arglengths = list(set([len(multiargs[x]) for x in multiargs]))
    if len(arglengths) > 1:
        raise AttributeError('multi argument lists of different length')
    if arglengths[0] == 0:
        raise RuntimeError('did not get any multiargs')
    
    # prevent starting more threads than necessary
    cores = cores if arglengths[0] >= cores else arglengths[0]
    
    # create a list of dictionaries each containing the arguments for individual
    # function calls to be passed to the multicore processes
    processlist = [dictmerge(dict([(arg, multiargs[arg][i]) for arg in multiargs]), singleargs)
                   for i in range(len(multiargs[list(multiargs.keys())[0]]))]
    
    if platform.system() == 'Windows':
        
        # in Windows parallel processing needs to strictly be in a "if __name__ == '__main__':" wrapper
        # it was thus necessary to outsource this to a different script and try to serialize all input for sharing objects
        # https://stackoverflow.com/questions/38236211/why-multiprocessing-process-behave-differently-on-windows-and-linux-for-global-o
        
        # a helper script to perform the parallel processing
        script = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'multicore_helper.py')
        
        # a temporary file to write the serialized function variables
        # NOTE(review): a fixed file name means concurrent multicore() calls on
        # the same machine would overwrite each other's dump — confirm intended
        tmpfile = os.path.join(tempfile.gettempdir(), 'spatialist_dump')
        
        # check if everything can be serialized
        if not dill.pickles([function, cores, processlist]):
            raise RuntimeError('cannot fully serialize function arguments;\n'
                               ' see https://github.com/uqfoundation/dill for supported types')
        
        # write the serialized variables
        with open(tmpfile, 'wb') as tmp:
            dill.dump([function, cores, processlist], tmp, byref=False)
        
        # run the helper script
        proc = sp.Popen([sys.executable, script], stdin=sp.PIPE, stderr=sp.PIPE)
        out, err = proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(err.decode())
        
        # retrieve the serialized output of the processing which was written to the temporary file by the helper script
        with open(tmpfile, 'rb') as tmp:
            result = dill.load(tmp)
        return result
    else:
        # worker wrapper: returns either the function result or the wrapped
        # exception so the parent can decide how to re-raise it
        def wrapper(**kwargs):
            try:
                # hide print messages in the sub-processes
                with HiddenPrints():
                    out = function(**kwargs)
                return out
            except Exception as e:
                return ExceptionWrapper(e)
        
        jobs = len(processlist)
        progress = None
        # NOTE(review): this mirrors the task batching presumably done inside
        # pathos (4 chunks per core); it is only used for the progress estimate
        chunksize, remainder = divmod(jobs, cores * 4)
        if remainder:
            chunksize += 1
        
        if pbar:
            widgets = [pb.Percentage(), pb.Bar(), pb.Timer(), ' ', pb.ETA()]
            progress = pb.ProgressBar(max_value=jobs, widgets=widgets).start()
        
        with mp.ProcessPool(processes=cores) as pool:
            results = pool.amap(lambda x: wrapper(**x), processlist)
            # busy-wait until all asynchronous results are available
            while not results.ready():
                # _number_left is a private pathos/multiprocess attribute;
                # the derived progress value is an estimate only — TODO confirm
                # it remains available across pathos versions
                left = results._number_left * chunksize
                done = jobs - left if left <= jobs else 0
                if pbar:
                    progress.update(done)
            results = results.get()
        
        if progress is not None:
            progress.finish()
        
        i = 0
        out = []
        for item in results:
            if isinstance(item, ExceptionWrapper):
                # annotate the exception with the failing call's arguments
                item.ee = type(item.ee)(str(item.ee) +
                                        "\n(called function '{}' with args {})"
                                        .format(function.__name__, processlist[i]))
                # re_raise() itself raises, so this outer 'raise' never
                # actually evaluates its operand
                raise (item.re_raise())
            out.append(item)
            i += 1
        
        # evaluate the return of the processing function;
        # if any value is not None then the whole list of results is returned
        # NOTE(review): 'eval' shadows the builtin of the same name
        eval = [x for x in out if x is not None]
        if len(eval) == 0:
            return None
        else:
            return out
419

420

421
def add(x, y, z):
    """
    dummy function solely for testing the multicore function;
    defining it in the test script is not possible since it cannot be serialized
    with a reference module that does not exist (i.e. the test script)
    """
    # left-to-right addition, same as x + y + z
    partial = x + y
    return partial + z
428

429

430
class ExceptionWrapper(object):
    """
    | class for enabling traceback pickling in function multiprocess
    | https://stackoverflow.com/questions/6126007/python-getting-a-traceback-from-a-multiprocessing-process
    | https://stackoverflow.com/questions/34463087/valid-syntax-in-both-python-2-x-and-3-x-for-raising-exception
    """
    
    def __init__(self, ee):
        # the wrapped exception instance
        self.ee = ee
        # the traceback active at construction time; this is expected to be
        # called from inside an 'except' block so sys.exc_info() is populated
        __, __, self.tb = sys.exc_info()
    
    def re_raise(self):
        # the former Python-2 'exec' fallback was removed: this module is
        # Python-3-only (it imports urllib.parse and uses
        # inspect.getfullargspec elsewhere), so with_traceback is always
        # available; behavior is unchanged for Python 3
        raise self.ee.with_traceback(self.tb)
448

449

450
def parse_literal(x):
    """
    return the smallest possible data type for a string or list of strings

    Parameters
    ----------
    x: str or list
        a string to be parsed

    Returns
    -------
    int, float or str
        the parsing result
    
    Examples
    --------
    >>> isinstance(parse_literal('1.5'), float)
    True
    
    >>> isinstance(parse_literal('1'), int)
    True
    
    >>> isinstance(parse_literal('foobar'), str)
    True
    """
    if isinstance(x, list):
        # parse each list element individually
        return [parse_literal(item) for item in x]
    if not isinstance(x, (bytes, str)):
        raise TypeError('input must be a string or a list of strings')
    # try the narrowest numeric type first, then fall back to the input itself
    for cast in (int, float):
        try:
            return cast(x)
        except ValueError:
            continue
    return x
487

488

489
class Queue(object):
    """
    classical FIFO queue implementation backed by a plain list
    """
    
    def __init__(self, inlist=None):
        # None (the default) creates an empty queue; a mutable default
        # argument is deliberately avoided
        self.stack = inlist if inlist is not None else []
    
    def empty(self):
        # True when no items are queued
        return len(self.stack) == 0
    
    def length(self):
        # number of queued items
        return len(self.stack)
    
    def push(self, x):
        # enqueue at the end
        self.stack.append(x)
    
    def pop(self):
        # dequeue from the front; returns None when the queue is empty
        if not self.empty():
            return self.stack.pop(0)
511

512

513
def rescale(inlist, newrange=(0, 1)):
    """
    rescale the values in a list between the values in newrange (a tuple with the new minimum and maximum)
    
    Parameters
    ----------
    inlist: list
        the numbers to be rescaled
    newrange: tuple
        the target (minimum, maximum) value range
    
    Returns
    -------
    list
        the linearly rescaled values as floats
    
    Raises
    ------
    RuntimeError
        if all input values are identical, in which case the scaling factor is undefined
    """
    old_max = max(inlist)
    old_min = min(inlist)
    
    if old_min == old_max:
        # fix: grammatically corrected error message
        # (was "list contains of only one unique value")
        raise RuntimeError('list contains only one unique value')
    
    old_range = old_max - old_min
    new_range = newrange[1] - newrange[0]
    # linear mapping from [old_min, old_max] to [newrange[0], newrange[1]]
    result = [(((float(x) - old_min) * new_range) / old_range) + newrange[0] for x in inlist]
    return result
527

528

529
def run(cmd, outdir=None, logfile=None, inlist=None, void=True, errorpass=False, env=None):
    """
    | wrapper for subprocess execution including logfile writing and command prompt piping
    | this is a convenience wrapper around the :mod:`subprocess` module and calls
      its class :class:`~subprocess.Popen` internally.
    
    Parameters
    ----------
    cmd: list
        the command arguments
    outdir: str or None
        the directory to execute the command in
    logfile: str or None
        a file to write stdout to
    inlist: list or None
        a list of arguments passed to stdin, i.e. arguments passed to interactive input of the program
    void: bool
        return stdout and stderr?
    errorpass: bool
        if False, a :class:`subprocess.CalledProcessError` is raised if the command fails
    env: dict or None
        the environment to be passed to the subprocess

    Returns
    -------
    None or Tuple
        a tuple of (stdout, stderr) if `void` is False otherwise None
    
    Raises
    ------
    subprocess.CalledProcessError
        if the command returns a non-zero exit code and `errorpass` is False
    """
    cmd = [str(x) for x in dissolve(cmd)]
    if outdir is None:
        outdir = os.getcwd()
    log = sp.PIPE if logfile is None else open(logfile, 'a')
    # fix: the logfile handle is now closed in a 'finally' clause; previously
    # it leaked when CalledProcessError (or any other exception) was raised
    try:
        proc = sp.Popen(cmd, stdin=sp.PIPE, stdout=log, stderr=sp.PIPE, cwd=outdir, env=env)
        instream = None if inlist is None \
            else ''.join([str(x) + '\n' for x in inlist]).encode('utf-8')
        out, err = proc.communicate(instream)
        out = decode_filter(out)
        err = decode_filter(err)
        if not errorpass and proc.returncode != 0:
            raise sp.CalledProcessError(proc.returncode, cmd, err)
        # add line for separating log entries of repeated function calls
        # (only on success, as before)
        if logfile:
            log.write('#####################################################################\n')
    finally:
        if logfile:
            log.close()
    if not void:
        return out, err
575

576

577
class Stack(object):
    """
    classical stack implementation
    input can be a list, a single value or None (i.e. Stack())
    """
    
    def __init__(self, inlist=None):
        # a list seeds the stack directly; None creates an empty stack;
        # any other value becomes the single initial element
        if inlist is None:
            self.stack = []
        elif isinstance(inlist, list):
            self.stack = inlist
        else:
            self.stack = [inlist]
    
    def empty(self):
        """
        check whether stack is empty
        """
        return len(self.stack) == 0
    
    def flush(self):
        """
        empty the stack
        """
        self.stack = []
    
    def length(self):
        """
        get the length of the stack
        """
        return len(self.stack)
    
    def push(self, x):
        """
        append items to the stack; input can be a single value or a list
        """
        if isinstance(x, list):
            self.stack.extend(x)
        else:
            self.stack.append(x)
    
    def pop(self):
        """
        return the last stack element and delete it from the list;
        None if the stack is empty
        """
        if not self.empty():
            return self.stack.pop()
627

628

629
def union(a, b):
    """
    intersection of the elements of two lists.
    
    NOTE(review): despite its name and former docstring ("union of two
    lists"), this function computes the set INTERSECTION
    (``set(a) & set(b)``), not the union. Callers may rely on the actual
    behavior, so only the documentation is corrected here; the element
    order of the result is arbitrary because it is derived from a set.
    """
    return list(set(a) & set(b))
634

635

636
def urlQueryParser(url, querydict):
    """
    build a new URL by replacing the query part of `url`
    with the URL-encoded key-value pairs of `querydict`
    """
    parts = urlparse(url)
    query = urlencode(querydict)
    return urlunparse(parts._replace(query=query))
642

643

644
def which(program, mode=os.F_OK | os.X_OK):
    """
    | mimics UNIX's which
    | thin wrapper around :func:`shutil.which`, available since Python 3.3
    
    The former manual fallback for interpreters older than 3.3 has been
    removed: this module is Python-3-only (it imports :mod:`urllib.parse`),
    so the version check always selected the :func:`shutil.which` branch.
    
    Parameters
    ----------
    program: str
        the program to be found
    mode: os.F_OK or os.X_OK
        the mode of the found file, i.e. file exists or file is executable; see :func:`os.access`

    Returns
    -------
    str or None
        the full path and name of the command
    """
    return shutil.which(program, mode=mode)
680

681

682
def parallel_apply_along_axis(func1d, axis, arr, cores=4, *args, **kwargs):
    """
    Like :func:`numpy.apply_along_axis()` but using multiple threads.
    Adapted from `here <https://stackoverflow.com/questions/45526700/
    easy-parallelization-of-numpy-apply-along-axis>`_.

    Parameters
    ----------
    func1d: function
        the function to be applied
    axis: int
        the axis along which to apply `func1d`
    arr: numpy.ndarray
        the input array
    cores: int
        the number of parallel cores
    args: any
        Additional arguments to `func1d`.
    kwargs: any
        Additional named arguments to `func1d`.

    Returns
    -------
    numpy.ndarray
    """
    if cores <= 0:
        raise ValueError('cores must be larger than 0')
    elif cores == 1:
        # bugfix: the single-core shortcut is now evaluated BEFORE any axis
        # swapping; previously the array was swapped first but the original
        # 'axis' was passed on, producing wrong results for axis=0
        return np.apply_along_axis(func1d, axis, arr, *args, **kwargs)
    
    # Effective axis where apply_along_axis() will be applied by each
    # worker (any non-zero axis number would work, so as to allow the use
    # of `np.array_split()`, which is only done on axis 0):
    effective_axis = 1 if axis == 0 else axis
    if effective_axis != axis:
        arr = arr.swapaxes(axis, effective_axis)
    
    # worker task: apply func1d to one sub-array
    def unpack(arguments):
        func1d, axis, arr, args, kwargs = arguments
        return np.apply_along_axis(func1d, axis, arr, *args, **kwargs)
    
    # fix: split into one chunk per requested core (was mp.cpu_count(),
    # which ignored the 'cores' parameter); the result is identical since
    # the chunks are concatenated again below
    chunks = [(func1d, effective_axis, sub_arr, args, kwargs)
              for sub_arr in np.array_split(arr, cores)]
    
    # NOTE(review): for axis=0 and a func1d returning arrays (rather than
    # scalars) the concatenated result is not swapped back and may be
    # transposed relative to numpy.apply_along_axis — TODO confirm intended
    pool = mp.Pool(cores)
    individual_results = pool.map(unpack, chunks)
    # Freeing the workers:
    pool.close()
    pool.join()
    
    return np.concatenate(individual_results)
733

734

735
def sampler(mask, samples=None, dim=1, replace=False, seed=42):
    """
    General function to select random sample indexes from arrays.
    Adapted from package `S1_ARD <https://github.com/johntruckenbrodt/S1_ARD>`_.

    Parameters
    ----------
    mask: numpy.ndarray
        A 2D boolean mask to limit the sample selection.
    samples: int or None
        The number of samples to select. If None, the positions of all matching values are returned.
        If there are fewer values than required samples, the positions of all values are returned.
    dim: int
        The dimensions of the output array and its indexes. If 1, the returned array has one
        dimension and the indexes refer to the one-dimensional (i.e., flattened) representation
        of the input mask. If 2, the output array is of shape `(2, samples)` with two separate
        2D arrays for y (index 0) and x respectively, which reference positions in the original
        2D shape of the input array.
    replace: bool
        Draw samples with or without replacement?
    seed: int
        Seed used to initialize the pseudo-random number generator.
    
    Returns
    -------
    numpy.ndarray
        The index positions of the generated random samples as 1D or 2D array.
    
    Examples
    --------
    >>> import numpy as np
    >>> from spatialist.ancillary import sampler
    >>> array = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    >>> mask = array > 2
    >>> s1d = sampler(mask=mask, samples=2, dim=1)
    >>> s2d = sampler(mask=mask, samples=2, dim=2)
    >>> print(s1d)
    [2 3]
    >>> print(s2d)
    [[1 1]
     [0 1]]
    >>> print(array.flatten()[s1d] == array[s2d[0], s2d[1]])
    [ True  True]
    
    See Also
    --------
    numpy.random.seed
    numpy.random.choice
    """
    # fix: renamed shape unpacking; mask.shape is (rows, cols) but the
    # original assigned 'cols, rows', using 'rows' for the column count
    rows, cols = mask.shape
    # flat indexes of all True positions
    indices = np.where(mask.flatten())[0]
    # never request more samples than there are candidate positions
    samplesize = min(indices.size, samples) if samples is not None else indices.size
    np.random.seed(seed)
    sample_ids = np.random.choice(a=indices, size=samplesize, replace=replace)
    if dim == 1:
        return sample_ids
    elif dim == 2:
        # bugfix: allocate with the effective 'samplesize' instead of the
        # requested 'samples', which crashed for samples=None and broke
        # broadcasting when fewer candidates than samples were available
        out = np.empty(shape=(2, samplesize), dtype=np.uint)
        out[0] = sample_ids // cols  # y (row) indexes
        out[1] = sample_ids % cols  # x (column) indexes
        return out
    else:
        raise ValueError("'dim' must either be 1 or 2")
798

799

800
def ogr_datetime_to_pandas(ogr_dt):
    """
    Convert an OGR DateTime tuple to a pandas Timestamp.

    Parameters
    ----------
    ogr_dt : tuple
        A 7-element tuple in the format (year, month, day, hour, minute,
        second, tz_flag) as returned by :meth:`osgeo.ogr.Feature.GetFieldAsDateTime`.

    Returns
    -------
    pandas.Timestamp
        A pandas Timestamp object representing the input datetime.

    Notes
    -----
    The `tz_flag` is interpreted as follows:
    
    - 0: Unknown timezone (returns naive timestamp).
    - 1: Local time (returns naive timestamp).
    - 100: UTC.
    - >100: positive offset from UTC in 15-minute increments
      (e.g. 104 corresponds to UTC+01:00), per GDAL's TZFlag encoding.
    
    NOTE(review): flags between 2 and 99 (GDAL's encoding of negative UTC
    offsets) are currently returned as naive timestamps — confirm whether
    such values occur in practice.

    Examples
    --------
    >>> ogr_datetime_to_pandas((2024, 9, 1, 11, 16, 2, 100))
    Timestamp('2024-09-01 11:16:02+0000', tz='UTC')
    
    Raises
    ------
    RuntimeError
        if the tuple cannot be converted to a Timestamp
    """
    from datetime import timedelta, timezone
    
    year, month, day, hour, minute, second, tz_flag = ogr_dt
    
    try:
        dt = pd.Timestamp(year, month, day, hour, minute, int(second))
        if tz_flag == 100:
            return dt.tz_localize('UTC')
        elif tz_flag > 100:
            # bugfix: GDAL encodes the offset in 15-minute steps above 100,
            # not in minutes; additionally, the time is now localized to the
            # fixed-offset timezone instead of being labeled UTC and shifted,
            # which changed the represented instant
            offset_min = (tz_flag - 100) * 15
            return dt.tz_localize(timezone(timedelta(minutes=offset_min)))
        else:
            # unknown timezone or local time: return a naive timestamp
            return dt
    except Exception as err:
        # chain the original error for easier debugging
        raise RuntimeError('Failed to convert datetime to pandas Timestamp') from err
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc