• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

cuthbertLab / music21 / 5433206976

pending completion
5433206976

Pull #1240

github

web-flow
Merge a825cd291 into baf32cb57
Pull Request #1240: Apply tuplet to multiple components to express durations like 5/6 or 7/3 QL

40 of 40 new or added lines in 4 files covered. (100.0%)

80763 of 86820 relevant lines covered (93.02%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.48
/music21/common/stringTools.py
1
# -*- coding: utf-8 -*-
2
# ------------------------------------------------------------------------------
3
# Name:         common/stringTools.py
4
# Purpose:      Utilities for strings
5
#
6
# Authors:      Michael Scott Asato Cuthbert
7
#               Christopher Ariza
8
#
9
# Copyright:    Copyright © 2009-2015 Michael Scott Asato Cuthbert
10
# License:      BSD, see license.txt
11
# ------------------------------------------------------------------------------
12
'''
1✔
13
Tools for working with strings
14
'''
15
from __future__ import annotations
1✔
16

17
__all__ = [
1✔
18
    'whitespaceEqual',
19
    'getNumFromStr',
20
    'hyphenToCamelCase',
21
    'camelCaseToHyphen',
22
    'spaceCamelCase',
23
    'getMd5',
24
    'formatStr',
25
    'stripAccents',
26
    'normalizeFilename',
27
    'removePunctuation',
28
    'parenthesesMatch',
29
    'ParenthesesMatch',
30
]
31

32
import dataclasses
1✔
33
import hashlib
1✔
34
import random
1✔
35
import re
1✔
36
import time
1✔
37
import string
1✔
38
import unicodedata
1✔
39

40
# ------------------------------------------------------------------------------
41
# Pre-compiled pattern matching one or more consecutive whitespace characters
# (r'\s+' also covers line feeds).
WHITESPACE = re.compile(r'\s+')
1✔
42
# Pre-compiled pattern matching one or more consecutive line feed characters.
LINEFEED = re.compile('\n+')
1✔
43

44

45
def whitespaceEqual(a: str, b: str) -> bool:
    # noinspection PyShadowingNames
    r'''
    Return True if `a` and `b` are equal once all whitespace is removed.

    Every run of whitespace (spaces, tabs, line feeds, etc.) is deleted from
    both strings before they are compared, so both the amount and the
    position of whitespace are ignored.

    >>> a = '    hello \n there '
    >>> b = 'hello there'
    >>> c = ' bye there '
    >>> common.whitespaceEqual(a, b)
    True
    >>> common.whitespaceEqual(a, c)
    False

    Whitespace inside a word is ignored as well:

    >>> common.whitespaceEqual('hel lo', 'hello')
    True
    '''
    # r'\s+' already matches line feeds, so a single substitution per string
    # is enough; no separate line-feed pass is needed.
    return WHITESPACE.sub('', a) == WHITESPACE.sub('', b)
1✔
66

67

68
def getNumFromStr(usrStr: str, numbers: str = '0123456789') -> tuple[str, str]:
    '''
    Split a string into its numeric characters and everything else.

    Returns a two-element tuple of strings: first every character of `usrStr`
    that appears in `numbers` (in their original order), then every
    remaining character (also in order).

    >>> common.getNumFromStr('23a')
    ('23', 'a')
    >>> common.getNumFromStr('23a954Hello')
    ('23954', 'aHello')
    >>> common.getNumFromStr('')
    ('', '')
    '''
    digits = ''.join(ch for ch in usrStr if ch in numbers)
    others = ''.join(ch for ch in usrStr if ch not in numbers)
    # numbers first, then the leftover characters
    return digits, others
1✔
89

90

91
def hyphenToCamelCase(usrStr: str, replacement: str = '-') -> str:
    '''
    Convert a hyphen-connected-string into a camelCaseConnectedString.

    The string is split on `replacement` (a hyphen by default); the first
    piece is kept untouched and every following piece is capitalized
    before the pieces are glued back together.

    >>> common.hyphenToCamelCase('movement-name')
    'movementName'

    >>> common.hyphenToCamelCase('movement_name', replacement='_')
    'movementName'

    Safe to call on a string lacking the replacement character:

    >>> common.hyphenToCamelCase('voice')
    'voice'

    And on "words" beginning with numbers:

    >>> common.hyphenToCamelCase('music-21')
    'music21'
    '''
    # str.split always yields at least one piece, so the unpacking is safe
    firstWord, *laterWords = usrStr.split(replacement)
    return firstWord + ''.join(word.capitalize() for word in laterWords)
1✔
121

122

123
def camelCaseToHyphen(usrStr: str, replacement: str = '-') -> str:
    # pylint: disable=line-too-long
    '''
    Given a camel-cased string, or a mixture of numbers and characters,
    create a lowercase string whose words are separated by hyphens.

    The replacement can be specified to be something besides a hyphen, but only
    a single character and not (for internal reasons) an uppercase character.

    A separator is inserted before every uppercase ASCII letter that is
    followed by a lowercase ASCII letter (unless it begins the string) and
    between a lowercase ASCII letter or digit and an uppercase ASCII letter.

    based on code from https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case

    >>> common.camelCaseToHyphen('movementName')
    'movement-name'

    First letter can be uppercase as well:

    >>> common.camelCaseToHyphen('MovementName')
    'movement-name'

    >>> common.camelCaseToHyphen('movementNameName')
    'movement-name-name'

    >>> common.camelCaseToHyphen('fileName', replacement='_')
    'file_name'

    The replacement is always inserted literally, exactly once per word
    boundary, even if it is a digit, a lowercase letter, or a backslash:

    >>> common.camelCaseToHyphen('fileName', replacement='2')
    'file2name'

    Some things you cannot do:

    >>> common.camelCaseToHyphen('fileName', replacement='NotFound')
    Traceback (most recent call last):
    ValueError: Replacement must be a single character.

    >>> common.camelCaseToHyphen('fileName', replacement='A')
    Traceback (most recent call last):
    ValueError: Replacement cannot be an uppercase character.
    '''
    if len(replacement) != 1:
        raise ValueError('Replacement must be a single character.')
    if replacement.lower() != replacement:
        raise ValueError('Replacement cannot be an uppercase character.')

    # Zero-width match at every word boundary.  Using lookarounds (instead of
    # capturing and re-emitting the neighbouring characters) means the
    # separator never becomes part of the text that is matched again.
    wordBoundary = r'(?<=.)(?=[A-Z][a-z])|(?<=[a-z0-9])(?=[A-Z])'
    # A callable is used so that the replacement is never parsed as a regex
    # template (where digits and backslashes have special meaning).
    return re.sub(wordBoundary, lambda _match: replacement, usrStr).lower()
1✔
164

165

166
def spaceCamelCase(usrStr: str, replaceUnderscore: bool = True, fixMeList=None) -> str:
    '''
    Given a camel-cased string, or a mixture of numbers and characters,
    create a space separated string.

    A space is inserted before every uppercase character and wherever the
    text switches between "number" characters (digits and the period) and
    any other character.  Nothing is inserted before the first character.

    If replaceUnderscore is True (default) then underscores also become spaces (but without the _)

    >>> common.spaceCamelCase('thisIsATest')
    'this Is A Test'
    >>> common.spaceCamelCase('ThisIsATest')
    'This Is A Test'
    >>> common.spaceCamelCase('movement3')
    'movement 3'
    >>> common.spaceCamelCase('opus41no1')
    'opus 41 no 1'
    >>> common.spaceCamelCase('opus23402no219235')
    'opus 23402 no 219235'
    >>> common.spaceCamelCase('opus23402no219235').title()
    'Opus 23402 No 219235'

    `fixMeList` is an iterable of strings that should not have been split
    apart; after spacing, each one is glued back together.  When it is not
    given, a small default list containing only 'PMFC' is used.

    >>> common.spaceCamelCase('PMFC22')
    'PMFC 22'

    Passing a `fixMeList` replaces the default list rather than extending it:

    >>> common.spaceCamelCase('PMFC22', fixMeList=('RISM',))
    'P M F C 22'

    >>> common.spaceCamelCase('hello_myke')
    'hello myke'
    >>> common.spaceCamelCase('hello_myke', replaceUnderscore=False)
    'hello_myke'
    '''
    numbers = '0123456789.'

    # do not split these...
    fixupList = ('PMFC',) if fixMeList is None else fixMeList

    post: list[str] = []
    lastIsNum = False
    for char in usrStr:
        isNumber = char in numbers
        # A new word starts at an uppercase character or wherever the text
        # changes between number and non-number characters; the first
        # character (empty `post`) never gets a leading space.
        if post and (char.isupper() or isNumber != lastIsNum):
            post.append(' ')
        post.append(char)
        lastIsNum = isNumber

    postStr = ''.join(post)
    for fixMe in fixupList:
        # e.g. 'PMFC' was spaced out to 'P M F C'; undo that.
        postStr = postStr.replace(' '.join(fixMe), fixMe)

    if replaceUnderscore:
        postStr = postStr.replace('_', ' ')
    return postStr
1✔
245

246

247
def getMd5(value=None) -> str:
    # noinspection SpellCheckingInspection
    '''
    Return an md5 hash (as a hexadecimal string) from a string.

    `value` may be a str, which is encoded as UTF-8 before hashing, or a
    bytes-like object, which is hashed as is.  If no value is given then
    the current time plus a random number is encoded.

    >>> common.getMd5('test')
    '098f6bcd4621d373cade4e832627b4f6'

    Bytes give the same result as the equivalent UTF-8 encoded string:

    >>> common.getMd5(b'test')
    '098f6bcd4621d373cade4e832627b4f6'

    Raises TypeError if `value` is neither a str nor a bytes-like object.
    '''
    if value is None:
        value = str(time.time()) + str(random.random())
    if isinstance(value, str):
        # hashlib only accepts bytes-like objects
        value = value.encode('UTF-8')

    m = hashlib.md5()
    m.update(value)
    return m.hexdigest()
1✔
265

266

267
def formatStr(msg,
              *rest_of_message,
              **keywords) -> str:
    '''
    DEPRECATED: do not use.  May be removed at any time.

    Format one or more data elements into string suitable for printing
    straight to stderr or other outputs.

    Each element is converted to text (str elements are used unchanged,
    bytes are decoded as UTF-8, anything else is passed through `repr`),
    the pieces are joined by single spaces, and a line feed is appended.
    Keyword arguments are accepted but ignored.

    >>> a = common.formatStr('test', '1', 2, 3)
    >>> print(a)
    test 1 2 3
    <BLANKLINE>

    Bytes are decoded rather than shown as a bytes literal:

    >>> common.formatStr('test', b'abc')
    'test abc\\n'
    '''
    parts: list[str] = []
    for item in (msg, *rest_of_message):
        if isinstance(item, str):
            parts.append(item)
        elif isinstance(item, bytes):
            # must not fall through to repr(), which would give "b'...'"
            parts.append(item.decode('utf-8'))
        else:
            try:
                parts.append(repr(item))
            except TypeError:
                # best effort for objects whose repr is broken
                try:
                    parts.append(item.decode('utf-8'))
                except AttributeError:
                    parts.append('')
    return ' '.join(parts) + '\n'
1✔
295

296

297
def stripAccents(inputString: str) -> str:
    r'''
    Return a copy of a unicode string with its accents removed.

    The string is decomposed (NFKD normalization) so that each accent
    becomes a separate combining character, and the combining characters
    are then dropped.

    >>> s = 'trés vite'
    >>> 'é' in s
    True
    >>> common.stripAccents(s)
    'tres vite'

    Also handles the German Eszett

    >>> common.stripAccents('Muß')
    'Muss'
    '''
    decomposed = unicodedata.normalize('NFKD', inputString)
    # the Eszett is replaced explicitly by 'ss'
    decomposed = decomposed.replace('ß', 'ss')
    return ''.join(char for char in decomposed if not unicodedata.combining(char))
1✔
314

315

316
def normalizeFilename(name: str) -> str:
    '''
    Take a name that might contain unicode characters, punctuation,
    or spaces and normalize it so that it is POSIX compliant (except
    for the limit on length).

    Accents are stripped, any remaining non-ASCII characters are dropped,
    and every character that is not a letter, digit, underscore, or hyphen
    becomes an underscore.

    A trailing extension is left as it is, but only when it consists of a
    period followed by exactly three characters and the whole name is
    longer than five characters.

    >>> common.normalizeFilename('03-Niccolò all’lessandra.not really.xml')
    '03-Niccolo_alllessandra_not_really.xml'
    '''
    extension = ''
    if len(name) > 5 and name[-4] == '.':
        # split off the period plus three characters so it is not normalized
        name, extension = name[:-4], str(name[-4:])

    asciiOnly = stripAccents(name).encode('ascii', 'ignore').decode('UTF-8')
    normalized = re.sub(r'[^\w-]', '_', asciiOnly).strip()
    return normalized + extension
1✔
342

343

344
def removePunctuation(s: str) -> str:
    '''
    Return a copy of the string with every character found in
    `string.punctuation` deleted.

    >>> common.removePunctuation('This, is! my (face).')
    'This is my face'
    '''
    # a translation table that maps each punctuation character to "delete"
    return s.translate(str.maketrans('', '', string.punctuation))
1✔
354

355
@dataclasses.dataclass
class ParenthesesMatch:
    '''
    One matched pair of delimiters, as found by `parenthesesMatch`.
    '''
    # index of the first character after the opening delimiter
    start: int
    # index of the first character of the closing delimiter
    end: int
    # the text between the delimiters, i.e. s[start:end]
    text: str
    # matches found inside this one, in order of their closing delimiter
    nested: list[ParenthesesMatch]


def parenthesesMatch(
    s: str,
    open: str = '(',  # pylint: disable=redefined-builtin
    close: str = ')',
) -> list[ParenthesesMatch]:
    r'''
    Utility tool to return a list of parentheses matches for a string using a dataclass
    called `ParenthesesMatch` which has indices of the `start` and `end`
    of the match, and the `text` of the match, and a list of `nested`
    ParenthesesMatch objects (which may have their own nested objects).

    A delimiter immediately preceded by an unescaped backslash is ignored.

    >>> st = r'Bologne wrote (a (whole) (lot) \(of\)) sym\(ph(on)ies\) concertantes.'
    >>> common.stringTools.parenthesesMatch(st)
    [ParenthesesMatch(start=15, end=37, text='a (whole) (lot) \\(of\\)',
                      nested=[ParenthesesMatch(start=18, end=23, text='whole', nested=[]),
                              ParenthesesMatch(start=26, end=29, text='lot', nested=[])]),
     ParenthesesMatch(start=47, end=49, text='on', nested=[])]

    Other brackets can be used:

    >>> st = r'[Whammy bars] and [oboes] do [not [mix] very] [well.]'
    >>> common.stringTools.parenthesesMatch(st, open='[', close=']')
    [ParenthesesMatch(start=1, end=12, text='Whammy bars', nested=[]),
     ParenthesesMatch(start=19, end=24, text='oboes', nested=[]),
     ParenthesesMatch(start=30, end=44, text='not [mix] very',
                      nested=[ParenthesesMatch(start=35, end=38, text='mix', nested=[])]),
     ParenthesesMatch(start=47, end=52, text='well.', nested=[])]

    The `open` and `close` parameters can be multiple characters:

    >>> st = r'Did you eat <<beans>> today <<Pythagoreas<<?>>>>'
    >>> common.stringTools.parenthesesMatch(st, open='<<', close='>>')
    [ParenthesesMatch(start=14, end=19, text='beans', nested=[]),
     ParenthesesMatch(start=30, end=46, text='Pythagoreas<<?>>',
                      nested=[ParenthesesMatch(start=43, end=44, text='?', nested=[])])]

    They cannot, however, be empty:

    >>> common.stringTools.parenthesesMatch(st, open='', close='')
    Traceback (most recent call last):
    ValueError: Neither open nor close can be empty.

    Unmatched opening or closing parentheses will raise a ValueError:

    >>> common.stringTools.parenthesesMatch('My (parentheses (sometimes (continue',)
    Traceback (most recent call last):
    ValueError: Opening '(' at index 3 was never closed

    >>> common.stringTools.parenthesesMatch('This is a <bad> example>', open='<', close='>')
    Traceback (most recent call last):
    ValueError: Closing '>' without '<' at index 23.

    The reported index is that of the first character of the delimiter,
    even when the delimiter is longer than one character:

    >>> common.stringTools.parenthesesMatch('Eat <<beans', open='<<', close='>>')
    Traceback (most recent call last):
    ValueError: Opening '<<' at index 4 was never closed

    Note that using multiple characters like a prefix can have unintended consequences:

    >>> st = r'[Pitch("C4"), [Pitch("D5"), Pitch("E6")], Pitch("Pity("Z9")")]'
    >>> common.stringTools.parenthesesMatch(st, open='Pitch("', close='")')
    Traceback (most recent call last):
    ValueError: Closing '")' without 'Pitch("' at index 59.

    So to do something like this, you might need to get creative:

    >>> out = common.stringTools.parenthesesMatch(st, open='("', close='")')
    >>> out
    [ParenthesesMatch(start=8, end=10, text='C4', nested=[]),
     ParenthesesMatch(start=22, end=24, text='D5', nested=[]),
     ParenthesesMatch(start=35, end=37, text='E6', nested=[]),
     ParenthesesMatch(start=49, end=59, text='Pity("Z9")',
                      nested=[ParenthesesMatch(start=55, end=57, text='Z9', nested=[])])]
    >>> extractedPitches = []
    >>> for match in out:
    ...     if st[match.start - 7:match.start] == 'Pitch("':
    ...          extractedPitches.append(match.text)
    >>> extractedPitches
    ['C4', 'D5', 'E6', 'Pity("Z9")']

    * New in v9.3.
    '''
    if not open or not close:
        raise ValueError('Neither open nor close can be empty.')

    openLength = len(open)
    closeLength = len(close)

    # Sentinel at the bottom of the stack: it collects the top-level matches
    # and lets "stack holds only the sentinel" mean "nothing is open".
    mainMatch = ParenthesesMatch(-1, -1, '', [])
    stack: list[ParenthesesMatch] = [mainMatch]

    lastCharWasBackslash = False

    i = 0
    while i < len(s):
        if not lastCharWasBackslash:
            if s.startswith(open, i):
                stack.append(ParenthesesMatch(i + openLength, -1, '', []))
                i += openLength
                continue
            if s.startswith(close, i):
                if len(stack) <= 1:
                    raise ValueError(f'Closing {close!r} without {open!r} at index {i}.')
                curPM = stack.pop()
                curPM.end = i
                curPM.text = s[curPM.start:i]
                stack[-1].nested.append(curPM)
                i += closeLength
                continue

        # Two backslashes in a row cancel out, so toggle rather than set.
        lastCharWasBackslash = (s[i] == '\\' and not lastCharWasBackslash)
        i += 1

    if len(stack) > 1:
        # start points just past the opening delimiter, so step back over
        # the whole delimiter (not just one character) to find where it began
        openIndex = stack[1].start - openLength
        raise ValueError(f'Opening {open!r} at index {openIndex} was never closed')

    return mainMatch.nested
1✔
475

476

477
# -----------------------------------------------------------------------------
478
# When this file is executed directly (rather than imported), hand control
# to music21.mainTest().
if __name__ == '__main__':
479
    import music21
480
    music21.mainTest()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc