• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

cuthbertLab / music21 / 28127151050

24 Jun 2026 08:21PM UTC coverage: 93.134% (+0.002%) from 93.132%
28127151050

Pull #1953

github

web-flow
Merge d2a38e4c6 into 603fa2449
Pull Request #1953: Typing of "common" directory

127 of 136 new or added lines in 14 files covered. (93.38%)

1 existing line in 1 file now uncovered.

81814 of 87845 relevant lines covered (93.13%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.48
/music21/common/stringTools.py
1
# ------------------------------------------------------------------------------
2
# Name:         common/stringTools.py
3
# Purpose:      Utilities for strings
4
#
5
# Authors:      Michael Scott Asato Cuthbert
6
#               Christopher Ariza
7
#
8
# Copyright:    Copyright © 2009-2015 Michael Scott Asato Cuthbert
9
# License:      BSD, see license.txt
10
# ------------------------------------------------------------------------------
11
'''
12
Tools for working with strings
13
'''
14
from __future__ import annotations
1✔
15

16
__all__ = [
1✔
17
    'whitespaceEqual',
18
    'getNumFromStr',
19
    'hyphenToCamelCase',
20
    'camelCaseToHyphen',
21
    'spaceCamelCase',
22
    'getMd5',
23
    'formatStr',
24
    'stripAccents',
25
    'normalizeFilename',
26
    'removePunctuation',
27
    'parenthesesMatch',
28
    'ParenthesesMatch',
29
]
30

31
from collections.abc import Iterable
1✔
32
import typing as t
1✔
33
import dataclasses
1✔
34
import hashlib
1✔
35
import random
1✔
36
import re
1✔
37
import time
1✔
38
import string
1✔
39
import unicodedata
1✔
40

41
# ------------------------------------------------------------------------------
42
# Matches any run of whitespace, including newlines.
WHITESPACE = re.compile(r'\s+')
# Matches a run of newlines.  Not needed by whitespaceEqual (``\s`` already
# covers ``\n``) but kept as a module-level name for backward compatibility.
LINEFEED = re.compile('\n+')


def whitespaceEqual(a: str, b: str) -> bool:
    # noinspection PyShadowingNames
    r'''
    Return True if `a` and `b` are equal once every whitespace character
    (spaces, tabs, newlines, etc.) has been removed from both, False otherwise.

    >>> a = '    hello \n there '
    >>> b = 'hello there'
    >>> c = ' bye there '
    >>> common.whitespaceEqual(a, b)
    True
    >>> common.whitespaceEqual(a, c)
    False

    Strings consisting only of whitespace compare equal to the empty string:

    >>> common.whitespaceEqual(' \n\t', '')
    True
    '''
    # A single WHITESPACE pass is enough: r'\s+' also matches line feeds.
    return WHITESPACE.sub('', a) == WHITESPACE.sub('', b)
67

68

69
def getNumFromStr(usrStr: str, numbers: str = '0123456789') -> tuple[str, str]:
    '''
    Split a string into its "number" characters and everything else.

    Every character of `usrStr` that appears in `numbers` goes into the first
    returned string; all other characters go into the second.  The relative
    order of characters is preserved within each string.

    >>> common.getNumFromStr('23a')
    ('23', 'a')
    >>> common.getNumFromStr('23a954Hello')
    ('23954', 'aHello')
    >>> common.getNumFromStr('')
    ('', '')
    '''
    numeric = ''.join(ch for ch in usrStr if ch in numbers)
    nonNumeric = ''.join(ch for ch in usrStr if ch not in numbers)
    # numbers first, then the remaining characters
    return numeric, nonNumeric
90

91

92
def hyphenToCamelCase(usrStr: str, replacement: str = '-') -> str:
    '''
    Convert a hyphen-connected-string into a camelCaseConnectedString.

    The string is split on `replacement` (a hyphen by default); the first
    piece is kept unchanged and every following piece is passed through
    `str.capitalize()` before the pieces are joined back together.

    >>> common.hyphenToCamelCase('movement-name')
    'movementName'

    >>> common.hyphenToCamelCase('movement_name', replacement='_')
    'movementName'

    Safe to call on a string lacking the replacement character:

    >>> common.hyphenToCamelCase('voice')
    'voice'

    And on "words" beginning with numbers:

    >>> common.hyphenToCamelCase('music-21')
    'music21'
    '''
    return ''.join(
        piece if position == 0 else piece.capitalize()
        for position, piece in enumerate(usrStr.split(replacement))
    )
122

123

124
def camelCaseToHyphen(usrStr: str, replacement: str = '-') -> str:
    # pylint: disable=line-too-long
    r'''
    Given a camel-cased string, create a lowercase string whose words are
    separated by hyphens.

    The replacement can be specified to be something besides a hyphen, but only
    a single character and not (for internal reasons) an uppercase character.

    code from https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case

    >>> common.camelCaseToHyphen('movementName')
    'movement-name'

    First letter can be uppercase as well:

    >>> common.camelCaseToHyphen('MovementName')
    'movement-name'

    >>> common.camelCaseToHyphen('movementNameName')
    'movement-name-name'

    >>> common.camelCaseToHyphen('fileName', replacement='_')
    'file_name'

    The replacement is always inserted literally, even if it is a character
    such as a backslash that is special in regular-expression templates:

    >>> common.camelCaseToHyphen('fileName', replacement='\\')
    'file\\name'

    Some things you cannot do:

    >>> common.camelCaseToHyphen('fileName', replacement='NotFound')
    Traceback (most recent call last):
    ValueError: Replacement must be a single character.

    >>> common.camelCaseToHyphen('fileName', replacement='A')
    Traceback (most recent call last):
    ValueError: Replacement cannot be an uppercase character.
    '''
    if len(replacement) != 1:
        raise ValueError('Replacement must be a single character.')
    if replacement.lower() != replacement:
        raise ValueError('Replacement cannot be an uppercase character.')

    def joinGroups(match: re.Match[str]) -> str:
        # A callable is used instead of a template string so that the
        # replacement character is never interpreted as an escape sequence.
        return match.group(1) + replacement + match.group(2)

    # First pass: separate any character from a following Capitalized word.
    firstPass = re.sub('(.)([A-Z][a-z]+)', joinGroups, usrStr)
    # Second pass: separate a lowercase letter or digit from a following capital.
    return re.sub('([a-z0-9])([A-Z])', joinGroups, firstPass).lower()
165

166

167
def spaceCamelCase(
    usrStr: str,
    replaceUnderscore: bool = True,
    fixMeList: Iterable[str]|None = None
) -> str:
    '''
    Given a camel-cased string, or a mixture of numbers and characters,
    create a space separated string.

    A space is inserted before every uppercase character and at every
    boundary between a run of number characters (digits or ".") and a run
    of other characters.  No space is ever inserted before the first character.

    If replaceUnderscore is True (default) then underscores also become spaces (but without the _)

    >>> common.spaceCamelCase('thisIsATest')
    'this Is A Test'
    >>> common.spaceCamelCase('ThisIsATest')
    'This Is A Test'
    >>> common.spaceCamelCase('movement3')
    'movement 3'
    >>> common.spaceCamelCase('opus41no1')
    'opus 41 no 1'
    >>> common.spaceCamelCase('opus23402no219235')
    'opus 23402 no 219235'
    >>> common.spaceCamelCase('opus23402no219235').title()
    'Opus 23402 No 219235'

    `fixMeList` is an iterable of strings that should not have been split;
    after spacing, the spaced-out form of each entry is collapsed back to
    the entry itself.  If it is None, only "PMFC" is protected.

    >>> common.spaceCamelCase('PMFC22')
    'PMFC 22'

    >>> common.spaceCamelCase('hello_myke')
    'hello myke'
    >>> common.spaceCamelCase('hello_myke', replaceUnderscore=False)
    'hello_myke'
    '''
    numberChars = '0123456789.'

    # do not split these
    protected: Iterable[str] = ('PMFC',) if fixMeList is None else fixMeList

    pieces: list[str] = []
    previousWasNumber = False
    for ch in usrStr:
        currentIsNumber = ch in numberChars
        # a boundary is either a number starting after non-numbers
        # or a non-number starting after numbers.
        atBoundary = currentIsNumber != previousWasNumber
        if pieces and (ch.isupper() or atBoundary):
            pieces.append(' ')
        pieces.append(ch)
        previousWasNumber = currentIsNumber

    spaced = ''.join(pieces)
    for keepTogether in protected:
        spaced = spaced.replace(' '.join(keepTogether), keepTogether)

    return spaced.replace('_', ' ') if replaceUnderscore else spaced
251

252

253
def getMd5(value: str|bytes|None = None) -> str:
    # noinspection SpellCheckingInspection
    '''
    Return the hexadecimal md5 digest of `value`.

    A str is encoded as UTF-8 before hashing; bytes are hashed as given.
    If no value is given then the current time plus a random number
    is hashed instead.

    >>> common.getMd5('test')
    '098f6bcd4621d373cade4e832627b4f6'
    '''
    if value is None:
        value = str(time.time()) + str(random.random())
    payload = value.encode('UTF-8') if isinstance(value, str) else value
    return hashlib.md5(payload).hexdigest()
269

270

271
def formatStr(msg: object,
              *rest_of_message: object,
              **keywords: object) -> str:
    '''
    DEPRECATED: do not use.  May be removed at any time.

    Format one or more data elements into string suitable for printing
    straight to stderr or other outputs.

    Each element is converted as follows and the results are joined by
    single spaces, followed by a newline:

    * str elements are used unchanged.
    * bytes elements are decoded as UTF-8 (undecodable bytes are replaced
      rather than raising).
    * anything else is converted with `repr()`; if that raises TypeError
      the element becomes an empty string.

    Keyword arguments are accepted but ignored.

    >>> a = common.formatStr('test', '1', 2, 3)
    >>> print(a)
    test 1 2 3
    <BLANKLINE>

    >>> common.formatStr(b'bytes', 'too')
    'bytes too\\n'
    '''
    parts: list[str] = []
    for item in (msg, *rest_of_message):
        if isinstance(item, str):
            parts.append(item)
        elif isinstance(item, bytes):
            # Must be exclusive with the repr() branch below; otherwise the
            # decoded text is overwritten by something like "b'...'".
            parts.append(item.decode('utf-8', errors='replace'))
        else:
            try:
                parts.append(repr(item))
            except TypeError:
                # best effort: an object whose repr fails contributes nothing
                parts.append('')
    return ' '.join(parts) + '\n'
299

300

301
def stripAccents(inputString: str) -> str:
    r'''
    Remove accents from a unicode string.

    The string is normalized to NFKD form, a handful of characters are
    replaced by ASCII equivalents, and all combining characters are dropped.

    >>> s = 'trés vite'
    >>> 'é' in s
    True
    >>> common.stripAccents(s)
    'tres vite'

    Also handles the German Eszett and smart quotes

    >>> common.stripAccents('Muß')
    'Muss'
    >>> common.stripAccents('Süss, “êtré”')
    'Suss, "etre"'

    Note -- it is still possible to have non-Ascii characters after this,
    like in this Japanese expression for music:

    >>> common.stripAccents('音楽')
    '音楽'
    '''
    # characters that NFKD leaves alone but that have an obvious ASCII form
    asciiSubstitutes = str.maketrans({
        'ß': 'ss',
        '“': '"',
        '”': '"',
        '‘': "'",
        '’': "'",
    })
    decomposed = unicodedata.normalize('NFKD', inputString).translate(asciiSubstitutes)
    return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
333

334

335
def normalizeFilename(name: str) -> str:
    '''
    Take a name that might contain unicode characters, punctuation,
    or spaces and normalize it so that it is POSIX compliant (except for
    the limit on length).

    If the name is longer than five characters and its fourth-from-last
    character is a period, the last four characters (a period plus a
    three-character extension) are set aside and re-attached unchanged.
    The rest has accents stripped, non-ASCII characters dropped, and every
    character other than a word character or hyphen replaced by an underscore.

    >>> common.normalizeFilename('03-Niccolò all’lessandra.not really.xml')
    '03-Niccolo_all_lessandra_not_really.xml'
    '''
    if len(name) > 5 and name[-4] == '.':
        stem, suffix = name[:-4], name[-4:]
    else:
        stem, suffix = name, ''

    asciiStem = stripAccents(stem).encode('ascii', 'ignore').decode('UTF-8')
    return re.sub(r'[^\w-]', '_', asciiStem).strip() + suffix
361

362

363
def removePunctuation(s: str) -> str:
    '''
    Return `s` with every character found in `string.punctuation` deleted.

    >>> common.removePunctuation('This, is! my (face).')
    'This is my face'
    '''
    # mapping a code point to None tells str.translate to delete it
    deletions = dict.fromkeys(map(ord, string.punctuation))
    return s.translate(deletions)
373

374
@dataclasses.dataclass
class ParenthesesMatch:
    # index of the first character after the opening delimiter
    start: int
    # index of the first character of the closing delimiter
    end: int
    # the text strictly between the opening and closing delimiters
    text: str
    # matches found inside this one, in order of their closing delimiter
    nested: list[ParenthesesMatch]

def parenthesesMatch(
    s: str,
    open: str = '(',  # pylint: disable=redefined-builtin
    close: str = ')',
) -> list[ParenthesesMatch]:
    r'''
    Utility tool to return a list of parentheses matches for a string using a dataclass
    called `ParenthesesMatch` which has indices of the `start` and `end`
    of the match, and the `text` of the match, and a list of `nested`
    ParenthesesMatch objects (which may have their own nested objects).

    A delimiter directly preceded by an unescaped backslash is ignored.

    >>> st = r'Bologne wrote (a (whole) (lot) \(of\)) sym\(ph(on)ies\) concertantes.'
    >>> common.stringTools.parenthesesMatch(st)
    [ParenthesesMatch(start=15, end=37, text='a (whole) (lot) \\(of\\)',
                      nested=[ParenthesesMatch(start=18, end=23, text='whole', nested=[]),
                              ParenthesesMatch(start=26, end=29, text='lot', nested=[])]),
     ParenthesesMatch(start=47, end=49, text='on', nested=[])]

    Other brackets can be used:

    >>> st = r'[Whammy bars] and [oboes] do [not [mix] very] [well.]'
    >>> common.stringTools.parenthesesMatch(st, open='[', close=']')
    [ParenthesesMatch(start=1, end=12, text='Whammy bars', nested=[]),
     ParenthesesMatch(start=19, end=24, text='oboes', nested=[]),
     ParenthesesMatch(start=30, end=44, text='not [mix] very',
                      nested=[ParenthesesMatch(start=35, end=38, text='mix', nested=[])]),
     ParenthesesMatch(start=47, end=52, text='well.', nested=[])]

    The `open` and `close` parameters can be multiple characters:

    >>> st = r'Did you eat <<beans>> today <<Pythagoreas<<?>>>>'
    >>> common.stringTools.parenthesesMatch(st, open='<<', close='>>')
    [ParenthesesMatch(start=14, end=19, text='beans', nested=[]),
     ParenthesesMatch(start=30, end=46, text='Pythagoreas<<?>>',
                      nested=[ParenthesesMatch(start=43, end=44, text='?', nested=[])])]

    They cannot, however, be empty:

    >>> common.stringTools.parenthesesMatch(st, open='', close='')
    Traceback (most recent call last):
    ValueError: Neither open nor close can be empty.

    Unmatched opening or closing parentheses will raise a ValueError:

    >>> common.stringTools.parenthesesMatch('My (parentheses (sometimes (continue',)
    Traceback (most recent call last):
    ValueError: Opening '(' at index 3 was never closed

    >>> common.stringTools.parenthesesMatch('This is a <bad> example>', open='<', close='>')
    Traceback (most recent call last):
    ValueError: Closing '>' without '<' at index 23.

    The reported index is that of the first character of the delimiter, also
    for multiple-character delimiters:

    >>> common.stringTools.parenthesesMatch('Did you eat <<beans', open='<<', close='>>')
    Traceback (most recent call last):
    ValueError: Opening '<<' at index 12 was never closed

    Note that using multiple characters like a prefix can have unintended consequences:

    >>> st = r'[Pitch("C4"), [Pitch("D5"), Pitch("E6")], Pitch("Pity("Z9")")]'
    >>> common.stringTools.parenthesesMatch(st, open='Pitch("', close='")')
    Traceback (most recent call last):
    ValueError: Closing '")' without 'Pitch("' at index 59.

    So to do something like this, you might need to get creative:

    >>> out = common.stringTools.parenthesesMatch(st, open='("', close='")')
    >>> out
    [ParenthesesMatch(start=8, end=10, text='C4', nested=[]),
     ParenthesesMatch(start=22, end=24, text='D5', nested=[]),
     ParenthesesMatch(start=35, end=37, text='E6', nested=[]),
     ParenthesesMatch(start=49, end=59, text='Pity("Z9")',
                      nested=[ParenthesesMatch(start=55, end=57, text='Z9', nested=[])])]
    >>> extractedPitches = []
    >>> for match in out:
    ...     if st[match.start - 7:match.start] == 'Pitch("':
    ...          extractedPitches.append(match.text)
    >>> extractedPitches
    ['C4', 'D5', 'E6', 'Pity("Z9")']

    * New in v9.3.
    '''
    if not open or not close:
        raise ValueError('Neither open nor close can be empty.')

    openLen = len(open)
    closeLen = len(close)

    # mainMatch is a sentinel root: its `nested` list collects the top-level
    # matches, and it stays at the bottom of the stack throughout.
    mainMatch = ParenthesesMatch(-1, -1, '', [])
    stack: list[ParenthesesMatch] = [mainMatch]

    lastCharWasBackslash = False

    i = 0
    while i < len(s):
        if not lastCharWasBackslash:
            if s.startswith(open, i):
                stack.append(ParenthesesMatch(i + openLen, -1, '', []))
                i += openLen
                continue
            if s.startswith(close, i):
                if len(stack) <= 1:
                    raise ValueError(f'Closing {close!r} without {open!r} at index {i}.')
                curPM = stack.pop()
                curPM.end = i
                curPM.text = s[curPM.start:i]
                stack[-1].nested.append(curPM)
                i += closeLen
                continue

        # Two backslashes in a row cancel out, so toggle rather than set.
        if s[i] == '\\':
            lastCharWasBackslash = not lastCharWasBackslash
        else:
            lastCharWasBackslash = False
        i += 1

    if len(stack) > 1:
        # `start` points just past the opening delimiter, so step back by the
        # delimiter's full length to report where it begins.
        unclosedIndex = stack[1].start - openLen
        raise ValueError(f'Opening {open!r} at index {unclosedIndex} was never closed')

    return mainMatch.nested
495

496

497
# -----------------------------------------------------------------------------
498
if __name__ == '__main__':
    # When executed directly as a script, hand control to music21.mainTest().
    import music21
    music21.mainTest()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc