28127151050

Committed 24 Jun 2026 08:21PM UTC coverage: 93.134% (+0.002%) from 93.132%

Build # 28127151050

Build Type

Pull #1953

github

Committed by

web-flow

Commit Message

Merge d2a38e4c6 into 603fa2449

Pull Request Pull Request #1953: Typing of "common" directory

Coverage Stats

127 of 136 new or added lines in 14 files covered. (93.38%)

1 existing line in 1 file now uncovered.

81814 of 87845 relevant lines covered (93.13%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.48

/music21/common/stringTools.py

# ------------------------------------------------------------------------------
# Name:         common/stringTools.py
# Purpose:      Utilities for strings
#
# Authors:      Michael Scott Asato Cuthbert
#               Christopher Ariza
#
# Copyright:    Copyright © 2009-2015 Michael Scott Asato Cuthbert
# License:      BSD, see license.txt
# ------------------------------------------------------------------------------
'''
Tools for working with strings
'''
from __future__ import annotations

# Public names of this module: the list that ``from <module> import *`` exports.
__all__ = [
    'whitespaceEqual',
    'getNumFromStr',
    'hyphenToCamelCase',
    'camelCaseToHyphen',
    'spaceCamelCase',
    'getMd5',
    'formatStr',
    'stripAccents',
    'normalizeFilename',
    'removePunctuation',
    'parenthesesMatch',
    'ParenthesesMatch',
]

from collections.abc import Iterable
import typing as t
import dataclasses
import hashlib
import random
import re
import time
import string
import unicodedata

# ------------------------------------------------------------------------------
WHITESPACE = re.compile(r'\s+')
LINEFEED = re.compile('\n+')


def whitespaceEqual(a: str, b: str) -> bool:
    # noinspection PyShadowingNames
    r'''
    Compare two strings while ignoring all whitespace (spaces, tabs,
    line breaks) in either of them.  Returns True when what is left over
    is identical, False otherwise.

    >>> a = '    hello \n there '
    >>> b = 'hello there'
    >>> c = ' bye there '
    >>> common.whitespaceEqual(a, b)
    True
    >>> common.whitespaceEqual(a, c)
    False
    '''
    # Strip whitespace runs, then line-feed runs, from each argument in turn.
    squeezedA, squeezedB = (
        LINEFEED.sub('', WHITESPACE.sub('', text)) for text in (a, b)
    )
    return squeezedA == squeezedB


def getNumFromStr(usrStr: str, numbers: str = '0123456789') -> tuple[str, str]:
    '''
    Split a string into its numeric characters and everything else.

    Returns a two-element tuple: first the characters of `usrStr` that appear
    in `numbers` (in their original order, as one string), then all the other
    characters (also in order).

    >>> common.getNumFromStr('23a')
    ('23', 'a')
    >>> common.getNumFromStr('23a954Hello')
    ('23954', 'aHello')
    >>> common.getNumFromStr('')
    ('', '')
    '''
    digits = ''.join(ch for ch in usrStr if ch in numbers)
    others = ''.join(ch for ch in usrStr if ch not in numbers)
    return digits, others


def hyphenToCamelCase(usrStr: str, replacement: str = '-') -> str:
    '''
    Convert a hyphen-connected-string into a camelCaseConnectedString.

    The string is split on `replacement` (a hyphen unless told otherwise);
    the first piece is kept exactly as written and each later piece is
    capitalized before the pieces are glued back together.

    >>> common.hyphenToCamelCase('movement-name')
    'movementName'

    >>> common.hyphenToCamelCase('movement_name', replacement='_')
    'movementName'

    Safe to call on a string lacking the replacement character:

    >>> common.hyphenToCamelCase('voice')
    'voice'

    And on "words" beginning with numbers:

    >>> common.hyphenToCamelCase('music-21')
    'music21'
    '''
    head, *tail = usrStr.split(replacement)
    return head + ''.join(word.capitalize() for word in tail)


def camelCaseToHyphen(usrStr: str, replacement: str = '-') -> str:
    # pylint: disable=line-too-long
    '''
    Given a camel-cased string, create a lowercase string whose words
    are separated by hyphens.

    The replacement can be specified to be something besides a hyphen, but only
    a single character and not (for internal reasons) an uppercase character.
    The replacement is always inserted literally, even if it is a character
    such as a backslash that has a special meaning in regular expressions.

    code from https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case

    >>> common.camelCaseToHyphen('movementName')
    'movement-name'

    First letter can be uppercase as well:

    >>> common.camelCaseToHyphen('MovementName')
    'movement-name'

    >>> common.camelCaseToHyphen('movementNameName')
    'movement-name-name'

    >>> common.camelCaseToHyphen('fileName', replacement='_')
    'file_name'

    Some things you cannot do:

    >>> common.camelCaseToHyphen('fileName', replacement='NotFound')
    Traceback (most recent call last):
    ValueError: Replacement must be a single character.

    >>> common.camelCaseToHyphen('fileName', replacement='A')
    Traceback (most recent call last):
    ValueError: Replacement cannot be an uppercase character.
    '''
    if len(replacement) != 1:
        raise ValueError('Replacement must be a single character.')
    if replacement.lower() != replacement:
        # the result is lowercased at the end, which would corrupt
        # an uppercase separator.
        raise ValueError('Replacement cannot be an uppercase character.')

    def insertSeparator(match: re.Match[str]) -> str:
        # A callable is used instead of a template string so that the
        # replacement is never interpreted as a regex escape sequence.
        return match.group(1) + replacement + match.group(2)

    # first pass: split before a capital that starts a lowercase word;
    # second pass: split any remaining lowercase-or-digit/capital boundary.
    s1 = re.sub(r'(.)([A-Z][a-z]+)', insertSeparator, usrStr)
    return re.sub(r'([a-z0-9])([A-Z])', insertSeparator, s1).lower()


def spaceCamelCase(
    usrStr: str,
    replaceUnderscore: bool = True,
    fixMeList: Iterable[str]|None = None
) -> str:
    '''
    Insert spaces into a camel-cased string (or a string mixing letters and
    numbers) so that its words and numbers are separated.

    A space is placed before every uppercase character and wherever the
    text switches between digits (a period counts as a digit) and non-digits.

    If replaceUnderscore is True (default) then underscores also become spaces (but without the _)

    >>> common.spaceCamelCase('thisIsATest')
    'this Is A Test'
    >>> common.spaceCamelCase('ThisIsATest')
    'This Is A Test'
    >>> common.spaceCamelCase('movement3')
    'movement 3'
    >>> common.spaceCamelCase('opus41no1')
    'opus 41 no 1'
    >>> common.spaceCamelCase('opus23402no219235')
    'opus 23402 no 219235'
    >>> common.spaceCamelCase('opus23402no219235').title()
    'Opus 23402 No 219235'

    Words listed in fixMeList are put back together after the spacing;
    when fixMeList is None, only 'PMFC' is restored.

    >>> common.spaceCamelCase('PMFC22')
    'PMFC 22'

    >>> common.spaceCamelCase('hello_myke')
    'hello myke'
    >>> common.spaceCamelCase('hello_myke', replaceUnderscore=False)
    'hello_myke'
    '''
    digitChars = '0123456789.'

    # words that must survive the splitting untouched
    keepTogether: Iterable[str] = ('PMFC',) if fixMeList is None else fixMeList

    pieces: list[str] = []
    previousWasDigit = False
    for ch in usrStr:
        currentIsDigit = ch in digitChars
        # never lead with a space; otherwise break at capitals and
        # at every digit/non-digit boundary.
        if pieces and (ch.isupper() or currentIsDigit != previousWasDigit):
            pieces.append(' ')
        pieces.append(ch)
        previousWasDigit = currentIsDigit

    spaced = ''.join(pieces)
    for word in keepTogether:
        # undo the letter-by-letter spacing of protected words
        spaced = spaced.replace(' '.join(word), word)

    return spaced.replace('_', ' ') if replaceUnderscore else spaced


def getMd5(value: str|bytes|None = None) -> str:
    # noinspection SpellCheckingInspection
    '''
    Return the hexadecimal md5 digest of a string (encoded as UTF-8) or of
    a bytes object.  When no value is given, the current time followed by
    a random number is hashed instead.

    >>> common.getMd5('test')
    '098f6bcd4621d373cade4e832627b4f6'
    '''
    if value is None:
        payload = (str(time.time()) + str(random.random())).encode('UTF-8')
    elif isinstance(value, str):
        payload = value.encode('UTF-8')
    else:
        payload = value
    return hashlib.md5(payload).hexdigest()


def formatStr(msg: object,
              *rest_of_message: object,
              **keywords: object) -> str:
    '''
    DEPRECATED: do not use.  May be removed at any time.

    Format one or more data elements into string suitable for printing
    straight to stderr or other outputs.

    Strings are used as they are, bytes are decoded as UTF-8 (falling back
    to their repr if they are not valid UTF-8), and every other object is
    represented by its repr.  The pieces are joined by single spaces and a
    newline is added at the end.  Keyword arguments are accepted but ignored.

    >>> a = common.formatStr('test', '1', 2, 3)
    >>> print(a)
    test 1 2 3
    <BLANKLINE>

    >>> print(common.formatStr(b'test', 1))
    test 1
    <BLANKLINE>
    '''
    parts: list[str] = []
    for item in (msg, *rest_of_message):
        if isinstance(item, str):
            parts.append(item)
        elif isinstance(item, bytes):
            # must be its own branch: otherwise the decoded text is
            # immediately overwritten by the repr of the bytes object.
            try:
                parts.append(item.decode('utf-8'))
            except UnicodeDecodeError:
                parts.append(repr(item))
        else:
            try:
                parts.append(repr(item))
            except TypeError:
                # best effort: an object whose repr is broken contributes
                # an empty string instead of aborting the whole message.
                parts.append('')
    return ' '.join(parts) + '\n'


def stripAccents(inputString: str) -> str:
    r'''
    Return a copy of a unicode string with its accents removed.

    The string is decomposed (NFKD) and all combining marks are dropped.

    >>> s = 'trés vite'
    >>> 'é' in s
    True
    >>> common.stripAccents(s)
    'tres vite'

    The German Eszett and curly ("smart") quotes are converted to
    plain equivalents as well:

    >>> common.stripAccents('Muß')
    'Muss'
    >>> common.stripAccents('Süss, “êtré”')
    'Suss, "etre"'

    Note -- it is still possible to have non-Ascii characters after this,
    like in this Japanese expression for music:

    >>> common.stripAccents('音楽')
    '音楽'
    '''
    # characters that decomposition alone does not turn into plain ASCII
    substitutions = (
        ('ß', 'ss'),
        ('“', '"'),
        ('”', '"'),
        ('‘', "'"),
        ('’', "'"),
    )
    decomposed = unicodedata.normalize('NFKD', inputString)
    for fancy, plain in substitutions:
        decomposed = decomposed.replace(fancy, plain)
    return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))


def normalizeFilename(name: str) -> str:
    '''
    Turn a name that might contain unicode characters, punctuation,
    or spaces into one that is POSIX compliant (except for the limit
    on length).

    Accents are stripped, any remaining non-ASCII characters are dropped,
    and every character that is not a letter, digit, underscore, or hyphen
    becomes an underscore.  If the name is longer than five characters and
    ends in a period plus three characters, that ending is treated as a
    file extension and is carried over unchanged.

    >>> common.normalizeFilename('03-Niccolò all’lessandra.not really.xml')
    '03-Niccolo_all_lessandra_not_really.xml'
    '''
    suffix = ''
    if len(name) > 5 and name[-4] == '.':
        # set the extension aside so that its period is not replaced
        name, suffix = name[:-4], name[-4:]

    asciiOnly = stripAccents(name).encode('ascii', 'ignore').decode('UTF-8')
    return re.sub(r'[^\w-]', '_', asciiOnly).strip() + suffix


def removePunctuation(s: str) -> str:
    '''
    Return a copy of the string without any of the characters
    found in `string.punctuation`.

    >>> common.removePunctuation('This, is! my (face).')
    'This is my face'
    '''
    # a translation table mapping a code point to None deletes that character
    deletions = dict.fromkeys(map(ord, string.punctuation))
    return s.translate(deletions)

@dataclasses.dataclass
class ParenthesesMatch:
    '''
    One matched pair of delimiters found by `parenthesesMatch`.
    '''
    # index of the first character after the opening delimiter
    start: int
    # index of the first character of the closing delimiter
    end: int
    # the text between the two delimiters: the searched string from start to end
    text: str
    # matches found inside this one, in the order in which they were closed
    nested: list[ParenthesesMatch]

def parenthesesMatch(
    s: str,
    open: str = '(',  # pylint: disable=redefined-builtin
    close: str = ')',
) -> list[ParenthesesMatch]:
    r'''
    Utility tool to return a list of parentheses matches for a string using a dataclass
    called `ParenthesesMatch` which has indices of the `start` and `end`
    of the match, and the `text` of the match, and a list of `nested`
    ParenthesesMatch objects (which may have their own nested objects).

    An `open` or `close` directly preceded by an unescaped backslash
    is not treated as a delimiter.

    >>> st = r'Bologne wrote (a (whole) (lot) \(of\)) sym\(ph(on)ies\) concertantes.'
    >>> common.stringTools.parenthesesMatch(st)
    [ParenthesesMatch(start=15, end=37, text='a (whole) (lot) \\(of\\)',
                      nested=[ParenthesesMatch(start=18, end=23, text='whole', nested=[]),
                              ParenthesesMatch(start=26, end=29, text='lot', nested=[])]),
     ParenthesesMatch(start=47, end=49, text='on', nested=[])]

    Other brackets can be used:

    >>> st = r'[Whammy bars] and [oboes] do [not [mix] very] [well.]'
    >>> common.stringTools.parenthesesMatch(st, open='[', close=']')
    [ParenthesesMatch(start=1, end=12, text='Whammy bars', nested=[]),
     ParenthesesMatch(start=19, end=24, text='oboes', nested=[]),
     ParenthesesMatch(start=30, end=44, text='not [mix] very',
                      nested=[ParenthesesMatch(start=35, end=38, text='mix', nested=[])]),
     ParenthesesMatch(start=47, end=52, text='well.', nested=[])]

    The `open` and `close` parameters can be multiple characters:

    >>> st = r'Did you eat <<beans>> today <<Pythagoreas<<?>>>>'
    >>> common.stringTools.parenthesesMatch(st, open='<<', close='>>')
    [ParenthesesMatch(start=14, end=19, text='beans', nested=[]),
     ParenthesesMatch(start=30, end=46, text='Pythagoreas<<?>>',
                      nested=[ParenthesesMatch(start=43, end=44, text='?', nested=[])])]

    They cannot, however, be empty:

    >>> common.stringTools.parenthesesMatch(st, open='', close='')
    Traceback (most recent call last):
    ValueError: Neither open nor close can be empty.

    Unmatched opening or closing parentheses will raise a ValueError:

    >>> common.stringTools.parenthesesMatch('My (parentheses (sometimes (continue',)
    Traceback (most recent call last):
    ValueError: Opening '(' at index 3 was never closed

    >>> common.stringTools.parenthesesMatch('This is a <bad> example>', open='<', close='>')
    Traceback (most recent call last):
    ValueError: Closing '>' without '<' at index 23.

    The index reported for an unclosed opening is where the opening begins, even
    if it is several characters long:

    >>> common.stringTools.parenthesesMatch('Did you eat <<beans', open='<<', close='>>')
    Traceback (most recent call last):
    ValueError: Opening '<<' at index 12 was never closed

    Note that using multiple characters like a prefix can have unintended consequences:

    >>> st = r'[Pitch("C4"), [Pitch("D5"), Pitch("E6")], Pitch("Pity("Z9")")]'
    >>> common.stringTools.parenthesesMatch(st, open='Pitch("', close='")')
    Traceback (most recent call last):
    ValueError: Closing '")' without 'Pitch("' at index 59.

    So to do something like this, you might need to get creative:

    >>> out = common.stringTools.parenthesesMatch(st, open='("', close='")')
    >>> out
    [ParenthesesMatch(start=8, end=10, text='C4', nested=[]),
     ParenthesesMatch(start=22, end=24, text='D5', nested=[]),
     ParenthesesMatch(start=35, end=37, text='E6', nested=[]),
     ParenthesesMatch(start=49, end=59, text='Pity("Z9")',
                      nested=[ParenthesesMatch(start=55, end=57, text='Z9', nested=[])])]
    >>> extractedPitches = []
    >>> for match in out:
    ...     if st[match.start - 7:match.start] == 'Pitch("':
    ...          extractedPitches.append(match.text)
    >>> extractedPitches
    ['C4', 'D5', 'E6', 'Pity("Z9")']

    * New in v9.3.
    '''
    if not open or not close:
        raise ValueError('Neither open nor close can be empty.')

    openLen = len(open)
    closeLen = len(close)

    # mainMatch is a sentinel at the bottom of the stack that collects the
    # top-level matches; it is never returned itself.
    mainMatch = ParenthesesMatch(-1, -1, '', [])
    stack: list[ParenthesesMatch] = [mainMatch]

    lastCharWasBackslash = False

    i = 0
    while i < len(s):
        if not lastCharWasBackslash:
            if s.startswith(open, i):
                stack.append(ParenthesesMatch(i + openLen, -1, '', []))
                i += openLen
                continue
            if s.startswith(close, i):
                if len(stack) <= 1:
                    raise ValueError(f'Closing {close!r} without {open!r} at index {i}.')
                curPM = stack.pop()
                curPM.end = i
                curPM.text = s[curPM.start:i]
                stack[-1].nested.append(curPM)
                i += closeLen
                continue

        # a backslash escapes the next character, unless it was itself
        # escaped by a backslash directly before it.
        lastCharWasBackslash = s[i] == '\\' and not lastCharWasBackslash
        i += 1

    if len(stack) > 1:
        # start points just past the opening delimiter, so step back over
        # its whole length to report where the opening itself begins.
        raise ValueError(
            f'Opening {open!r} at index {stack[1].start - openLen} was never closed'
        )

    return mainMatch.nested


# -----------------------------------------------------------------------------
if __name__ == '__main__':
    # Only when this file is executed directly as a script: import the
    # music21 package and call its mainTest() entry point
    # (presumably runs this module's tests -- confirm against music21.mainTest).
    import music21
    music21.mainTest()

1	# ------------------------------------------------------------------------------
2	# Name: common/stringTools.py
3	# Purpose: Utilities for strings
4	#
5	# Authors: Michael Scott Asato Cuthbert
6	# Christopher Ariza
7	#
8	# Copyright: Copyright © 2009-2015 Michael Scott Asato Cuthbert
9	# License: BSD, see license.txt
10	# ------------------------------------------------------------------------------
11	'''
12	Tools for working with strings
13	'''
14	from __future__ import annotations	1✔
15
16	__all__ = [	1✔
17	'whitespaceEqual',
18	'getNumFromStr',
19	'hyphenToCamelCase',
20	'camelCaseToHyphen',
21	'spaceCamelCase',
22	'getMd5',
23	'formatStr',
24	'stripAccents',
25	'normalizeFilename',
26	'removePunctuation',
27	'parenthesesMatch',
28	'ParenthesesMatch',
29	]
30
31	from collections.abc import Iterable	1✔
32	import typing as t	1✔
33	import dataclasses	1✔
34	import hashlib	1✔
35	import random	1✔
36	import re	1✔
37	import time	1✔
38	import string	1✔
39	import unicodedata	1✔
40
41	# ------------------------------------------------------------------------------
42	WHITESPACE = re.compile(r'\s+')	1✔
43	LINEFEED = re.compile('\n+')	1✔
44
45
46	def whitespaceEqual(a: str, b: str) -> bool:	1✔
47	# noinspection PyShadowingNames
48	r'''
49	returns True if a and b are equal except for whitespace differences
50
51	>>> a = ' hello \n there '
52	>>> b = 'hello there'
53	>>> c = ' bye there '
54	>>> common.whitespaceEqual(a, b)
55	True
56	>>> common.whitespaceEqual(a, c)
57	False
58	'''
59	a = WHITESPACE.sub('', a)	1✔
60	b = WHITESPACE.sub('', b)	1✔
61	a = LINEFEED.sub('', a)	1✔
62	b = LINEFEED.sub('', b)	1✔
63	if a == b:	1✔
64	return True	1✔
65	else:
66	return False	1✔
67
68
69	def getNumFromStr(usrStr: str, numbers: str = '0123456789') -> tuple[str, str]:	1✔
70	'''
71	Given a string, extract any numbers.
72	Return two strings, the numbers (as strings) and the remaining characters.
73
74	>>> common.getNumFromStr('23a')
75	('23', 'a')
76	>>> common.getNumFromStr('23a954Hello')
77	('23954', 'aHello')
78	>>> common.getNumFromStr('')
79	('', '')
80	'''
81	found = []	1✔
82	remain = []	1✔
83	for char in usrStr:	1✔
84	if char in numbers:	1✔
85	found.append(char)	1✔
86	else:
87	remain.append(char)	1✔
88	# returns numbers and then characters
89	return ''.join(found), ''.join(remain)	1✔
90
91
92	def hyphenToCamelCase(usrStr: str, replacement: str = '-') -> str:	1✔
93	'''
94	Given a hyphen-connected-string, change it to
95	a camelCaseConnectedString.
96
97	The replacement can be specified to be something besides a hyphen.
98
99	>>> common.hyphenToCamelCase('movement-name')
100	'movementName'
101
102	>>> common.hyphenToCamelCase('movement_name', replacement='_')
103	'movementName'
104
105	Safe to call on a string lacking the replacement character:
106
107	>>> common.hyphenToCamelCase('voice')
108	'voice'
109
110	And on "words" beginning with numbers:
111
112	>>> common.hyphenToCamelCase('music-21')
113	'music21'
114	'''
115	post = ''	1✔
116	for i, word in enumerate(usrStr.split(replacement)):	1✔
117	if i == 0:	1✔
118	post = word	1✔
119	else:
120	post += word.capitalize()	1✔
121	return post	1✔
122
123
124	def camelCaseToHyphen(usrStr: str, replacement: str = '-') -> str:	1✔
125	# pylint: disable=line-too-long
126	'''
127	Given a camel-cased string, or a mixture of numbers and characters,
128	create a space separated string.
129
130	The replacement can be specified to be something besides a hyphen, but only
131	a single character and not (for internal reasons) an uppercase character.
132
133	code from https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case
134
135	>>> common.camelCaseToHyphen('movementName')
136	'movement-name'
137
138	First letter can be uppercase as well:
139
140	>>> common.camelCaseToHyphen('MovementName')
141	'movement-name'
142
143	>>> common.camelCaseToHyphen('movementNameName')
144	'movement-name-name'
145
146	>>> common.camelCaseToHyphen('fileName', replacement='_')
147	'file_name'
148
149	Some things you cannot do:
150
151	>>> common.camelCaseToHyphen('fileName', replacement='NotFound')
152	Traceback (most recent call last):
153	ValueError: Replacement must be a single character.
154
155	>>> common.camelCaseToHyphen('fileName', replacement='A')
156	Traceback (most recent call last):
157	ValueError: Replacement cannot be an uppercase character.
158	'''
159	if len(replacement) != 1:	1✔
160	raise ValueError('Replacement must be a single character.')	1✔
161	if replacement.lower() != replacement:	1✔
162	raise ValueError('Replacement cannot be an uppercase character.')	1✔
163	s1 = re.sub('(.)([A-Z][a-z]+)', r'\1' + replacement + r'\2', usrStr)	1✔
164	return re.sub('([a-z0-9])([A-Z])', r'\1' + replacement + r'\2', s1).lower()	1✔
165
166
167	def spaceCamelCase(	1✔
168	usrStr: str,
169	replaceUnderscore: bool = True,
170	fixMeList: Iterable[str]\|None = None
171	) -> str:
172	'''
173	Given a camel-cased string, or a mixture of numbers and characters,
174	create a space separated string.
175
176	If replaceUnderscore is True (default) then underscores also become spaces (but without the _)
177
178	>>> common.spaceCamelCase('thisIsATest')
179	'this Is A Test'
180	>>> common.spaceCamelCase('ThisIsATest')
181	'This Is A Test'
182	>>> common.spaceCamelCase('movement3')
183	'movement 3'
184	>>> common.spaceCamelCase('opus41no1')
185	'opus 41 no 1'
186	>>> common.spaceCamelCase('opus23402no219235')
187	'opus 23402 no 219235'
188	>>> common.spaceCamelCase('opus23402no219235').title()
189	'Opus 23402 No 219235'
190
191	There is a small list called fixMeList that can fix mistakes.
192
193	>>> common.spaceCamelCase('PMFC22')
194	'PMFC 22'
195
196	>>> common.spaceCamelCase('hello_myke')
197	'hello myke'
198	>>> common.spaceCamelCase('hello_myke', replaceUnderscore=False)
199	'hello_myke'
200	'''
201	numbers = '0123456789.'	1✔
202	firstNum = False	1✔
203	firstChar = False	1✔
204	isNumber = False	1✔
205	lastIsNum = False	1✔
206	post: list[str] = []	1✔
207
208	# do not split these
209	fixupList: Iterable[str]
210	if fixMeList is None:	1✔
211	fixupList = ('PMFC',)	1✔
212	else:
213	fixupList = fixMeList	×
214
215	for char in usrStr:	1✔
216	if char in numbers:	1✔
217	isNumber = True	1✔
218	else:
219	isNumber = False	1✔
220
221	if isNumber and not firstNum and not lastIsNum:	1✔
222	firstNum = True	1✔
223	else:
224	firstNum = False	1✔
225
226	# for chars
227	if not isNumber and not firstChar and lastIsNum:	1✔
228	firstChar = True	1✔
229	else:
230	firstChar = False	1✔
231
232	if post:	1✔
233	if char.isupper() or firstNum or firstChar:	1✔
234	post.append(' ')	1✔
235	post.append(char)	1✔
236	else: # first character
237	post.append(char)	1✔
238
239	if isNumber:	1✔
240	lastIsNum = True	1✔
241	else:
242	lastIsNum = False	1✔
243	postStr = ''.join(post)	1✔
244	for fixMe in fixupList:	1✔
245	fixMeSpaced = ' '.join(fixMe)	1✔
246	postStr = postStr.replace(fixMeSpaced, fixMe)	1✔
247
248	if replaceUnderscore:	1✔
249	postStr = postStr.replace('_', ' ')	1✔
250	return postStr	1✔
251
252
253	def getMd5(value: str\|bytes\|None = None) -> str:	1✔
254	# noinspection SpellCheckingInspection
255	'''
256	Return an md5 hash from a string. If no value is given then
257	the current time plus a random number is encoded.
258
259	>>> common.getMd5('test')
260	'098f6bcd4621d373cade4e832627b4f6'
261	'''
262	if value is None:	1✔
263	value = str(time.time()) + str(random.random())	1✔
264	if isinstance(value, str):	1✔
265	value = value.encode('UTF-8')	1✔
266	m = hashlib.md5()	1✔
267	m.update(value)	1✔
268	return m.hexdigest()	1✔
269
270
271	def formatStr(msg: object,	1✔
272	*rest_of_message: object,
273	**keywords: object) -> str:
274	'''
275	DEPRECATED: do not use. May be removed at any time.
276
277	Format one or more data elements into string suitable for printing
278	straight to stderr or other outputs
279
280	>>> a = common.formatStr('test', '1', 2, 3)
281	>>> print(a)
282	test 1 2 3
283	<BLANKLINE>
284	'''
285	msgList: list[t.Any] = [msg, *rest_of_message]	1✔
286	for i in range(len(msgList)):	1✔
287	x = msgList[i]	1✔
288	if isinstance(x, bytes):	1✔
NEW 289	msgList[i] = x.decode('utf-8')	×
290	if not isinstance(x, str):	1✔
291	try:	1✔
292	msgList[i] = repr(x)	1✔
293	except TypeError:	×
294	try:	×
NEW 295	msgList[i] = x.decode('utf-8')	×
296	except AttributeError:	×
NEW 297	msgList[i] = ''	×
298	return ' '.join(msgList) + '\n'	1✔
299
300
301	def stripAccents(inputString: str) -> str:	1✔
302	r'''
303	removes accents from unicode strings.
304
305	>>> s = 'trés vite'
306	>>> 'é' in s
307	True
308	>>> common.stripAccents(s)
309	'tres vite'
310
311	Also handles the German Eszett and smart quotes
312
313	>>> common.stripAccents('Muß')
314	'Muss'
315	>>> common.stripAccents('Süss, “êtré”')
316	'Suss, "etre"'
317
318	Note -- it is still possible to have non-Ascii characters after this,
319	like in this Japanese expression for music:
320
321	>>> common.stripAccents('音楽')
322	'音楽'
323	'''
324	nfkd_form = (	1✔
325	unicodedata.normalize('NFKD', inputString)
326	.replace('ß', 'ss')
327	.replace('“', '"')
328	.replace('”', '"')
329	.replace('‘', "'")
330	.replace('’', "'")
331	)
332	return ''.join([c for c in nfkd_form if not unicodedata.combining(c)])	1✔
333
334
335	def normalizeFilename(name: str) -> str:	1✔
336	'''
337	take a name that might contain unicode characters, punctuation,
338	or spaces and
339	normalize it so that it is POSIX compliant (except for the limit
340	on length).
341
342	Takes in a string or unicode string and returns a string (unicode in Py3)
343	without any accented characters.
344
345	>>> common.normalizeFilename('03-Niccolò all’lessandra.not really.xml')
346	'03-Niccolo_all_lessandra_not_really.xml'
347	'''
348	extension = None	1✔
349	lenName = len(name)	1✔
350
351	if lenName > 5 and name[-4] == '.':	1✔
352	extension = str(name[lenName - 4:])	1✔
353	name = name[:lenName - 4]	1✔
354
355	name = stripAccents(name)	1✔
356	name = name.encode('ascii', 'ignore').decode('UTF-8')	1✔
357	name = re.sub(r'[^\w-]', '_', name).strip()	1✔
358	if extension is not None:	1✔
359	name += extension	1✔
360	return name	1✔
361
362
363	def removePunctuation(s: str) -> str:	1✔
364	'''
365	Remove all punctuation from a string.
366
367	>>> common.removePunctuation('This, is! my (face).')
368	'This is my face'
369	'''
370	maketrans = str.maketrans('', '', string.punctuation)	1✔
371	out = s.translate(maketrans)	1✔
372	return out	1✔
373
374	@dataclasses.dataclass	1✔
375	class ParenthesesMatch:	1✔
376	start: int	1✔
377	end: int	1✔
378	text: str	1✔
379	nested: list[ParenthesesMatch]	1✔
380
381	def parenthesesMatch(	1✔
382	s: str,
383	open: str = '(', # pylint: disable=redefined-builtin
384	close: str = ')',
385	) -> list[ParenthesesMatch]:
386	r'''
387	Utility tool to return a list of parentheses matches for a string using a dataclass
388	called `ParenthesesMatch` which has indices of the `start` and `end`
389	of the match, and the `text` of the match, and a set of `nested`
390	ParenthesesMatch objects (which may have their own nested objects).
391
392	>>> st = r'Bologne wrote (a (whole) (lot) \(of\)) sym\(ph(on)ies\) concertantes.'
393	>>> common.stringTools.parenthesesMatch(st)
394	[ParenthesesMatch(start=15, end=37, text='a (whole) (lot) \\(of\\)',
395	nested=[ParenthesesMatch(start=18, end=23, text='whole', nested=[]),
396	ParenthesesMatch(start=26, end=29, text='lot', nested=[])]),
397	ParenthesesMatch(start=47, end=49, text='on', nested=[])]
398
399	Other brackets can be used:
400
401	>>> st = r'[Whammy bars] and [oboes] do [not [mix] very] [well.]'
402	>>> common.stringTools.parenthesesMatch(st, open='[', close=']')
403	[ParenthesesMatch(start=1, end=12, text='Whammy bars', nested=[]),
404	ParenthesesMatch(start=19, end=24, text='oboes', nested=[]),
405	ParenthesesMatch(start=30, end=44, text='not [mix] very',
406	nested=[ParenthesesMatch(start=35, end=38, text='mix', nested=[])]),
407	ParenthesesMatch(start=47, end=52, text='well.', nested=[])]
408
409	The `open` and `close` parameters can be multiple characters:
410
411	>>> st = r'Did you eat <<beans>> today <<Pythagoreas<<?>>>>'
412	>>> common.stringTools.parenthesesMatch(st, open='<<', close='>>')
413	[ParenthesesMatch(start=14, end=19, text='beans', nested=[]),
414	ParenthesesMatch(start=30, end=46, text='Pythagoreas<<?>>',
415	nested=[ParenthesesMatch(start=43, end=44, text='?', nested=[])])]
416
417	They cannot, however, be empty:
418
419	>>> common.stringTools.parenthesesMatch(st, open='', close='')
420	Traceback (most recent call last):
421	ValueError: Neither open nor close can be empty.
422
423	Unmatched opening or closing parentheses will raise a ValueError:
424
425	>>> common.stringTools.parenthesesMatch('My (parentheses (sometimes (continue',)
426	Traceback (most recent call last):
427	ValueError: Opening '(' at index 3 was never closed
428
429	>>> common.stringTools.parenthesesMatch('This is a <bad> example>', open='<', close='>')
430	Traceback (most recent call last):
431	ValueError: Closing '>' without '<' at index 23.
432
433	Note that using multiple characters like a prefix can have unintended consequences:
434
435	>>> st = r'[Pitch("C4"), [Pitch("D5"), Pitch("E6")], Pitch("Pity("Z9")")]'
436	>>> common.stringTools.parenthesesMatch(st, open='Pitch("', close='")')
437	Traceback (most recent call last):
438	ValueError: Closing '")' without 'Pitch("' at index 59.
439
440	So to do something like this, you might need to get creative:
441
442	>>> out = common.stringTools.parenthesesMatch(st, open='("', close='")')
443	>>> out
444	[ParenthesesMatch(start=8, end=10, text='C4', nested=[]),
445	ParenthesesMatch(start=22, end=24, text='D5', nested=[]),
446	ParenthesesMatch(start=35, end=37, text='E6', nested=[]),
447	ParenthesesMatch(start=49, end=59, text='Pity("Z9")',
448	nested=[ParenthesesMatch(start=55, end=57, text='Z9', nested=[])])]
449	>>> extractedPitches = []
450	>>> for match in out:
451	... if st[match.start - 7:match.start] == 'Pitch("':
452	... extractedPitches.append(match.text)
453	>>> extractedPitches
454	['C4', 'D5', 'E6', 'Pity("Z9")']
455
456	* New in v9.3.
457	'''
458	if not open or not close:	1✔
459	raise ValueError('Neither open nor close can be empty.')	1✔
460
461	mainMatch = ParenthesesMatch(-1, -1, '', [])	1✔
462	stack: list[ParenthesesMatch] = [mainMatch]	1✔
463
464	lastCharWasBackslash = False	1✔
465
466	i = 0	1✔
467	while i < len(s):	1✔
468	if (not lastCharWasBackslash	1✔
469	and s[i:i + len(open)] == open):
470	curPM = ParenthesesMatch(i + len(open), -1, '', [])	1✔
471	stack.append(curPM)	1✔
472	i += len(open)	1✔
473	continue	1✔
474	elif (not lastCharWasBackslash	1✔
475	and s[i:i + len(close)] == close):
476	if len(stack) <= 1:	1✔
477	raise ValueError(f'Closing {close!r} without {open!r} at index {i}.')	1✔
478	curPM = stack.pop()	1✔
479	curPM.end = i	1✔
480	curPM.text = s[curPM.start:i]	1✔
481	stack[-1].nested.append(curPM)	1✔
482	i += len(close)	1✔
483	continue	1✔
484
485	if s[i] == '\\':	1✔
486	lastCharWasBackslash = not lastCharWasBackslash	1✔
487	else:
488	lastCharWasBackslash = False	1✔
489	i += 1	1✔
490
491	if len(stack) > 1:	1✔
492	raise ValueError(f'Opening {open!r} at index {stack[1].start - 1} was never closed')	1✔
493
494	return mainMatch.nested	1✔
495
496
497	# -----------------------------------------------------------------------------
498	if __name__ == '__main__':
499	import music21
500	music21.mainTest()

cuthbertLab / music21 / 28127151050

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous