5433206976

pending completion

Build # 5433206976

Build Type

Pull #1240

github

Committed by

web-flow

Commit Message

Merge a825cd291 into baf32cb57

Pull Request #1240: Apply tuplet to multiple components to express durations like 5/6 or 7/3 QL

Run Details

40 of 40 new or added lines in 4 files covered. (100.0%)

80763 of 86820 relevant lines covered (93.02%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.48

/music21/common/stringTools.py

# -*- coding: utf-8 -*-
# ------------------------------------------------------------------------------
# Name:         common/stringTools.py
# Purpose:      Utilities for strings
#
# Authors:      Michael Scott Asato Cuthbert
#               Christopher Ariza
#
# Copyright:    Copyright © 2009-2015 Michael Scott Asato Cuthbert
# License:      BSD, see license.txt
# ------------------------------------------------------------------------------
'''
Tools for working with strings
'''
from __future__ import annotations

# Names exported from this module by a star-import.
__all__ = [
    'whitespaceEqual',
    'getNumFromStr',
    'hyphenToCamelCase',
    'camelCaseToHyphen',
    'spaceCamelCase',
    'getMd5',
    'formatStr',
    'stripAccents',
    'normalizeFilename',
    'removePunctuation',
    'parenthesesMatch',
    'ParenthesesMatch',
]

import dataclasses
import hashlib
import random
import re
import time
import string
import unicodedata

# ------------------------------------------------------------------------------
# Matches one or more consecutive whitespace characters of any kind.
WHITESPACE = re.compile(r'\s+')
# Matches one or more consecutive line-feed characters.
LINEFEED = re.compile('\n+')


def whitespaceEqual(a: str, b: str) -> bool:
    # noinspection PyShadowingNames
    r'''
    Return True if `a` and `b` are equal once all whitespace is removed.

    Every run of whitespace (spaces, tabs, line feeds, etc.) is dropped from
    both strings before they are compared, so differences in the amount or
    position of whitespace are ignored.

    >>> a = '    hello \n there '
    >>> b = 'hello there'
    >>> c = ' bye there '
    >>> common.whitespaceEqual(a, b)
    True
    >>> common.whitespaceEqual(a, c)
    False
    '''
    # str.split() with no arguments splits on runs of whitespace (line feeds
    # included), so rejoining the pieces strips all whitespace in a single
    # pass; no separate line-feed pass is needed.
    return ''.join(a.split()) == ''.join(b.split())


def getNumFromStr(usrStr: str, numbers: str = '0123456789') -> tuple[str, str]:
    '''
    Separate the numeric characters of a string from all of its other
    characters.

    Returns a two-element tuple of strings: first the characters of `usrStr`
    that appear in `numbers` (in their original order), then every remaining
    character (also in original order).

    >>> common.getNumFromStr('23a')
    ('23', 'a')
    >>> common.getNumFromStr('23a954Hello')
    ('23954', 'aHello')
    >>> common.getNumFromStr('')
    ('', '')
    '''
    digits = ''.join(ch for ch in usrStr if ch in numbers)
    others = ''.join(ch for ch in usrStr if ch not in numbers)
    # numeric characters first, then everything else
    return digits, others


def hyphenToCamelCase(usrStr: str, replacement: str = '-') -> str:
    '''
    Convert a hyphen-connected-string into a camelCaseConnectedString.

    The string is split on `replacement` (a hyphen unless specified
    otherwise); the first piece is kept as is and each later piece
    is capitalized before the pieces are glued back together.

    >>> common.hyphenToCamelCase('movement-name')
    'movementName'

    >>> common.hyphenToCamelCase('movement_name', replacement='_')
    'movementName'

    Safe to call on a string lacking the replacement character:

    >>> common.hyphenToCamelCase('voice')
    'voice'

    And on "words" beginning with numbers:

    >>> common.hyphenToCamelCase('music-21')
    'music21'
    '''
    pieces = usrStr.split(replacement)
    if not pieces:
        return ''
    return pieces[0] + ''.join(piece.capitalize() for piece in pieces[1:])


def camelCaseToHyphen(usrStr: str, replacement: str = '-') -> str:
    # pylint: disable=line-too-long
    r'''
    Given a camel-cased string, or a mixture of numbers and characters,
    create a lowercase, hyphen-separated string.

    The replacement can be specified to be something besides a hyphen, but only
    a single character and not (for internal reasons) an uppercase character.

    code from https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case

    >>> common.camelCaseToHyphen('movementName')
    'movement-name'

    First letter can be uppercase as well:

    >>> common.camelCaseToHyphen('MovementName')
    'movement-name'

    >>> common.camelCaseToHyphen('movementNameName')
    'movement-name-name'

    >>> common.camelCaseToHyphen('fileName', replacement='_')
    'file_name'

    The replacement is always inserted literally, even if it is a character
    (such as a backslash) that is special in regular expression templates:

    >>> common.camelCaseToHyphen('fileName', replacement='\\')
    'file\\name'

    Some things you cannot do:

    >>> common.camelCaseToHyphen('fileName', replacement='NotFound')
    Traceback (most recent call last):
    ValueError: Replacement must be a single character.

    >>> common.camelCaseToHyphen('fileName', replacement='A')
    Traceback (most recent call last):
    ValueError: Replacement cannot be an uppercase character.

    Note that a lowercase letter or digit used as the replacement is itself
    matched by the second substitution pass and so may appear twice.
    '''
    if len(replacement) != 1:
        raise ValueError('Replacement must be a single character.')
    if replacement.lower() != replacement:
        raise ValueError('Replacement cannot be an uppercase character.')

    def joinGroups(match: re.Match) -> str:
        # Build the substitution in Python rather than via a template string:
        # in a template, a backslash or digit replacement would be parsed
        # as part of an escape or group reference.
        return match.group(1) + replacement + match.group(2)

    # First pass: separate a capitalized word from whatever precedes it.
    s1 = re.sub('(.)([A-Z][a-z]+)', joinGroups, usrStr)
    # Second pass: separate a lowercase letter or digit from a following capital.
    return re.sub('([a-z0-9])([A-Z])', joinGroups, s1).lower()


def spaceCamelCase(usrStr: str, replaceUnderscore=True, fixMeList=None) -> str:
    '''
    Insert spaces into a camel-cased string, or a mixture of numbers
    and characters, to make a space separated string.

    A space is placed before every uppercase character and at every
    switch between numeric characters (digits and the period) and other
    characters; nothing is ever placed before the first character.

    If replaceUnderscore is True (default) then underscores also become spaces (but without the _)

    >>> common.spaceCamelCase('thisIsATest')
    'this Is A Test'
    >>> common.spaceCamelCase('ThisIsATest')
    'This Is A Test'
    >>> common.spaceCamelCase('movement3')
    'movement 3'
    >>> common.spaceCamelCase('opus41no1')
    'opus 41 no 1'
    >>> common.spaceCamelCase('opus23402no219235')
    'opus 23402 no 219235'
    >>> common.spaceCamelCase('opus23402no219235').title()
    'Opus 23402 No 219235'

    Strings in fixMeList (by default, only 'PMFC') are put back together
    after having been split apart:

    >>> common.spaceCamelCase('PMFC22')
    'PMFC 22'

    >>> common.spaceCamelCase('hello_myke')
    'hello myke'
    >>> common.spaceCamelCase('hello_myke', replaceUnderscore=False)
    'hello_myke'
    '''
    numericChars = '0123456789.'
    # strings that should not remain split...
    protected = ('PMFC',) if fixMeList is None else fixMeList

    pieces: list[str] = []
    previousWasNumeric = False
    for ch in usrStr:
        currentIsNumeric = ch in numericChars
        # a word boundary is a capital letter or any change between
        # numeric and non-numeric characters
        atBoundary = ch.isupper() or currentIsNumeric != previousWasNumeric
        if pieces and atBoundary:
            pieces.append(' ')
        pieces.append(ch)
        previousWasNumeric = currentIsNumeric

    spaced = ''.join(pieces)
    for keepTogether in protected:
        spaced = spaced.replace(' '.join(keepTogether), keepTogether)

    return spaced.replace('_', ' ') if replaceUnderscore else spaced


def getMd5(value=None) -> str:
    # noinspection SpellCheckingInspection
    '''
    Return the hexadecimal md5 digest of `value`.

    If no value is given, the current time followed by a random
    number is hashed instead.  A value that the hash rejects
    with a TypeError (such as a str) is encoded as UTF-8 first.

    >>> common.getMd5('test')
    '098f6bcd4621d373cade4e832627b4f6'
    '''
    if value is None:
        value = f'{time.time()}{random.random()}'
    try:
        digest = hashlib.md5(value)
    except TypeError:  # str must be encoded before hashing
        digest = hashlib.md5(value.encode('UTF-8'))
    return digest.hexdigest()


def formatStr(msg,
              *rest_of_message,
              **keywords) -> str:
    '''
    DEPRECATED: do not use.  May be removed at any time.

    Format one or more data elements into string suitable for printing
    straight to stderr or other outputs.

    Strings are used as they are, bytes are decoded as UTF-8, and every other
    object is represented by its `repr()`.  The pieces are joined by single
    spaces and a line feed is added at the end.  Keyword arguments
    are accepted but ignored.

    >>> a = common.formatStr('test', '1', 2, 3)
    >>> print(a)
    test 1 2 3
    <BLANKLINE>

    >>> common.formatStr(b'bytes', 'and', None)
    'bytes and None\\n'
    '''
    def asText(item) -> str:
        # Convert one element of the message to a str.
        if isinstance(item, str):
            return item
        if isinstance(item, bytes):
            # return the decoded text directly so that it is not
            # replaced by the repr() of the bytes object
            return item.decode('utf-8')
        try:
            return repr(item)
        except TypeError:
            # best-effort fallback for objects whose repr() fails
            try:
                return item.decode('utf-8')
            except AttributeError:
                return ''

    return ' '.join(asText(item) for item in (msg, *rest_of_message)) + '\n'


def stripAccents(inputString: str) -> str:
    r'''
    Return a copy of a unicode string with its accents removed.

    The string is decomposed (NFKD normalization) and then all
    combining characters are discarded.

    >>> s = 'trés vite'
    >>> 'é' in s
    True
    >>> common.stripAccents(s)
    'tres vite'

    The German Eszett is replaced by a double s:

    >>> common.stripAccents('Muß')
    'Muss'
    '''
    decomposed = unicodedata.normalize('NFKD', inputString)
    decomposed = decomposed.replace('ß', 'ss')
    return ''.join(ch for ch in decomposed if unicodedata.combining(ch) == 0)


def normalizeFilename(name: str) -> str:
    '''
    Normalize a name that might contain unicode characters, punctuation,
    or spaces so that it is POSIX compliant (except for the limit
    on length).

    Accents are stripped, remaining non-ASCII characters are dropped, and
    every character that is not a word character or a hyphen becomes
    an underscore.  If the name is longer than five characters and its
    fourth-to-last character is a period, the last four characters are
    treated as an extension and are put back unchanged at the end.

    >>> common.normalizeFilename('03-Niccolò all’lessandra.not really.xml')
    '03-Niccolo_alllessandra_not_really.xml'
    '''
    suffix = None
    if len(name) > 5 and name[-4] == '.':
        # split off a period plus three characters as the extension
        suffix = str(name[-4:])
        name = name[:-4]

    asciiOnly = stripAccents(name).encode('ascii', 'ignore').decode('UTF-8')
    cleaned = re.sub(r'[^\w-]', '_', asciiOnly).strip()
    if suffix is None:
        return cleaned
    return cleaned + suffix


def removePunctuation(s: str) -> str:
    '''
    Return a copy of a string with every character found in
    `string.punctuation` deleted.

    >>> common.removePunctuation('This, is! my (face).')
    'This is my face'
    '''
    return s.translate(str.maketrans('', '', string.punctuation))

@dataclasses.dataclass
class ParenthesesMatch:
    '''
    One matched pair of delimiters found by `parenthesesMatch`.

    `start` is the index of the first character after the opening delimiter,
    `end` is the index of the closing delimiter, `text` is the text between
    them, and `nested` holds the matches found inside of this one.
    '''
    start: int
    end: int
    text: str
    nested: list[ParenthesesMatch]


def parenthesesMatch(
    s: str,
    open: str = '(',  # pylint: disable=redefined-builtin
    close: str = ')',
) -> list[ParenthesesMatch]:
    r'''
    Utility tool to return a list of parentheses matches for a string using a dataclass
    called `ParenthesesMatch` which has indices of the `start` and `end`
    of the match, and the `text` of the match, and a list of `nested`
    ParenthesesMatch objects (which may have their own nested objects).

    An `open` or `close` that directly follows an unescaped backslash
    is ignored.

    >>> st = r'Bologne wrote (a (whole) (lot) \(of\)) sym\(ph(on)ies\) concertantes.'
    >>> common.stringTools.parenthesesMatch(st)
    [ParenthesesMatch(start=15, end=37, text='a (whole) (lot) \\(of\\)',
                      nested=[ParenthesesMatch(start=18, end=23, text='whole', nested=[]),
                              ParenthesesMatch(start=26, end=29, text='lot', nested=[])]),
     ParenthesesMatch(start=47, end=49, text='on', nested=[])]

    Other brackets can be used:

    >>> st = r'[Whammy bars] and [oboes] do [not [mix] very] [well.]'
    >>> common.stringTools.parenthesesMatch(st, open='[', close=']')
    [ParenthesesMatch(start=1, end=12, text='Whammy bars', nested=[]),
     ParenthesesMatch(start=19, end=24, text='oboes', nested=[]),
     ParenthesesMatch(start=30, end=44, text='not [mix] very',
                      nested=[ParenthesesMatch(start=35, end=38, text='mix', nested=[])]),
     ParenthesesMatch(start=47, end=52, text='well.', nested=[])]

    The `open` and `close` parameters can be multiple characters:

    >>> st = r'Did you eat <<beans>> today <<Pythagoreas<<?>>>>'
    >>> common.stringTools.parenthesesMatch(st, open='<<', close='>>')
    [ParenthesesMatch(start=14, end=19, text='beans', nested=[]),
     ParenthesesMatch(start=30, end=46, text='Pythagoreas<<?>>',
                      nested=[ParenthesesMatch(start=43, end=44, text='?', nested=[])])]

    They cannot, however, be empty:

    >>> common.stringTools.parenthesesMatch(st, open='', close='')
    Traceback (most recent call last):
    ValueError: Neither open nor close can be empty.

    Unmatched opening or closing parentheses will raise a ValueError:

    >>> common.stringTools.parenthesesMatch('My (parentheses (sometimes (continue',)
    Traceback (most recent call last):
    ValueError: Opening '(' at index 3 was never closed

    >>> common.stringTools.parenthesesMatch('This is a <bad> example>', open='<', close='>')
    Traceback (most recent call last):
    ValueError: Closing '>' without '<' at index 23.

    The index reported is that of the first character of the delimiter, even
    when the delimiter is several characters long:

    >>> common.stringTools.parenthesesMatch('ab<<cd', open='<<', close='>>')
    Traceback (most recent call last):
    ValueError: Opening '<<' at index 2 was never closed

    Note that using multiple characters like a prefix can have unintended consequences:

    >>> st = r'[Pitch("C4"), [Pitch("D5"), Pitch("E6")], Pitch("Pity("Z9")")]'
    >>> common.stringTools.parenthesesMatch(st, open='Pitch("', close='")')
    Traceback (most recent call last):
    ValueError: Closing '")' without 'Pitch("' at index 59.

    So to do something like this, you might need to get creative:

    >>> out = common.stringTools.parenthesesMatch(st, open='("', close='")')
    >>> out
    [ParenthesesMatch(start=8, end=10, text='C4', nested=[]),
     ParenthesesMatch(start=22, end=24, text='D5', nested=[]),
     ParenthesesMatch(start=35, end=37, text='E6', nested=[]),
     ParenthesesMatch(start=49, end=59, text='Pity("Z9")',
                      nested=[ParenthesesMatch(start=55, end=57, text='Z9', nested=[])])]
    >>> extractedPitches = []
    >>> for match in out:
    ...     if st[match.start - 7:match.start] == 'Pitch("':
    ...          extractedPitches.append(match.text)
    >>> extractedPitches
    ['C4', 'D5', 'E6', 'Pity("Z9")']

    * New in v9.3.
    '''
    if not open or not close:
        raise ValueError('Neither open nor close can be empty.')

    openLength = len(open)
    closeLength = len(close)

    # mainMatch is a placeholder root: its `nested` list collects the
    # top-level matches and it always remains at the bottom of the stack.
    mainMatch = ParenthesesMatch(-1, -1, '', [])
    stack: list[ParenthesesMatch] = [mainMatch]

    lastCharWasBackslash = False

    i = 0
    while i < len(s):
        if not lastCharWasBackslash:
            # `open` is tested before `close`, so it wins if both match here.
            if s.startswith(open, i):
                stack.append(ParenthesesMatch(i + openLength, -1, '', []))
                i += openLength
                continue
            if s.startswith(close, i):
                if len(stack) <= 1:
                    raise ValueError(f'Closing {close!r} without {open!r} at index {i}.')
                curPM = stack.pop()
                curPM.end = i
                curPM.text = s[curPM.start:i]
                stack[-1].nested.append(curPM)
                i += closeLength
                continue

        # A backslash preceded by an (unescaped) backslash is itself escaped
        # and so does not escape the character that follows it.
        if s[i] == '\\':
            lastCharWasBackslash = not lastCharWasBackslash
        else:
            lastCharWasBackslash = False
        i += 1

    if len(stack) > 1:
        # `start` points just past the opening delimiter, so step back by the
        # full delimiter length to report where the delimiter begins.
        unclosedIndex = stack[1].start - openLength
        raise ValueError(f'Opening {open!r} at index {unclosedIndex} was never closed')

    return mainMatch.nested


# -----------------------------------------------------------------------------
if __name__ == '__main__':
    # When this file is executed directly as a script, hand off
    # to music21.mainTest().
    # NOTE(review): presumably this runs the doctests of this module -- confirm.
    import music21
    music21.mainTest()

1	# -- coding: utf-8 --
2	# ------------------------------------------------------------------------------
3	# Name: common/stringTools.py
4	# Purpose: Utilities for strings
5	#
6	# Authors: Michael Scott Asato Cuthbert
7	# Christopher Ariza
8	#
9	# Copyright: Copyright © 2009-2015 Michael Scott Asato Cuthbert
10	# License: BSD, see license.txt
11	# ------------------------------------------------------------------------------
12	'''	1✔
13	Tools for working with strings
14	'''
15	from __future__ import annotations	1✔
16
17	__all__ = [	1✔
18	'whitespaceEqual',
19	'getNumFromStr',
20	'hyphenToCamelCase',
21	'camelCaseToHyphen',
22	'spaceCamelCase',
23	'getMd5',
24	'formatStr',
25	'stripAccents',
26	'normalizeFilename',
27	'removePunctuation',
28	'parenthesesMatch',
29	'ParenthesesMatch',
30	]
31
32	import dataclasses	1✔
33	import hashlib	1✔
34	import random	1✔
35	import re	1✔
36	import time	1✔
37	import string	1✔
38	import unicodedata	1✔
39
40	# ------------------------------------------------------------------------------
41	WHITESPACE = re.compile(r'\s+')	1✔
42	LINEFEED = re.compile('\n+')	1✔
43
44
45	def whitespaceEqual(a: str, b: str) -> bool:	1✔
46	# noinspection PyShadowingNames
47	r'''
48	returns True if a and b are equal except for whitespace differences
49
50	>>> a = ' hello \n there '
51	>>> b = 'hello there'
52	>>> c = ' bye there '
53	>>> common.whitespaceEqual(a, b)
54	True
55	>>> common.whitespaceEqual(a, c)
56	False
57	'''
58	a = WHITESPACE.sub('', a)	1✔
59	b = WHITESPACE.sub('', b)	1✔
60	a = LINEFEED.sub('', a)	1✔
61	b = LINEFEED.sub('', b)	1✔
62	if a == b:	1✔
63	return True	1✔
64	else:
65	return False	1✔
66
67
68	def getNumFromStr(usrStr: str, numbers: str = '0123456789') -> tuple[str, str]:	1✔
69	'''
70	Given a string, extract any numbers.
71	Return two strings, the numbers (as strings) and the remaining characters.
72
73	>>> common.getNumFromStr('23a')
74	('23', 'a')
75	>>> common.getNumFromStr('23a954Hello')
76	('23954', 'aHello')
77	>>> common.getNumFromStr('')
78	('', '')
79	'''
80	found = []	1✔
81	remain = []	1✔
82	for char in usrStr:	1✔
83	if char in numbers:	1✔
84	found.append(char)	1✔
85	else:
86	remain.append(char)	1✔
87	# returns numbers and then characters
88	return ''.join(found), ''.join(remain)	1✔
89
90
91	def hyphenToCamelCase(usrStr: str, replacement: str = '-') -> str:	1✔
92	'''
93	Given a hyphen-connected-string, change it to
94	a camelCaseConnectedString.
95
96	The replacement can be specified to be something besides a hyphen.
97
98	>>> common.hyphenToCamelCase('movement-name')
99	'movementName'
100
101	>>> common.hyphenToCamelCase('movement_name', replacement='_')
102	'movementName'
103
104	Safe to call on a string lacking the replacement character:
105
106	>>> common.hyphenToCamelCase('voice')
107	'voice'
108
109	And on "words" beginning with numbers:
110
111	>>> common.hyphenToCamelCase('music-21')
112	'music21'
113	'''
114	post = ''	1✔
115	for i, word in enumerate(usrStr.split(replacement)):	1✔
116	if i == 0:	1✔
117	post = word	1✔
118	else:
119	post += word.capitalize()	1✔
120	return post	1✔
121
122
123	def camelCaseToHyphen(usrStr: str, replacement: str = '-') -> str:	1✔
124	# pylint: disable=line-too-long
125	'''
126	Given a camel-cased string, or a mixture of numbers and characters,
127	create a space separated string.
128
129	The replacement can be specified to be something besides a hyphen, but only
130	a single character and not (for internal reasons) an uppercase character.
131
132	code from https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case
133
134	>>> common.camelCaseToHyphen('movementName')
135	'movement-name'
136
137	First letter can be uppercase as well:
138
139	>>> common.camelCaseToHyphen('MovementName')
140	'movement-name'
141
142	>>> common.camelCaseToHyphen('movementNameName')
143	'movement-name-name'
144
145	>>> common.camelCaseToHyphen('fileName', replacement='_')
146	'file_name'
147
148	Some things you cannot do:
149
150	>>> common.camelCaseToHyphen('fileName', replacement='NotFound')
151	Traceback (most recent call last):
152	ValueError: Replacement must be a single character.
153
154	>>> common.camelCaseToHyphen('fileName', replacement='A')
155	Traceback (most recent call last):
156	ValueError: Replacement cannot be an uppercase character.
157	'''
158	if len(replacement) != 1:	1✔
159	raise ValueError('Replacement must be a single character.')	1✔
160	if replacement.lower() != replacement:	1✔
161	raise ValueError('Replacement cannot be an uppercase character.')	1✔
162	s1 = re.sub('(.)([A-Z][a-z]+)', r'\1' + replacement + r'\2', usrStr)	1✔
163	return re.sub('([a-z0-9])([A-Z])', r'\1' + replacement + r'\2', s1).lower()	1✔
164
165
166	def spaceCamelCase(usrStr: str, replaceUnderscore=True, fixMeList=None) -> str:	1✔
167	'''
168	Given a camel-cased string, or a mixture of numbers and characters,
169	create a space separated string.
170
171	If replaceUnderscore is True (default) then underscores also become spaces (but without the _)
172
173	>>> common.spaceCamelCase('thisIsATest')
174	'this Is A Test'
175	>>> common.spaceCamelCase('ThisIsATest')
176	'This Is A Test'
177	>>> common.spaceCamelCase('movement3')
178	'movement 3'
179	>>> common.spaceCamelCase('opus41no1')
180	'opus 41 no 1'
181	>>> common.spaceCamelCase('opus23402no219235')
182	'opus 23402 no 219235'
183	>>> common.spaceCamelCase('opus23402no219235').title()
184	'Opus 23402 No 219235'
185
186	There is a small list called fixMeList that can fix mistakes.
187
188	>>> common.spaceCamelCase('PMFC22')
189	'PMFC 22'
190
191	>>> common.spaceCamelCase('hello_myke')
192	'hello myke'
193	>>> common.spaceCamelCase('hello_myke', replaceUnderscore=False)
194	'hello_myke'
195	'''
196	numbers = '0123456789.'	1✔
197	firstNum = False	1✔
198	firstChar = False	1✔
199	isNumber = False	1✔
200	lastIsNum = False	1✔
201	post: list[str] = []	1✔
202
203	# do not split these...
204	if fixMeList is None:	1✔
205	fixupList = ('PMFC',)	1✔
206	else:
207	fixupList = fixMeList	×
208
209	for char in usrStr:	1✔
210	if char in numbers:	1✔
211	isNumber = True	1✔
212	else:
213	isNumber = False	1✔
214
215	if isNumber and not firstNum and not lastIsNum:	1✔
216	firstNum = True	1✔
217	else:
218	firstNum = False	1✔
219
220	# for chars
221	if not isNumber and not firstChar and lastIsNum:	1✔
222	firstChar = True	1✔
223	else:
224	firstChar = False	1✔
225
226	if post:	1✔
227	if char.isupper() or firstNum or firstChar:	1✔
228	post.append(' ')	1✔
229	post.append(char)	1✔
230	else: # first character
231	post.append(char)	1✔
232
233	if isNumber:	1✔
234	lastIsNum = True	1✔
235	else:
236	lastIsNum = False	1✔
237	postStr = ''.join(post)	1✔
238	for fixMe in fixupList:	1✔
239	fixMeSpaced = ' '.join(fixMe)	1✔
240	postStr = postStr.replace(fixMeSpaced, fixMe)	1✔
241
242	if replaceUnderscore:	1✔
243	postStr = postStr.replace('_', ' ')	1✔
244	return postStr	1✔
245
246
247	def getMd5(value=None) -> str:	1✔
248	# noinspection SpellCheckingInspection
249	'''
250	Return an md5 hash from a string. If no value is given then
251	the current time plus a random number is encoded.
252
253	>>> common.getMd5('test')
254	'098f6bcd4621d373cade4e832627b4f6'
255	'''
256	if value is None:	1✔
257	value = str(time.time()) + str(random.random())	1✔
258	m = hashlib.md5()	1✔
259	try:	1✔
260	m.update(value)	1✔
261	except TypeError: # unicode...	1✔
262	m.update(value.encode('UTF-8'))	1✔
263
264	return m.hexdigest()	1✔
265
266
267	def formatStr(msg,	1✔
268	*rest_of_message,
269	**keywords) -> str:
270	'''
271	DEPRECATED: do not use. May be removed at any time.
272
273	Format one or more data elements into string suitable for printing
274	straight to stderr or other outputs
275
276	>>> a = common.formatStr('test', '1', 2, 3)
277	>>> print(a)
278	test 1 2 3
279	<BLANKLINE>
280	'''
281	msg = [msg, *rest_of_message]	1✔
282	for i in range(len(msg)):	1✔
283	x = msg[i]	1✔
284	if isinstance(x, bytes):	1✔
285	msg[i] = x.decode('utf-8')	×
286	if not isinstance(x, str):	1✔
287	try:	1✔
288	msg[i] = repr(x)	1✔
289	except TypeError:	×
290	try:	×
291	msg[i] = x.decode('utf-8')	×
292	except AttributeError:	×
293	msg[i] = ''	×
294	return ' '.join(msg) + '\n'	1✔
295
296
297	def stripAccents(inputString: str) -> str:	1✔
298	r'''
299	removes accents from unicode strings.
300
301	>>> s = 'trés vite'
302	>>> 'é' in s
303	True
304	>>> common.stripAccents(s)
305	'tres vite'
306
307	Also handles the German Eszett
308
309	>>> common.stripAccents('Muß')
310	'Muss'
311	'''
312	nfkd_form = unicodedata.normalize('NFKD', inputString).replace('ß', 'ss')	1✔
313	return ''.join([c for c in nfkd_form if not unicodedata.combining(c)])	1✔
314
315
316	def normalizeFilename(name: str) -> str:	1✔
317	'''
318	take a name that might contain unicode characters, punctuation,
319	or spaces and
320	normalize it so that it is POSIX compliant (except for the limit
321	on length).
322
323	Takes in a string or unicode string and returns a string (unicode in Py3)
324	without any accented characters.
325
326	>>> common.normalizeFilename('03-Niccolò all’lessandra.not really.xml')
327	'03-Niccolo_alllessandra_not_really.xml'
328	'''
329	extension = None	1✔
330	lenName = len(name)	1✔
331
332	if lenName > 5 and name[-4] == '.':	1✔
333	extension = str(name[lenName - 4:])	1✔
334	name = name[:lenName - 4]	1✔
335
336	name = stripAccents(name)	1✔
337	name = name.encode('ascii', 'ignore').decode('UTF-8')	1✔
338	name = re.sub(r'[^\w-]', '_', name).strip()	1✔
339	if extension is not None:	1✔
340	name += extension	1✔
341	return name	1✔
342
343
344	def removePunctuation(s: str) -> str:	1✔
345	'''
346	Remove all punctuation from a string.
347
348	>>> common.removePunctuation('This, is! my (face).')
349	'This is my face'
350	'''
351	maketrans = str.maketrans('', '', string.punctuation)	1✔
352	out = s.translate(maketrans)	1✔
353	return out	1✔
354
355	@dataclasses.dataclass	1✔
356	class ParenthesesMatch:	1✔
357	start: int	1✔
358	end: int	1✔
359	text: str	1✔
360	nested: list[ParenthesesMatch]	1✔
361
362	def parenthesesMatch(	1✔
363	s: str,
364	open: str = '(', # pylint: disable=redefined-builtin
365	close: str = ')',
366	) -> list[ParenthesesMatch]:
367	r'''
368	Utility tool to return a list of parentheses matches for a string using a dataclass
369	called `ParenthesesMatch` which has indices of the `start` and `end`
370	of the match, and the `text` of the match, and a set of `nested`
371	ParenthesesMatch objects (which may have their own nested objects).
372
373	>>> st = r'Bologne wrote (a (whole) (lot) \(of\)) sym\(ph(on)ies\) concertantes.'
374	>>> common.stringTools.parenthesesMatch(st)
375	[ParenthesesMatch(start=15, end=37, text='a (whole) (lot) \\(of\\)',
376	nested=[ParenthesesMatch(start=18, end=23, text='whole', nested=[]),
377	ParenthesesMatch(start=26, end=29, text='lot', nested=[])]),
378	ParenthesesMatch(start=47, end=49, text='on', nested=[])]
379
380	Other brackets can be used:
381
382	>>> st = r'[Whammy bars] and [oboes] do [not [mix] very] [well.]'
383	>>> common.stringTools.parenthesesMatch(st, open='[', close=']')
384	[ParenthesesMatch(start=1, end=12, text='Whammy bars', nested=[]),
385	ParenthesesMatch(start=19, end=24, text='oboes', nested=[]),
386	ParenthesesMatch(start=30, end=44, text='not [mix] very',
387	nested=[ParenthesesMatch(start=35, end=38, text='mix', nested=[])]),
388	ParenthesesMatch(start=47, end=52, text='well.', nested=[])]
389
390	The `open` and `close` parameters can be multiple characters:
391
392	>>> st = r'Did you eat <<beans>> today <<Pythagoreas<<?>>>>'
393	>>> common.stringTools.parenthesesMatch(st, open='<<', close='>>')
394	[ParenthesesMatch(start=14, end=19, text='beans', nested=[]),
395	ParenthesesMatch(start=30, end=46, text='Pythagoreas<<?>>',
396	nested=[ParenthesesMatch(start=43, end=44, text='?', nested=[])])]
397
398	They cannot, however, be empty:
399
400	>>> common.stringTools.parenthesesMatch(st, open='', close='')
401	Traceback (most recent call last):
402	ValueError: Neither open nor close can be empty.
403
404	Unmatched opening or closing parentheses will raise a ValueError:
405
406	>>> common.stringTools.parenthesesMatch('My (parentheses (sometimes (continue',)
407	Traceback (most recent call last):
408	ValueError: Opening '(' at index 3 was never closed
409
410	>>> common.stringTools.parenthesesMatch('This is a <bad> example>', open='<', close='>')
411	Traceback (most recent call last):
412	ValueError: Closing '>' without '<' at index 23.
413
414	Note that using multiple characters like a prefix can have unintended consequences:
415
416	>>> st = r'[Pitch("C4"), [Pitch("D5"), Pitch("E6")], Pitch("Pity("Z9")")]'
417	>>> common.stringTools.parenthesesMatch(st, open='Pitch("', close='")')
418	Traceback (most recent call last):
419	ValueError: Closing '")' without 'Pitch("' at index 59.
420
421	So to do something like this, you might need to get creative:
422	>>> out = common.stringTools.parenthesesMatch(st, open='("', close='")')
423	>>> out
424	[ParenthesesMatch(start=8, end=10, text='C4', nested=[]),
425	ParenthesesMatch(start=22, end=24, text='D5', nested=[]),
426	ParenthesesMatch(start=35, end=37, text='E6', nested=[]),
427	ParenthesesMatch(start=49, end=59, text='Pity("Z9")',
428	nested=[ParenthesesMatch(start=55, end=57, text='Z9', nested=[])])]
429	>>> extractedPitches = []
430	>>> for match in out:
431	... if st[match.start - 7:match.start] == 'Pitch("':
432	... extractedPitches.append(match.text)
433	>>> extractedPitches
434	['C4', 'D5', 'E6', 'Pity("Z9")']
435
436	* New in v9.3.
437	'''
438	if not open or not close:	1✔
439	raise ValueError('Neither open nor close can be empty.')	1✔
440
441	mainMatch = ParenthesesMatch(-1, -1, '', [])	1✔
442	stack: list[ParenthesesMatch] = [mainMatch]	1✔
443
444	lastCharWasBackslash = False	1✔
445
446	i = 0	1✔
447	while i < len(s):	1✔
448	if (not lastCharWasBackslash	1✔
449	and s[i:i + len(open)] == open):
450	curPM = ParenthesesMatch(i + len(open), -1, '', [])	1✔
451	stack.append(curPM)	1✔
452	i += len(open)	1✔
453	continue	1✔
454	elif (not lastCharWasBackslash	1✔
455	and s[i:i + len(close)] == close):
456	if len(stack) <= 1:	1✔
457	raise ValueError(f'Closing {close!r} without {open!r} at index {i}.')	1✔
458	curPM = stack.pop()	1✔
459	curPM.end = i	1✔
460	curPM.text = s[curPM.start:i]	1✔
461	stack[-1].nested.append(curPM)	1✔
462	i += len(close)	1✔
463	continue	1✔
464
465	if s[i] == '\\':	1✔
466	lastCharWasBackslash = not lastCharWasBackslash	1✔
467	else:
468	lastCharWasBackslash = False	1✔
469	i += 1	1✔
470
471	if len(stack) > 1:	1✔
472	raise ValueError(f'Opening {open!r} at index {stack[1].start-1} was never closed')	1✔
473
474	return mainMatch.nested	1✔
475
476
477	# -----------------------------------------------------------------------------
478	if __name__ == '__main__':
479	import music21
480	music21.mainTest()

cuthbertLab / music21 / 5433206976

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous