cengal.text_processing.help_tools.versions.v_0.help_tools

View Source

  1#!/usr/bin/env python
  2# coding=utf-8
  3
  4# Copyright © 2012-2024 ButenkoMS. All rights reserved. Contacts: <gtalk@butenkoms.space>
  5# 
  6# Licensed under the Apache License, Version 2.0 (the "License");
  7# you may not use this file except in compliance with the License.
  8# You may obtain a copy of the License at
  9# 
 10#     http://www.apache.org/licenses/LICENSE-2.0
 11# 
 12# Unless required by applicable law or agreed to in writing, software
 13# distributed under the License is distributed on an "AS IS" BASIS,
 14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15# See the License for the specific language governing permissions and
 16# limitations under the License.
 17
 18from cengal.code_flow_control.smart_values.versions.v_0 import ResultExistence
 19import string
 20from cengal.data_manipulation.conversion.binary import ubyte_to_bytes
 21from cengal.data_manipulation.conversion.sequence import get_slice_from_array
 22from typing import Tuple, Union
 23
 24"""
 25Module Docstring
 26Docstrings: http://www.python.org/dev/peps/pep-0257/
 27"""
 28
 29__author__ = "ButenkoMS <gtalk@butenkoms.space>"
 30__copyright__ = "Copyright © 2012-2024 ButenkoMS. All rights reserved. Contacts: <gtalk@butenkoms.space>"
 31__credits__ = ["ButenkoMS <gtalk@butenkoms.space>", ]
 32__license__ = "Apache License, Version 2.0"
 33__version__ = "4.4.1"
 34__maintainer__ = "ButenkoMS <gtalk@butenkoms.space>"
 35__email__ = "gtalk@butenkoms.space"
 36# __status__ = "Prototype"
 37__status__ = "Development"
 38# __status__ = "Production"
 39
 40
 41class AbstractSetOfSymbols:
 42    def __init__(self):
 43        self.ascii_word_delimiters = None
 44        self.ascii_word_delimiters__bytes = None
 45        self.ascii_word_delimiters__set = None
 46        self.ascii_word_delimiters__set_bytes = None
 47
 48        # without "_" symbol
 49        self.ascii_modern_word_delimiters = None
 50        self.ascii_modern_word_delimiters__bytes = None
 51        self.ascii_modern_word_delimiters__set = None
 52        self.ascii_modern_word_delimiters__set_bytes = None
 53
 54
 55class SetOfSymbols(AbstractSetOfSymbols):
 56    def __init__(self):
 57        super(SetOfSymbols, self).__init__()
 58        self.ascii_word_delimiters = string.punctuation + string.whitespace
 59        self.ascii_word_delimiters__bytes = self.ascii_word_delimiters.encode()
 60        self.ascii_word_delimiters__set = set(self.ascii_word_delimiters)
 61        self.ascii_word_delimiters__set_bytes = set()
 62        for delim_char in self.ascii_word_delimiters__set:
 63            self.ascii_word_delimiters__set_bytes.add(delim_char.encode())
 64
 65        self.ascii_modern_word_delimiters__set = set(self.ascii_word_delimiters__set)
 66        self.ascii_modern_word_delimiters__set.remove('_')
 67        self.ascii_modern_word_delimiters = ''.join(self.ascii_modern_word_delimiters__set)
 68        self.ascii_modern_word_delimiters__bytes = self.ascii_modern_word_delimiters.encode()
 69        self.ascii_modern_word_delimiters__set_bytes = set()
 70        for delim_char in self.ascii_modern_word_delimiters__set:
 71            self.ascii_modern_word_delimiters__set_bytes.add(delim_char.encode())
 72
 73
# Shared module-level instance; find_substring_full_word__one_shot reads its
# ascii_modern_word_delimiters__set_bytes attribute.
SET_OF_SYMBOLS = SetOfSymbols()
 75
 76
 77def get_text_in_brackets(data, left_b, right_b):
 78    # TODO: если в строке не найдена закрывающая скобка - последний символ строки будет удален. Проверить, не ломает ли
 79    # такое поведение алгоритмы в частности в upk-утилитах и в UCB-компиляторе
 80    left_offset = data.find(left_b)
 81    data = data[left_offset + len(left_b):]
 82    right_offset = data.find(right_b)
 83    data = data[:right_offset]
 84    return data
 85
 86
 87def get_text_in_brackets_offset(data, left_b, right_b, offset=0):
 88    # TODO: если в строке не найдена закрывающая скобка - последний символ строки будет удален. Проверить, не ломает ли
 89    # такое поведение алгоритмы в частности в upk-утилитах и в UCB-компиляторе
 90    result = None
 91    result_data = None
 92    result_offset = None
 93    if offset > 0:
 94        data = data[offset:]
 95    left_b_len = len(left_b)
 96    right_b_len = len(right_b)
 97    left_offset = data.find(left_b)
 98    data = data[left_offset + left_b_len:]
 99    right_offset = data.find(right_b)
100    result_data = data[:right_offset]
101    result_offset = offset + left_offset + left_b_len + right_offset + right_b_len
102    result = (result_data, result_offset)
103    return result
104
105
def detach_slice_from_string(string, substring, offset=0):
    """
    Split ``string`` around the first occurrence of ``substring`` found at or after ``offset``.

    :param string: input string
    :param substring: desired substring
    :param offset: index to start the search from
    :return: ``(string_before, string_slice, string_after, end_of_slice)``
    :raises ValueError: when ``substring`` is not found (raised by ``index``)
    """
    begin = string.index(substring, offset)
    finish = begin + len(substring)
    return string[:begin], string[begin:finish], string[finish:], finish
115
116
def detach_slice_from_string__case_insensitive(string, substring, offset=0):
    """
    Case-insensitive variant of ``detach_slice_from_string``.

    The match is located in the lower-cased copies, while the returned pieces are cut from the
    original ``string`` so they keep their original case.

    :param string: input string
    :param substring: desired substring (compared ignoring case)
    :param offset: index to start the search from
    :return: ``(string_before, string_slice, string_after, end_of_slice)``
    :raises ValueError: when ``substring`` is not found (raised by ``index``)
    """
    # NOTE(review): indexes found in the lower-cased copy are applied to the original string;
    # this assumes ``lower()`` does not change the length of the text - confirm for non-ASCII input.
    begin = string.lower().index(substring.lower(), offset)
    finish = begin + len(substring)
    return string[:begin], string[begin:finish], string[finish:], finish
128
129
def find_substring(full_string: bytes, substring: bytes, offset: int=0)\
        ->Tuple[Union[None, int], Union[None, int]]:
    """
    Find the first occurrence of ``substring`` at or after ``offset``.

    :param full_string: data to search in
    :param substring: data to search for
    :param offset: index to start the search from
    :return: ``(start_index, end_index)`` of the occurrence, ``end_index`` being exclusive;
        ``(None, None)`` when there is no occurrence
    """
    found_at = full_string.find(substring, offset)
    if found_at < 0:
        return None, None

    return found_at, found_at + len(substring)
144
145
def find_substring_full_word(full_string: bytes, substring: bytes, offset: int=0, smart_word_bounds: bool=False)\
        ->Tuple[Union[None, int], Union[None, int]]:
    """
    Find the first occurrence of ``substring`` that stands as a whole word.

    Occurrences are examined one by one with ``find_substring_full_word__one_shot`` until one is
    found whose both edges lie on word boundaries.

    :param full_string: data to search in
    :param substring: data to search for
    :param offset: index to start the search from
    :param smart_word_bounds: passed through to ``find_substring_full_word__one_shot``
    :return: ``(start_index, end_index)`` as plain ints, ``end_index`` being exclusive;
        ``(None, None)`` when no whole-word occurrence exists
    """
    while True:
        start_index, end_index = find_substring_full_word__one_shot(full_string, substring, offset, smart_word_bounds)
        if start_index.result is None:
            # substring does not occur at or after the current offset
            return None, None

        if start_index and end_index:
            # both edges are word boundaries: full word was found
            return start_index.result, end_index.result

        # Resume right after the start of the rejected occurrence rather than after its end,
        # so a whole-word occurrence that overlaps the rejected one is not skipped
        # (e.g. b"a a" inside b"xa a a").
        offset = start_index.result + 1
176
177
class FindSubstringErrorFullStringCanNotBeEmpty(Exception):
    """
    Error type named for an empty ``full_string`` argument.

    NOTE(review): no code visible in this module raises it; find_substring_full_word__one_shot
    returns a "not found" result for an empty ``full_string`` instead.
    """
    pass
180
181
class FindSubstringErrorSubstringCanNotBeEmpty(Exception):
    """
    Error type named for an empty ``substring`` argument.

    NOTE(review): no code visible in this module raises it; find_substring_full_word__one_shot
    returns a "not found" result for an empty ``substring`` instead.
    """
    pass
184
185
def find_substring_full_word__one_shot(full_string: bytes, substring: bytes, offset: int=0,
                                       smart_word_bounds: bool=False)\
        ->Tuple[ResultExistence, ResultExistence]:
    """
    Locate the first occurrence of ``substring`` at or after ``offset`` and report, for each of
    its two edges, whether that edge lies on a word boundary.

    An edge is on a word boundary when it coincides with the start/end of ``full_string`` or when
    the neighbouring byte is in ``SET_OF_SYMBOLS.ascii_modern_word_delimiters__set_bytes``.

    :param full_string: data to search in
    :param substring: data to search for
    :param offset: index to start the search from
    :param smart_word_bounds: when True, an edge of ``substring`` that is itself a delimiter byte
        is always accepted as a word boundary
    :return: ``(word_start, word_end)``. ``result`` holds the start index and the index just past
        the occurrence (``None`` for both when nothing was found or when either argument is
        empty); ``existence`` tells whether the corresponding edge is a word boundary.
    """
    delimiters = SET_OF_SYMBOLS.ascii_modern_word_delimiters__set_bytes

    word_start = ResultExistence(False, None)
    word_end = ResultExistence(False, None)

    if (not full_string) or (not substring):
        return word_start, word_end

    found_at = full_string.find(substring, offset)
    if found_at < 0:
        # no occurrence: both results stay (False, None)
        return word_start, word_end

    # Indexing bytes yields an int; assumes ubyte_to_bytes turns it back into a one-byte
    # ``bytes`` object comparable with the members of ``delimiters`` - TODO confirm.
    is_word_start_is_delimiter = False
    is_word_end_is_delimiter = False
    if smart_word_bounds:
        is_word_start_is_delimiter = ubyte_to_bytes(substring[0]) in delimiters
        is_word_end_is_delimiter = ubyte_to_bytes(substring[-1]) in delimiters

    word_start.result = found_at
    if is_word_start_is_delimiter or (0 == found_at):
        word_start.existence = True
    elif ubyte_to_bytes(full_string[found_at - 1]) in delimiters:
        word_start.existence = True

    word_end.result = found_at + len(substring)
    if is_word_end_is_delimiter or (word_end.result == len(full_string)):
        # Fix: this branch used to set word_start.existence, so a substring ending with a
        # delimiter never got a valid end bound (and could get a bogus start bound).
        word_end.existence = True
    elif ubyte_to_bytes(full_string[word_end.result]) in delimiters:
        word_end.existence = True

    return word_start, word_end
234
235
def check_is_slice_is_in_string(substring: bytes, full_string: bytes, check_whole_word=False,
                                smart_word_bounds: bool=False):
    """
    Tell whether ``substring`` occurs in ``full_string``.

    :param substring: data to search for
    :param full_string: data to search in
    :param check_whole_word: when true, only an occurrence reported by
        ``find_substring_full_word`` counts; otherwise a plain ``in`` test is used
    :param smart_word_bounds: passed through to ``find_substring_full_word``
    :return: True when an occurrence exists, False otherwise
    """
    if not check_whole_word:
        return substring in full_string

    word_start, word_end = find_substring_full_word(full_string, substring, smart_word_bounds=smart_word_bounds)
    return (word_start is not None) and (word_end is not None)
246
247
def check_is_slice_is_in_string__case_insensitive(substring: bytes, full_string: bytes, check_whole_word=False,
                                                  smart_word_bounds: bool=False):
    """
    Case-insensitive variant of ``check_is_slice_is_in_string``.

    Both arguments are lower-cased and then handed to ``check_is_slice_is_in_string``.

    :param substring: data to search for
    :param full_string: data to search in
    :param check_whole_word: passed through unchanged
    :param smart_word_bounds: passed through unchanged
    :return: result of ``check_is_slice_is_in_string`` on the lower-cased data
    """
    lowered_substring = substring.lower()
    lowered_full_string = full_string.lower()
    return check_is_slice_is_in_string(lowered_substring, lowered_full_string, check_whole_word, smart_word_bounds)
251
252
def detach_all_slices_from_string(string, substring, function__detach=None, function__check_is_in=None):
    """
    Repeatedly cut ``substring`` out of ``string`` until the remainder no longer contains it.

    :param string: input string
    :param substring: desired substring
    :param function__detach: detach_slice_from_string (when None) or detach_slice_from_string__case_insensitive
    :param function__check_is_in: check_is_slice_is_in_string (when None) or
        check_is_slice_is_in_string__case_insensitive
    :return: ([(original_string_part_0, string_slice), (original_string_part_1, string_slice), ...,
        (original_string_part_N, string_slice)], string_after)
    :raises ValueError: when ``substring`` is empty, or (with the default detach functions) when
        ``string`` does not contain ``substring`` at all
    """
    if not substring:
        # An empty substring is "found" at index 0 forever, so the loop below would never make
        # progress and would grow the result list without bound.
        raise ValueError('substring can not be empty')

    function__detach = function__detach or detach_slice_from_string
    function__check_is_in = function__check_is_in or check_is_slice_is_in_string

    # The first detach is unconditional: a missing substring is reported by function__detach.
    detached = function__detach(string, substring)
    result_list = [(detached[0], detached[1])]
    string_after = detached[2]
    while function__check_is_in(substring, string_after):
        detached = function__detach(string_after, substring)
        result_list.append((detached[0], detached[1]))
        string_after = detached[2]

    return result_list, string_after
278
279
def detach_all_slices_from_string__case_insensitive(string, substring):
    """
    Case-insensitive variant of ``detach_all_slices_from_string``.

    :param string: input string
    :param substring: desired substring (compared ignoring case)
    :return: same structure as ``detach_all_slices_from_string`` returns
    """
    return detach_all_slices_from_string(
        string,
        substring,
        function__detach=detach_slice_from_string__case_insensitive,
        function__check_is_in=check_is_slice_is_in_string__case_insensitive,
    )
283
284
def is_printable(s, codec='utf8'):
    """
    Tell whether ``s`` can be decoded with ``codec``.

    :param s: object with a ``decode`` method (e.g. ``bytes``)
    :param codec: name of the codec to try
    :return: True when decoding succeeds, False when it raises ``UnicodeDecodeError``
    """
    try:
        s.decode(codec)
        return True
    except UnicodeDecodeError:
        return False
292
293
def bytes_to_printable(bytes_data):
    """
    Return ``str(bytes_data)`` without its first two characters and its last character.

    For a ``bytes`` object this strips the ``b'`` prefix and the closing quote of its textual
    form, leaving the escaped content.

    :param bytes_data: data to convert
    :return: the trimmed textual form
    """
    return str(bytes_data)[2:-1]
297
298
def levenshtein_distance(a, b):
    """
    Calculates the Levenshtein distance between a and b.

    Only two rows of the edit matrix are kept, sized by the shorter of the two sequences.

    :param a: first sequence
    :param b: second sequence
    :return: minimal number of insertions, deletions and substitutions turning one into the other
    """
    # Put the shorter sequence along the row so that memory use is O(min(len(a), len(b))).
    shorter, longer = (a, b) if len(a) <= len(b) else (b, a)
    width = len(shorter)

    row = list(range(width + 1))
    for i, long_item in enumerate(longer, start=1):
        prev_row, row = row, [i] + [0] * width
        for j, short_item in enumerate(shorter, start=1):
            substitution = prev_row[j - 1]
            if short_item != long_item:
                substitution += 1
            row[j] = min(prev_row[j] + 1, row[j - 1] + 1, substitution)

    return row[width]
317
318
def un_escape_str(text: str) -> str:
    """
    Interpret backslash escape sequences (``\\n``, ``\\t``, ``\\xNN``, ``\\uNNNN``, ...) in ``text``.

    :param text: string containing literal escape sequences
    :return: string with the escape sequences replaced by the characters they denote
    """
    # 'unicode_escape' decodes every raw byte as Latin-1, so the text must not be UTF-8 encoded
    # first (that turned every non-ASCII character into mojibake, e.g. "é" -> "Ã©").
    # Latin-1 keeps characters up to U+00FF as single bytes; anything above is written as a
    # \uXXXX / \UXXXXXXXX escape by 'backslashreplace' and restored by the decode step.
    # NOTE: a lone backslash placed directly before a character above U+00FF is still ambiguous.
    return text.encode('latin-1', 'backslashreplace').decode('unicode_escape')

class AbstractSetOfSymbols: View Source

class AbstractSetOfSymbols:
    """
    Declares the delimiter attributes shared by symbol-set classes.

    Every attribute is initialised to ``None`` here; a subclass such as ``SetOfSymbols``
    is expected to assign the real values.
    """

    def __init__(self):
        # Complete family of word delimiters.
        self.ascii_word_delimiters = self.ascii_word_delimiters__bytes = None
        self.ascii_word_delimiters__set = self.ascii_word_delimiters__set_bytes = None

        # "Modern" family: the same delimiters without the "_" symbol.
        self.ascii_modern_word_delimiters = self.ascii_modern_word_delimiters__bytes = None
        self.ascii_modern_word_delimiters__set = self.ascii_modern_word_delimiters__set_bytes = None

ascii_word_delimiters

ascii_word_delimiters__bytes

ascii_word_delimiters__set

ascii_word_delimiters__set_bytes

ascii_modern_word_delimiters

ascii_modern_word_delimiters__bytes

ascii_modern_word_delimiters__set

ascii_modern_word_delimiters__set_bytes

class SetOfSymbols(AbstractSetOfSymbols): View Source

class SetOfSymbols(AbstractSetOfSymbols):
    """
    Concrete ASCII delimiter sets built from ``string.punctuation`` and ``string.whitespace``.

    Each family is available as a ``str``, as ``bytes``, as a ``set`` of one-character strings
    and as a ``set`` of one-byte ``bytes`` objects. The "modern" family is the full family
    without the "_" symbol.
    """

    def __init__(self):
        super(SetOfSymbols, self).__init__()
        self.ascii_word_delimiters = string.punctuation + string.whitespace
        self.ascii_word_delimiters__bytes = self.ascii_word_delimiters.encode()
        self.ascii_word_delimiters__set = set(self.ascii_word_delimiters)
        self.ascii_word_delimiters__set_bytes = {
            delim_char.encode() for delim_char in self.ascii_word_delimiters__set
        }

        # Build the ordered string first and derive the sets from it. Joining a set (as was done
        # before) yields a character order that changes between interpreter runs because of
        # string hash randomisation, which made the str/bytes forms non-reproducible.
        self.ascii_modern_word_delimiters = self.ascii_word_delimiters.replace('_', '')
        self.ascii_modern_word_delimiters__bytes = self.ascii_modern_word_delimiters.encode()
        self.ascii_modern_word_delimiters__set = set(self.ascii_modern_word_delimiters)
        self.ascii_modern_word_delimiters__set_bytes = {
            delim_char.encode() for delim_char in self.ascii_modern_word_delimiters__set
        }

ascii_word_delimiters

ascii_word_delimiters__bytes

ascii_word_delimiters__set

ascii_word_delimiters__set_bytes

ascii_modern_word_delimiters__set

ascii_modern_word_delimiters

ascii_modern_word_delimiters__bytes

ascii_modern_word_delimiters__set_bytes

SET_OF_SYMBOLS = <SetOfSymbols object>

def get_text_in_brackets(data, left_b, right_b): View Source

def get_text_in_brackets(data, left_b, right_b):
    """
    Return the text between the first ``left_b`` and the first ``right_b`` that follows it.

    :param data: string (or bytes) to search in
    :param left_b: opening bracket sequence
    :param right_b: closing bracket sequence
    :return: the text found between the two brackets
    """
    # TODO: if the closing bracket is not found, ``find`` returns -1 and the last character of
    # the string is cut off. Check whether this behaviour breaks any algorithms, in particular
    # in the upk utilities and in the UCB compiler.
    content_start = data.find(left_b) + len(left_b)
    tail = data[content_start:]
    return tail[:tail.find(right_b)]

def get_text_in_brackets_offset(data, left_b, right_b, offset=0): View Source

 88def get_text_in_brackets_offset(data, left_b, right_b, offset=0):
 89    # TODO: если в строке не найдена закрывающая скобка - последний символ строки будет удален. Проверить, не ломает ли
 90    # такое поведение алгоритмы в частности в upk-утилитах и в UCB-компиляторе
 91    result = None
 92    result_data = None
 93    result_offset = None
 94    if offset > 0:
 95        data = data[offset:]
 96    left_b_len = len(left_b)
 97    right_b_len = len(right_b)
 98    left_offset = data.find(left_b)
 99    data = data[left_offset + left_b_len:]
100    right_offset = data.find(right_b)
101    result_data = data[:right_offset]
102    result_offset = offset + left_offset + left_b_len + right_offset + right_b_len
103    result = (result_data, result_offset)
104    return result

def detach_slice_from_string(string, substring, offset=0): View Source

def detach_slice_from_string(string, substring, offset=0):
    """
    Split ``string`` around the first occurrence of ``substring`` found at or after ``offset``.

    :param string: input string
    :param substring: desired substring
    :param offset: index to start the search from
    :return: ``(string_before, string_slice, string_after, end_of_slice)``
    :raises ValueError: when ``substring`` is not found (raised by ``index``)
    """
    begin = string.index(substring, offset)
    finish = begin + len(substring)
    return string[:begin], string[begin:finish], string[finish:], finish

def detach_slice_from_string__case_insensitive(string, substring, offset=0): View Source

def detach_slice_from_string__case_insensitive(string, substring, offset=0):
    """
    Case-insensitive variant of ``detach_slice_from_string``.

    The match is located in the lower-cased copies, while the returned pieces are cut from the
    original ``string`` so they keep their original case.

    :param string: input string
    :param substring: desired substring (compared ignoring case)
    :param offset: index to start the search from
    :return: ``(string_before, string_slice, string_after, end_of_slice)``
    :raises ValueError: when ``substring`` is not found (raised by ``index``)
    """
    # NOTE(review): indexes found in the lower-cased copy are applied to the original string;
    # this assumes ``lower()`` does not change the length of the text - confirm for non-ASCII input.
    begin = string.lower().index(substring.lower(), offset)
    finish = begin + len(substring)
    return string[:begin], string[begin:finish], string[finish:], finish

def find_substring( full_string: bytes, substring: bytes, offset: int = 0) -> Tuple[Union[NoneType, int], Union[NoneType, int]]: View Source

def find_substring(full_string: bytes, substring: bytes, offset: int=0)\
        ->Tuple[Union[None, int], Union[None, int]]:
    """
    Find the first occurrence of ``substring`` at or after ``offset``.

    :param full_string: data to search in
    :param substring: data to search for
    :param offset: index to start the search from
    :return: ``(start_index, end_index)`` of the occurrence, ``end_index`` being exclusive;
        ``(None, None)`` when there is no occurrence
    """
    found_at = full_string.find(substring, offset)
    if found_at < 0:
        return None, None

    return found_at, found_at + len(substring)

def find_substring_full_word( full_string: bytes, substring: bytes, offset: int = 0, smart_word_bounds: bool = False) -> Tuple[cengal.code_flow_control.smart_values.versions.v_0.result_types.ResultExistence, cengal.code_flow_control.smart_values.versions.v_0.result_types.ResultExistence]: View Source

def find_substring_full_word(full_string: bytes, substring: bytes, offset: int=0, smart_word_bounds: bool=False)\
        ->Tuple[Union[None, int], Union[None, int]]:
    """
    Find the first occurrence of ``substring`` that stands as a whole word.

    Occurrences are examined one by one with ``find_substring_full_word__one_shot`` until one is
    found whose both edges lie on word boundaries.

    :param full_string: data to search in
    :param substring: data to search for
    :param offset: index to start the search from
    :param smart_word_bounds: passed through to ``find_substring_full_word__one_shot``
    :return: ``(start_index, end_index)`` as plain ints, ``end_index`` being exclusive;
        ``(None, None)`` when no whole-word occurrence exists
    """
    while True:
        start_index, end_index = find_substring_full_word__one_shot(full_string, substring, offset, smart_word_bounds)
        if start_index.result is None:
            # substring does not occur at or after the current offset
            return None, None

        if start_index and end_index:
            # both edges are word boundaries: full word was found
            return start_index.result, end_index.result

        # Resume right after the start of the rejected occurrence rather than after its end,
        # so a whole-word occurrence that overlaps the rejected one is not skipped
        # (e.g. b"a a" inside b"xa a a").
        offset = start_index.result + 1

class FindSubstringErrorFullStringCanNotBeEmpty(builtins.Exception): View Source

class FindSubstringErrorFullStringCanNotBeEmpty(Exception):
    """
    Error type named for an empty ``full_string`` argument.

    NOTE(review): no code visible in this module raises it; find_substring_full_word__one_shot
    returns a "not found" result for an empty ``full_string`` instead.
    """
    pass

Common base class for all non-exit exceptions.

Inherited Members

builtins.Exception: Exception
builtins.BaseException: with_traceback; args

class FindSubstringErrorSubstringCanNotBeEmpty(builtins.Exception): View Source

class FindSubstringErrorSubstringCanNotBeEmpty(Exception):
    """
    Error type named for an empty ``substring`` argument.

    NOTE(review): no code visible in this module raises it; find_substring_full_word__one_shot
    returns a "not found" result for an empty ``substring`` instead.
    """
    pass

Common base class for all non-exit exceptions.

Inherited Members

builtins.Exception: Exception
builtins.BaseException: with_traceback; args

def find_substring_full_word__one_shot( full_string: bytes, substring: bytes, offset: int = 0, smart_word_bounds: bool = False) -> Tuple[cengal.code_flow_control.smart_values.versions.v_0.result_types.ResultExistence, cengal.code_flow_control.smart_values.versions.v_0.result_types.ResultExistence]: View Source

def find_substring_full_word__one_shot(full_string: bytes, substring: bytes, offset: int=0,
                                       smart_word_bounds: bool=False)\
        ->Tuple[ResultExistence, ResultExistence]:
    """
    Locate the first occurrence of ``substring`` at or after ``offset`` and report, for each of
    its two edges, whether that edge lies on a word boundary.

    An edge is on a word boundary when it coincides with the start/end of ``full_string`` or when
    the neighbouring byte is in ``SET_OF_SYMBOLS.ascii_modern_word_delimiters__set_bytes``.

    :param full_string: data to search in
    :param substring: data to search for
    :param offset: index to start the search from
    :param smart_word_bounds: when True, an edge of ``substring`` that is itself a delimiter byte
        is always accepted as a word boundary
    :return: ``(word_start, word_end)``. ``result`` holds the start index and the index just past
        the occurrence (``None`` for both when nothing was found or when either argument is
        empty); ``existence`` tells whether the corresponding edge is a word boundary.
    """
    delimiters = SET_OF_SYMBOLS.ascii_modern_word_delimiters__set_bytes

    word_start = ResultExistence(False, None)
    word_end = ResultExistence(False, None)

    if (not full_string) or (not substring):
        return word_start, word_end

    found_at = full_string.find(substring, offset)
    if found_at < 0:
        # no occurrence: both results stay (False, None)
        return word_start, word_end

    # Indexing bytes yields an int; assumes ubyte_to_bytes turns it back into a one-byte
    # ``bytes`` object comparable with the members of ``delimiters`` - TODO confirm.
    is_word_start_is_delimiter = False
    is_word_end_is_delimiter = False
    if smart_word_bounds:
        is_word_start_is_delimiter = ubyte_to_bytes(substring[0]) in delimiters
        is_word_end_is_delimiter = ubyte_to_bytes(substring[-1]) in delimiters

    word_start.result = found_at
    if is_word_start_is_delimiter or (0 == found_at):
        word_start.existence = True
    elif ubyte_to_bytes(full_string[found_at - 1]) in delimiters:
        word_start.existence = True

    word_end.result = found_at + len(substring)
    if is_word_end_is_delimiter or (word_end.result == len(full_string)):
        # Fix: this branch used to set word_start.existence, so a substring ending with a
        # delimiter never got a valid end bound (and could get a bogus start bound).
        word_end.existence = True
    elif ubyte_to_bytes(full_string[word_end.result]) in delimiters:
        word_end.existence = True

    return word_start, word_end

def check_is_slice_is_in_string( substring: bytes, full_string: bytes, check_whole_word=False, smart_word_bounds: bool = False): View Source

def check_is_slice_is_in_string(substring: bytes, full_string: bytes, check_whole_word=False,
                                smart_word_bounds: bool=False):
    """
    Tell whether ``substring`` occurs in ``full_string``.

    :param substring: data to search for
    :param full_string: data to search in
    :param check_whole_word: when true, only an occurrence reported by
        ``find_substring_full_word`` counts; otherwise a plain ``in`` test is used
    :param smart_word_bounds: passed through to ``find_substring_full_word``
    :return: True when an occurrence exists, False otherwise
    """
    if not check_whole_word:
        return substring in full_string

    word_start, word_end = find_substring_full_word(full_string, substring, smart_word_bounds=smart_word_bounds)
    return (word_start is not None) and (word_end is not None)

def check_is_slice_is_in_string__case_insensitive( substring: bytes, full_string: bytes, check_whole_word=False, smart_word_bounds: bool = False): View Source

def check_is_slice_is_in_string__case_insensitive(substring: bytes, full_string: bytes, check_whole_word=False,
                                                  smart_word_bounds: bool=False):
    """
    Case-insensitive variant of ``check_is_slice_is_in_string``.

    Both arguments are lower-cased and then handed to ``check_is_slice_is_in_string``.

    :param substring: data to search for
    :param full_string: data to search in
    :param check_whole_word: passed through unchanged
    :param smart_word_bounds: passed through unchanged
    :return: result of ``check_is_slice_is_in_string`` on the lower-cased data
    """
    lowered_substring = substring.lower()
    lowered_full_string = full_string.lower()
    return check_is_slice_is_in_string(lowered_substring, lowered_full_string, check_whole_word, smart_word_bounds)

def detach_all_slices_from_string(string, substring, function__detach=None, function__check_is_in=None): View Source

def detach_all_slices_from_string(string, substring, function__detach=None, function__check_is_in=None):
    """
    Repeatedly cut ``substring`` out of ``string`` until the remainder no longer contains it.

    :param string: input string
    :param substring: desired substring
    :param function__detach: detach_slice_from_string (when None) or detach_slice_from_string__case_insensitive
    :param function__check_is_in: check_is_slice_is_in_string (when None) or
        check_is_slice_is_in_string__case_insensitive
    :return: ([(original_string_part_0, string_slice), (original_string_part_1, string_slice), ...,
        (original_string_part_N, string_slice)], string_after)
    :raises ValueError: when ``substring`` is empty, or (with the default detach functions) when
        ``string`` does not contain ``substring`` at all
    """
    if not substring:
        # An empty substring is "found" at index 0 forever, so the loop below would never make
        # progress and would grow the result list without bound.
        raise ValueError('substring can not be empty')

    function__detach = function__detach or detach_slice_from_string
    function__check_is_in = function__check_is_in or check_is_slice_is_in_string

    # The first detach is unconditional: a missing substring is reported by function__detach.
    detached = function__detach(string, substring)
    result_list = [(detached[0], detached[1])]
    string_after = detached[2]
    while function__check_is_in(substring, string_after):
        detached = function__detach(string_after, substring)
        result_list.append((detached[0], detached[1]))
        string_after = detached[2]

    return result_list, string_after

:param string: input string :param substring: desired substring :param function__detach: detach_slice_from_string (when None) or detach_slice_from_string__case_insensitive :param function__check_is_in: check_is_slice_is_in_string (when None) or check_is_slice_is_in_string__case_insensitive :return: ([(original_string_part_0, string_slice), (original_string_part_1, string_slice), ..., (original_string_part_N, string_slice)], string_after)

def detach_all_slices_from_string__case_insensitive(string, substring): View Source

def detach_all_slices_from_string__case_insensitive(string, substring):
    """
    Case-insensitive variant of ``detach_all_slices_from_string``.

    :param string: input string
    :param substring: desired substring (compared ignoring case)
    :return: same structure as ``detach_all_slices_from_string`` returns
    """
    return detach_all_slices_from_string(
        string,
        substring,
        function__detach=detach_slice_from_string__case_insensitive,
        function__check_is_in=check_is_slice_is_in_string__case_insensitive,
    )

def is_printable(s, codec='utf8'): View Source

def is_printable(s, codec='utf8'):
    """
    Tell whether ``s`` can be decoded with ``codec``.

    :param s: object with a ``decode`` method (e.g. ``bytes``)
    :param codec: name of the codec to try
    :return: True when decoding succeeds, False when it raises ``UnicodeDecodeError``
    """
    try:
        s.decode(codec)
        return True
    except UnicodeDecodeError:
        return False

def bytes_to_printable(bytes_data): View Source

def bytes_to_printable(bytes_data):
    """
    Return ``str(bytes_data)`` without its first two characters and its last character.

    For a ``bytes`` object this strips the ``b'`` prefix and the closing quote of its textual
    form, leaving the escaped content.

    :param bytes_data: data to convert
    :return: the trimmed textual form
    """
    return str(bytes_data)[2:-1]

def levenshtein_distance(a, b): View Source

def levenshtein_distance(a, b):
    """
    Calculates the Levenshtein distance between a and b.

    Only two rows of the edit matrix are kept, sized by the shorter of the two sequences.

    :param a: first sequence
    :param b: second sequence
    :return: minimal number of insertions, deletions and substitutions turning one into the other
    """
    # Put the shorter sequence along the row so that memory use is O(min(len(a), len(b))).
    shorter, longer = (a, b) if len(a) <= len(b) else (b, a)
    width = len(shorter)

    row = list(range(width + 1))
    for i, long_item in enumerate(longer, start=1):
        prev_row, row = row, [i] + [0] * width
        for j, short_item in enumerate(shorter, start=1):
            substitution = prev_row[j - 1]
            if short_item != long_item:
                substitution += 1
            row[j] = min(prev_row[j] + 1, row[j - 1] + 1, substitution)

    return row[width]

Calculates the Levenshtein distance between a and b.

def un_escape_str(text: str) -> str: View Source

def un_escape_str(text: str) -> str:
    """
    Interpret backslash escape sequences (``\\n``, ``\\t``, ``\\xNN``, ``\\uNNNN``, ...) in ``text``.

    :param text: string containing literal escape sequences
    :return: string with the escape sequences replaced by the characters they denote
    """
    # 'unicode_escape' decodes every raw byte as Latin-1, so the text must not be UTF-8 encoded
    # first (that turned every non-ASCII character into mojibake, e.g. "é" -> "Ã©").
    # Latin-1 keeps characters up to U+00FF as single bytes; anything above is written as a
    # \uXXXX / \UXXXXXXXX escape by 'backslashreplace' and restored by the decode step.
    # NOTE: a lone backslash placed directly before a character above U+00FF is still ambiguous.
    return text.encode('latin-1', 'backslashreplace').decode('unicode_escape')