cengal.text_processing.open_text_file.versions.v_0.open_text_file

  1#!/usr/bin/env python
  2# coding=utf-8
  3
  4# Copyright © 2012-2024 ButenkoMS. All rights reserved. Contacts: <gtalk@butenkoms.space>
  5# 
  6# Licensed under the Apache License, Version 2.0 (the "License");
  7# you may not use this file except in compliance with the License.
  8# You may obtain a copy of the License at
  9# 
 10#     http://www.apache.org/licenses/LICENSE-2.0
 11# 
 12# Unless required by applicable law or agreed to in writing, software
 13# distributed under the License is distributed on an "AS IS" BASIS,
 14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15# See the License for the specific language governing permissions and
 16# limitations under the License.
 17
 18
 19__all__ = [
 20    'OpenTextFile',
 21    'TextFileInfo',
 22]
 23
 24
 25"""
 26Module Docstring
 27Docstrings: http://www.python.org/dev/peps/pep-0257/
 28"""
 29
 30__author__ = "ButenkoMS <gtalk@butenkoms.space>"
 31__copyright__ = "Copyright © 2012-2024 ButenkoMS. All rights reserved. Contacts: <gtalk@butenkoms.space>"
 32__credits__ = ["ButenkoMS <gtalk@butenkoms.space>", ]
 33__license__ = "Apache License, Version 2.0"
 34__version__ = "4.4.1"
 35__maintainer__ = "ButenkoMS <gtalk@butenkoms.space>"
 36__email__ = "gtalk@butenkoms.space"
 37# __status__ = "Prototype"
 38__status__ = "Development"
 39# __status__ = "Production"
 40
 41
 42from cengal.text_processing.encoding_detection import detect_and_decode
 43from cengal.code_flow_control.smart_values import ValueHolder
 44
 45from pathlib import PurePath
 46from os import PathLike
 47from io import IOBase
 48from typing import Union, Tuple, Optional, NamedTuple
 49
 50
class TextFileInfo(NamedTuple):
    """Values handed to the body of a ``with OpenTextFile(...)`` block.

    An instance is built and returned by ``OpenTextFile.__enter__``.
    """
    # Holder wrapping the text read from the file; OpenTextFile.__exit__
    # reads its ``.value`` when writing the text back.
    text: ValueHolder[str]
    # Encoding reported by detect_and_decode (binary mode) or the encoding
    # that was passed to OpenTextFile (text mode / write-only mode).
    encoding: str
    # Third value returned by detect_and_decode (presumably the byte-order
    # mark - confirm against that helper); empty bytes when nothing was
    # detected that way.
    bom_bytes: bytes
    # Raw bytes read from the file in binary mode, or the text re-encoded
    # with ``encoding`` in text mode; empty bytes when the file is not read.
    bin_data: bytes
    # The file object returned by the built-in open().
    file: IOBase
 57
 58
 59class OpenTextFile:
 60    def __init__(self, 
 61                file: Union[str, bytes, PathLike],
 62                mode: str = 'r+b',
 63                buffering: int = -1,
 64                encoding = None,
 65                errors = None,
 66                newline = None,
 67                closefd = True,
 68                opener = None,
 69                detect_as_utf8_when_possible: bool = True,
 70                check_text_for_utf8_compliance: bool = True,
 71                ):
 72        self.file: Union[str, bytes, PathLike] = file
 73        self.mode: str = mode
 74        self.buffering = buffering
 75        self.encoding = encoding
 76        self.proposed_encoding = encoding
 77        self.errors = errors
 78        self.newline = newline
 79        self.closefd = closefd
 80        self.opener = opener
 81        self.detect_as_utf8_when_possible: bool = detect_as_utf8_when_possible
 82        self.check_text_for_utf8_compliance: bool = check_text_for_utf8_compliance
 83        self.readable: bool = ('r' in mode) or ('+' in mode)
 84        self.writtable: bool = ('w' in mode) or ('a' in mode) or ('+' in mode)
 85        self.binary_mode: bool = 'b' in mode
 86        if self.binary_mode:
 87            self.encoding = None
 88        
 89        self.text_value_holder: Optional[ValueHolder[str]] = None
 90        self.detected_encoding: str = None
 91        self.bom_bytes: bytes = None
 92    
 93    def __enter__(self) -> TextFileInfo:
 94        if (not self.readable) and (not self.proposed_encoding):
 95            raise ValueError('Encoding must be specified for files without reading mode')
 96
 97        self.file = open(self.file, self.mode, self.buffering, self.encoding, self.errors, self.newline, self.closefd, self.opener)
 98        if self.binary_mode:
 99            if self.readable:
100                bin_data: bytes = self.file.read()
101                text, encoding, bom_bytes = detect_and_decode(bin_data, self.detect_as_utf8_when_possible, self.check_text_for_utf8_compliance)
102            else:
103                bin_data = bytes()
104                text = str()
105                encoding = self.proposed_encoding
106                bom_bytes = bytes()
107            
108            self.detected_encoding = encoding
109            self.bom_bytes = bom_bytes
110            self.text_value_holder = ValueHolder(self.writtable, text)
111            return TextFileInfo(self.text_value_holder, encoding, bom_bytes, bin_data, self.file)
112        else:
113            encoding = self.proposed_encoding
114            if self.readable:
115                text: str = self.file.read()
116                bin_text: bytes = text.encode(encoding)
117            else:
118                bin_text = bytes()
119                text = str()
120            
121            self.detected_encoding = encoding
122            bom_bytes = bytes()
123            self.bom_bytes = bom_bytes
124            self.text_value_holder = ValueHolder(self.writtable, text)
125            return TextFileInfo(self.text_value_holder, encoding, bom_bytes, bin_text, self.file)
126    
127    def __exit__(self, exc_type, exc_val, exc_tb):
128        if self.writtable and self.text_value_holder:
129            self.file.write(self.bom_bytes + self.text_value_holder.value.encode(self.detected_encoding))
130        
131        self.file.close()
132        return False
133    
134    async def __aenter__(self) -> TextFileInfo:
135        # TODO: implement async version using next backends in exact priority sequence: 
136        # ['https://github.com/mosquito/aiofile', 'https://github.com/Tinche/aiofiles', 'own asyncio thread based read-write implementation']. 
137        # Dependencies must be optional so own implementation is a last viable option.
138        return self.__enter__()
139    
140    async def __aexit__(self, exc_type, exc_val, exc_tb):
141        return self.__exit__(exc_type, exc_val, exc_tb)
class OpenTextFile:
 60class OpenTextFile:
 61    def __init__(self, 
 62                file: Union[str, bytes, PathLike],
 63                mode: str = 'r+b',
 64                buffering: int = -1,
 65                encoding = None,
 66                errors = None,
 67                newline = None,
 68                closefd = True,
 69                opener = None,
 70                detect_as_utf8_when_possible: bool = True,
 71                check_text_for_utf8_compliance: bool = True,
 72                ):
 73        self.file: Union[str, bytes, PathLike] = file
 74        self.mode: str = mode
 75        self.buffering = buffering
 76        self.encoding = encoding
 77        self.proposed_encoding = encoding
 78        self.errors = errors
 79        self.newline = newline
 80        self.closefd = closefd
 81        self.opener = opener
 82        self.detect_as_utf8_when_possible: bool = detect_as_utf8_when_possible
 83        self.check_text_for_utf8_compliance: bool = check_text_for_utf8_compliance
 84        self.readable: bool = ('r' in mode) or ('+' in mode)
 85        self.writtable: bool = ('w' in mode) or ('a' in mode) or ('+' in mode)
 86        self.binary_mode: bool = 'b' in mode
 87        if self.binary_mode:
 88            self.encoding = None
 89        
 90        self.text_value_holder: Optional[ValueHolder[str]] = None
 91        self.detected_encoding: str = None
 92        self.bom_bytes: bytes = None
 93    
 94    def __enter__(self) -> TextFileInfo:
 95        if (not self.readable) and (not self.proposed_encoding):
 96            raise ValueError('Encoding must be specified for files without reading mode')
 97
 98        self.file = open(self.file, self.mode, self.buffering, self.encoding, self.errors, self.newline, self.closefd, self.opener)
 99        if self.binary_mode:
100            if self.readable:
101                bin_data: bytes = self.file.read()
102                text, encoding, bom_bytes = detect_and_decode(bin_data, self.detect_as_utf8_when_possible, self.check_text_for_utf8_compliance)
103            else:
104                bin_data = bytes()
105                text = str()
106                encoding = self.proposed_encoding
107                bom_bytes = bytes()
108            
109            self.detected_encoding = encoding
110            self.bom_bytes = bom_bytes
111            self.text_value_holder = ValueHolder(self.writtable, text)
112            return TextFileInfo(self.text_value_holder, encoding, bom_bytes, bin_data, self.file)
113        else:
114            encoding = self.proposed_encoding
115            if self.readable:
116                text: str = self.file.read()
117                bin_text: bytes = text.encode(encoding)
118            else:
119                bin_text = bytes()
120                text = str()
121            
122            self.detected_encoding = encoding
123            bom_bytes = bytes()
124            self.bom_bytes = bom_bytes
125            self.text_value_holder = ValueHolder(self.writtable, text)
126            return TextFileInfo(self.text_value_holder, encoding, bom_bytes, bin_text, self.file)
127    
128    def __exit__(self, exc_type, exc_val, exc_tb):
129        if self.writtable and self.text_value_holder:
130            self.file.write(self.bom_bytes + self.text_value_holder.value.encode(self.detected_encoding))
131        
132        self.file.close()
133        return False
134    
135    async def __aenter__(self) -> TextFileInfo:
136        # TODO: implement async version using next backends in exact priority sequence: 
137        # ['https://github.com/mosquito/aiofile', 'https://github.com/Tinche/aiofiles', 'own asyncio thread based read-write implementation']. 
138        # Dependencies must be optional so own implementation is a last viable option.
139        return self.__enter__()
140    
141    async def __aexit__(self, exc_type, exc_val, exc_tb):
142        return self.__exit__(exc_type, exc_val, exc_tb)
OpenTextFile( file: typing.Union[str, bytes, os.PathLike], mode: str = 'r+b', buffering: int = -1, encoding=None, errors=None, newline=None, closefd=True, opener=None, detect_as_utf8_when_possible: bool = True, check_text_for_utf8_compliance: bool = True)
61    def __init__(self, 
62                file: Union[str, bytes, PathLike],
63                mode: str = 'r+b',
64                buffering: int = -1,
65                encoding = None,
66                errors = None,
67                newline = None,
68                closefd = True,
69                opener = None,
70                detect_as_utf8_when_possible: bool = True,
71                check_text_for_utf8_compliance: bool = True,
72                ):
73        self.file: Union[str, bytes, PathLike] = file
74        self.mode: str = mode
75        self.buffering = buffering
76        self.encoding = encoding
77        self.proposed_encoding = encoding
78        self.errors = errors
79        self.newline = newline
80        self.closefd = closefd
81        self.opener = opener
82        self.detect_as_utf8_when_possible: bool = detect_as_utf8_when_possible
83        self.check_text_for_utf8_compliance: bool = check_text_for_utf8_compliance
84        self.readable: bool = ('r' in mode) or ('+' in mode)
85        self.writtable: bool = ('w' in mode) or ('a' in mode) or ('+' in mode)
86        self.binary_mode: bool = 'b' in mode
87        if self.binary_mode:
88            self.encoding = None
89        
90        self.text_value_holder: Optional[ValueHolder[str]] = None
91        self.detected_encoding: str = None
92        self.bom_bytes: bytes = None
file: Union[str, bytes, os.PathLike]
mode: str
buffering
encoding
proposed_encoding
errors
newline
closefd
opener
detect_as_utf8_when_possible: bool
check_text_for_utf8_compliance: bool
readable: bool
writtable: bool
binary_mode: bool
text_value_holder: Union[cengal.code_flow_control.smart_values.versions.v_2.smart_values.ValueHolder[str], NoneType]
detected_encoding: str
bom_bytes: bytes
class TextFileInfo(builtins.tuple):
class TextFileInfo(NamedTuple):
    """Values handed to the body of a ``with OpenTextFile(...)`` block.

    An instance is built and returned by ``OpenTextFile.__enter__``.
    """
    # Holder wrapping the text read from the file; OpenTextFile.__exit__
    # reads its ``.value`` when writing the text back.
    text: ValueHolder[str]
    # Encoding reported by detect_and_decode (binary mode) or the encoding
    # that was passed to OpenTextFile (text mode / write-only mode).
    encoding: str
    # Third value returned by detect_and_decode (presumably the byte-order
    # mark - confirm against that helper); empty bytes when nothing was
    # detected that way.
    bom_bytes: bytes
    # Raw bytes read from the file in binary mode, or the text re-encoded
    # with ``encoding`` in text mode; empty bytes when the file is not read.
    bin_data: bytes
    # The file object returned by the built-in open().
    file: IOBase

TextFileInfo(text, encoding, bom_bytes, bin_data, file)

TextFileInfo( text: cengal.code_flow_control.smart_values.versions.v_2.smart_values.ValueHolder[str], encoding: str, bom_bytes: bytes, bin_data: bytes, file: io.IOBase)

Create new instance of TextFileInfo(text, encoding, bom_bytes, bin_data, file)

text: cengal.code_flow_control.smart_values.versions.v_2.smart_values.ValueHolder[str]

Alias for field number 0

encoding: str

Alias for field number 1

bom_bytes: bytes

Alias for field number 2

bin_data: bytes

Alias for field number 3

file: io.IOBase

Alias for field number 4

Inherited Members
builtins.tuple
index
count