cengal.data_containers.dynamic_tag_tree.versions.v_2.dynamic_tag_tree
1#!/usr/bin/env python 2# coding=utf-8 3 4# Copyright © 2012-2024 ButenkoMS. All rights reserved. Contacts: <gtalk@butenkoms.space> 5# 6# Licensed under the Apache License, Version 2.0 (the "License"); 7# you may not use this file except in compliance with the License. 8# You may obtain a copy of the License at 9# 10# http://www.apache.org/licenses/LICENSE-2.0 11# 12# Unless required by applicable law or agreed to in writing, software 13# distributed under the License is distributed on an "AS IS" BASIS, 14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15# See the License for the specific language governing permissions and 16# limitations under the License. 17 18 19from typing import Set, Dict, Tuple, Any, Hashable, Optional, FrozenSet, Callable 20 21 22""" 23Module Docstring 24Docstrings: http://www.python.org/dev/peps/pep-0257/ 25""" 26 27__author__ = "ButenkoMS <gtalk@butenkoms.space>" 28__copyright__ = "Copyright © 2012-2024 ButenkoMS. All rights reserved. Contacts: <gtalk@butenkoms.space>" 29__credits__ = ["ButenkoMS <gtalk@butenkoms.space>", ] 30__license__ = "Apache License, Version 2.0" 31__version__ = "4.4.1" 32__maintainer__ = "ButenkoMS <gtalk@butenkoms.space>" 33__email__ = "gtalk@butenkoms.space" 34# __status__ = "Prototype" 35__status__ = "Development" 36# __status__ = "Production" 37 38 39ItemID = Hashable 40TagID = Hashable 41UniqueItemID = Hashable 42 43 44class UniqueIdGen: 45 def __init__(self): 46 self.index = 0 47 48 def __call__(self): 49 result = self.index 50 self.index += 1 51 return result 52 53 54def tags_set_2_tuple(tags: Set[TagID]) -> Tuple[TagID]: 55 tags_list = list(tags) 56 tags_list.sort() 57 return tuple(tags_list) 58 59 60def default_growth_during_the_search_checker(): return True 61 62 63def default_scheduler(): pass 64 65 66class TagNotFound(Exception): 67 pass 68 69 70class DynamicTagTree: 71 def __init__(self, 72 growth_during_the_search_is_allowed: Optional[Callable] = None, 73 scheduler: Optional[Callable] = None): 74 self.growth_during_the_search_is_allowed: Callable = \ 75 growth_during_the_search_is_allowed or default_growth_during_the_search_checker 76 self.scheduler: Callable = scheduler or default_scheduler 77 self.uiidg = UniqueIdGen() 78 self.item_by_uiid: Dict[UniqueItemID, Tuple[ItemID, FrozenSet[TagID]]] = dict() 79 self.uiid_by_item: Dict[Tuple[ItemID, FrozenSet[TagID]], UniqueItemID] = dict() 80 self.item_tags_by_tag: Dict[TagID, Set[FrozenSet[TagID]]] = dict() 81 self.neighbor_tags_of_a_tag: Dict[TagID, Set[TagID]] = dict() 82 self.set_of_tags: Set[TagID] = set() 83 self.direct_items_by_tags: Dict[FrozenSet[TagID], Set[ItemID]] = dict() 84 self.tags_by_number_of_tags: Dict[int, Set[FrozenSet[TagID]]] = dict() 85 86 # self.tags_by_number_of_tags_by_tag: Dict[TagID, Dict[int, Set[FrozenSet[TagID]]]] = dict() 87 self.tags_len_by_tag: Dict[TagID, int] = dict() # key: tag; value: keyb from self.tags_by_number_of_tags 88 89 self.tags_by_number_of_tags_by_tag_by_near_tag: Dict[TagID, Dict[TagID, Dict[int, Set[FrozenSet[TagID]]]]] = \ 90 dict() # Dict[interested tag, Dict[near tag, Dict[len of tags set, Set[tag sets]]]] 91 self.tags_len_by_tag_by_near_tag: Dict[TagID, Dict[TagID, int]] = \ 92 dict() # Dict[interested tag, Dict[near tag, len of tags set]] 93 94 self.tags_inclusion_by_tags: Dict[FrozenSet[TagID], Set[FrozenSet[TagID]]] = dict() # key: interested tags set; 95 # value: bigger tags sets which includes interested tags set 96 self.tag_weight_by_tag_by_tags: Dict[FrozenSet[TagID], Dict[TagID, int]] = dict() 97 self.tags_by_tag_weight_by_tags: Dict[FrozenSet[TagID], Dict[int, Set[TagID]]] = dict() 98 self.nearest_non_empty_tags_by_some_tags: Dict[FrozenSet[TagID], Set[FrozenSet[TagID]]] = dict() # key: any 99 # interested tags set (input from user); value: set of known tags sets which has an item(-s) and includes given 100 # interested tags set 101 self.tags_by_itself: Dict[FrozenSet[TagID], FrozenSet[TagID]] = dict() # key: tags; value: same tags object 102 103 def set_scheduler(self, scheduler: Optional[Callable]): 104 self.scheduler = scheduler or default_scheduler 105 106 def get_scheduler(self): 107 return self.scheduler 108 109 def put(self, item_id: ItemID, tags: FrozenSet[TagID]) -> UniqueItemID: 110 unique_item_id: UniqueItemID = self.uiidg() 111 item = (item_id, tags) 112 tags_len = len(tags) 113 114 self.item_by_uiid[unique_item_id] = item 115 116 self.uiid_by_item[item] = unique_item_id 117 118 for tag in tags: 119 # self.item_tags_by_tag 120 if tag not in self.item_tags_by_tag: 121 self.item_tags_by_tag[tag] = set() 122 123 self.item_tags_by_tag[tag].add(tags) 124 125 # self.neighbor_tags_of_a_tag 126 if tag not in self.neighbor_tags_of_a_tag: 127 self.neighbor_tags_of_a_tag[tag] = set() 128 129 self.neighbor_tags_of_a_tag[tag].update(tags - tag) 130 131 # self.set_of_tags 132 self.set_of_tags.add(tag) 133 134 # self.direct_items_by_tags 135 if tags not in self.direct_items_by_tags: 136 self.direct_items_by_tags[tags] = set() 137 138 self.direct_items_by_tags[tags].add(item_id) 139 140 # self.tags_by_number_of_tags 141 if tags_len not in self.tags_by_number_of_tags: 142 self.tags_by_number_of_tags[tags_len] = set() 143 144 self.tags_by_number_of_tags[tags_len].add(tags) 145 146 # self.tags_len_by_tag 147 for tag in tags: 148 self.tags_len_by_tag[tag] = tags_len 149 150 def get(self, item_id: ItemID, tags: FrozenSet[TagID]) -> Optional[UniqueItemID]: 151 return self.uiid_by_item.get((item_id, tags), None) 152 153 def item(self, unique_item_id: UniqueItemID) -> Optional[Tuple[ItemID, FrozenSet[TagID]]]: 154 return self.item_by_uiid.get(unique_item_id, None) 155 156 def delete(self, unique_item_id: UniqueItemID): 157 pass 158 159 def list_direct_items(self, tags: FrozenSet[TagID]): 160 pass 161 162 def list_all_items(self, tags: FrozenSet[TagID]): 163 pass 164 165 def list_direct_tags(self, tags: FrozenSet[TagID]): 166 pass 167 168 def list_all_tags(self, tags: FrozenSet[TagID]): 169 pass 170 171 def get_nearest_non_empty_tags_set(self, tags: FrozenSet[TagID]) -> Optional[FrozenSet[TagID]]: 172 return self._gen_nearest_non_empty_tags_by_some_tags(True, self._normalise_tags(self._check_tags(tags))) 173 174 def _gen_nearest_non_empty_tags_by_some_tags( 175 self, is_search_request: bool, tags: FrozenSet[TagID]) -> Optional[Set[FrozenSet[TagID]]]: 176 if tags in self.tags_by_itself: 177 return set(self.tags_by_itself[tags]) 178 179 if tags in self.nearest_non_empty_tags_by_some_tags: 180 return self.nearest_non_empty_tags_by_some_tags[tags] 181 182 nearest_non_empty_tags_set = set() 183 184 if tags in self.tags_inclusion_by_tags: 185 inclusion_tags = self.tags_inclusion_by_tags[tags] 186 min_tags_len = len(min(inclusion_tags)) 187 for tags2 in inclusion_tags: 188 if len(tags2) == min_tags_len: 189 nearest_non_empty_tags_set.add(tags2) 190 else: 191 minimal_nearest_non_empty_tags_size = len(tags) + 1 192 tags_by_number_of_tags: Dict[int, Set[FrozenSet[TagID]]] = dict() 193 for tag in tags: 194 lookup1: Dict[TagID, Dict[int, Set[FrozenSet[TagID]]]] = \ 195 self.near_tags_by_number_of_tags_by_tag_by_tag[tag] 196 for tag2 in tags: 197 if tag == tag2: 198 continue 199 if tag2 not in lookup1: 200 return None 201 lookup2: Dict[int, Set[FrozenSet[TagID]]] = lookup1[tag2] 202 sorted_tags_sizes = sorted(lookup2.keys()) 203 for tags_size in sorted_tags_sizes: 204 if tags_size < minimal_nearest_non_empty_tags_size: 205 continue 206 lookup3 = lookup2[tags_size] 207 for another_tags in lookup3: 208 if tags != (tags & another_tags): 209 continue 210 if tags_size not in tags_by_number_of_tags: 211 tags_by_number_of_tags[tags_size] = set() 212 tags_by_number_of_tags[tags_size].add(another_tags) 213 min_tags_len = min(tags_by_number_of_tags.keys()) 214 nearest_non_empty_tags_set = tags_by_number_of_tags[min_tags_len] 215 216 if self._is_growth_allowed(is_search_request): 217 self.tags_inclusion_by_tags[tags] = set() 218 for inclusion_tags_set in tags_by_number_of_tags.values(): 219 self.tags_inclusion_by_tags[tags].update(inclusion_tags_set) 220 221 if self._is_growth_allowed(is_search_request): 222 self.nearest_non_empty_tags_by_some_tags[tags] = nearest_non_empty_tags_set 223 224 return nearest_non_empty_tags_set 225 226 def _normalise_tags(self, tags: FrozenSet[TagID]) -> FrozenSet[TagID]: 227 if tags in self.tags_by_itself: 228 return self.tags_by_itself[tags] 229 else: 230 return tags 231 232 def _check_tags(self, tags: FrozenSet[TagID]) -> FrozenSet[TagID]: 233 if not (tags == (tags & self.set_of_tags)): 234 raise TagNotFound(tags - (tags & self.set_of_tags)) 235 return tags 236 237 def _is_growth_allowed(self, is_search_request: bool): 238 return (not is_search_request) or (is_search_request and self.growth_during_the_search_is_allowed())
ItemID =
typing.Hashable
TagID =
typing.Hashable
UniqueItemID =
typing.Hashable
class
UniqueIdGen:
def
default_growth_during_the_search_checker():
61def default_growth_during_the_search_checker(): return True
def
default_scheduler():
64def default_scheduler(): pass
class
TagNotFound(builtins.Exception):
Common base class for all non-exit exceptions.
Inherited Members
- builtins.Exception
- Exception
- builtins.BaseException
- with_traceback
- args
class
DynamicTagTree:
71class DynamicTagTree: 72 def __init__(self, 73 growth_during_the_search_is_allowed: Optional[Callable] = None, 74 scheduler: Optional[Callable] = None): 75 self.growth_during_the_search_is_allowed: Callable = \ 76 growth_during_the_search_is_allowed or default_growth_during_the_search_checker 77 self.scheduler: Callable = scheduler or default_scheduler 78 self.uiidg = UniqueIdGen() 79 self.item_by_uiid: Dict[UniqueItemID, Tuple[ItemID, FrozenSet[TagID]]] = dict() 80 self.uiid_by_item: Dict[Tuple[ItemID, FrozenSet[TagID]], UniqueItemID] = dict() 81 self.item_tags_by_tag: Dict[TagID, Set[FrozenSet[TagID]]] = dict() 82 self.neighbor_tags_of_a_tag: Dict[TagID, Set[TagID]] = dict() 83 self.set_of_tags: Set[TagID] = set() 84 self.direct_items_by_tags: Dict[FrozenSet[TagID], Set[ItemID]] = dict() 85 self.tags_by_number_of_tags: Dict[int, Set[FrozenSet[TagID]]] = dict() 86 87 # self.tags_by_number_of_tags_by_tag: Dict[TagID, Dict[int, Set[FrozenSet[TagID]]]] = dict() 88 self.tags_len_by_tag: Dict[TagID, int] = dict() # key: tag; value: keyb from self.tags_by_number_of_tags 89 90 self.tags_by_number_of_tags_by_tag_by_near_tag: Dict[TagID, Dict[TagID, Dict[int, Set[FrozenSet[TagID]]]]] = \ 91 dict() # Dict[interested tag, Dict[near tag, Dict[len of tags set, Set[tag sets]]]] 92 self.tags_len_by_tag_by_near_tag: Dict[TagID, Dict[TagID, int]] = \ 93 dict() # Dict[interested tag, Dict[near tag, len of tags set]] 94 95 self.tags_inclusion_by_tags: Dict[FrozenSet[TagID], Set[FrozenSet[TagID]]] = dict() # key: interested tags set; 96 # value: bigger tags sets which includes interested tags set 97 self.tag_weight_by_tag_by_tags: Dict[FrozenSet[TagID], Dict[TagID, int]] = dict() 98 self.tags_by_tag_weight_by_tags: Dict[FrozenSet[TagID], Dict[int, Set[TagID]]] = dict() 99 self.nearest_non_empty_tags_by_some_tags: Dict[FrozenSet[TagID], Set[FrozenSet[TagID]]] = dict() # key: any 100 # interested tags set (input from user); value: set of known tags sets which has an item(-s) and includes given 101 # interested tags set 102 self.tags_by_itself: Dict[FrozenSet[TagID], FrozenSet[TagID]] = dict() # key: tags; value: same tags object 103 104 def set_scheduler(self, scheduler: Optional[Callable]): 105 self.scheduler = scheduler or default_scheduler 106 107 def get_scheduler(self): 108 return self.scheduler 109 110 def put(self, item_id: ItemID, tags: FrozenSet[TagID]) -> UniqueItemID: 111 unique_item_id: UniqueItemID = self.uiidg() 112 item = (item_id, tags) 113 tags_len = len(tags) 114 115 self.item_by_uiid[unique_item_id] = item 116 117 self.uiid_by_item[item] = unique_item_id 118 119 for tag in tags: 120 # self.item_tags_by_tag 121 if tag not in self.item_tags_by_tag: 122 self.item_tags_by_tag[tag] = set() 123 124 self.item_tags_by_tag[tag].add(tags) 125 126 # self.neighbor_tags_of_a_tag 127 if tag not in self.neighbor_tags_of_a_tag: 128 self.neighbor_tags_of_a_tag[tag] = set() 129 130 self.neighbor_tags_of_a_tag[tag].update(tags - tag) 131 132 # self.set_of_tags 133 self.set_of_tags.add(tag) 134 135 # self.direct_items_by_tags 136 if tags not in self.direct_items_by_tags: 137 self.direct_items_by_tags[tags] = set() 138 139 self.direct_items_by_tags[tags].add(item_id) 140 141 # self.tags_by_number_of_tags 142 if tags_len not in self.tags_by_number_of_tags: 143 self.tags_by_number_of_tags[tags_len] = set() 144 145 self.tags_by_number_of_tags[tags_len].add(tags) 146 147 # self.tags_len_by_tag 148 for tag in tags: 149 self.tags_len_by_tag[tag] = tags_len 150 151 def get(self, item_id: ItemID, tags: FrozenSet[TagID]) -> Optional[UniqueItemID]: 152 return self.uiid_by_item.get((item_id, tags), None) 153 154 def item(self, unique_item_id: UniqueItemID) -> Optional[Tuple[ItemID, FrozenSet[TagID]]]: 155 return self.item_by_uiid.get(unique_item_id, None) 156 157 def delete(self, unique_item_id: UniqueItemID): 158 pass 159 160 def list_direct_items(self, tags: FrozenSet[TagID]): 161 pass 162 163 def list_all_items(self, tags: FrozenSet[TagID]): 164 pass 165 166 def list_direct_tags(self, tags: FrozenSet[TagID]): 167 pass 168 169 def list_all_tags(self, tags: FrozenSet[TagID]): 170 pass 171 172 def get_nearest_non_empty_tags_set(self, tags: FrozenSet[TagID]) -> Optional[FrozenSet[TagID]]: 173 return self._gen_nearest_non_empty_tags_by_some_tags(True, self._normalise_tags(self._check_tags(tags))) 174 175 def _gen_nearest_non_empty_tags_by_some_tags( 176 self, is_search_request: bool, tags: FrozenSet[TagID]) -> Optional[Set[FrozenSet[TagID]]]: 177 if tags in self.tags_by_itself: 178 return set(self.tags_by_itself[tags]) 179 180 if tags in self.nearest_non_empty_tags_by_some_tags: 181 return self.nearest_non_empty_tags_by_some_tags[tags] 182 183 nearest_non_empty_tags_set = set() 184 185 if tags in self.tags_inclusion_by_tags: 186 inclusion_tags = self.tags_inclusion_by_tags[tags] 187 min_tags_len = len(min(inclusion_tags)) 188 for tags2 in inclusion_tags: 189 if len(tags2) == min_tags_len: 190 nearest_non_empty_tags_set.add(tags2) 191 else: 192 minimal_nearest_non_empty_tags_size = len(tags) + 1 193 tags_by_number_of_tags: Dict[int, Set[FrozenSet[TagID]]] = dict() 194 for tag in tags: 195 lookup1: Dict[TagID, Dict[int, Set[FrozenSet[TagID]]]] = \ 196 self.near_tags_by_number_of_tags_by_tag_by_tag[tag] 197 for tag2 in tags: 198 if tag == tag2: 199 continue 200 if tag2 not in lookup1: 201 return None 202 lookup2: Dict[int, Set[FrozenSet[TagID]]] = lookup1[tag2] 203 sorted_tags_sizes = sorted(lookup2.keys()) 204 for tags_size in sorted_tags_sizes: 205 if tags_size < minimal_nearest_non_empty_tags_size: 206 continue 207 lookup3 = lookup2[tags_size] 208 for another_tags in lookup3: 209 if tags != (tags & another_tags): 210 continue 211 if tags_size not in tags_by_number_of_tags: 212 tags_by_number_of_tags[tags_size] = set() 213 tags_by_number_of_tags[tags_size].add(another_tags) 214 min_tags_len = min(tags_by_number_of_tags.keys()) 215 nearest_non_empty_tags_set = tags_by_number_of_tags[min_tags_len] 216 217 if self._is_growth_allowed(is_search_request): 218 self.tags_inclusion_by_tags[tags] = set() 219 for inclusion_tags_set in tags_by_number_of_tags.values(): 220 self.tags_inclusion_by_tags[tags].update(inclusion_tags_set) 221 222 if self._is_growth_allowed(is_search_request): 223 self.nearest_non_empty_tags_by_some_tags[tags] = nearest_non_empty_tags_set 224 225 return nearest_non_empty_tags_set 226 227 def _normalise_tags(self, tags: FrozenSet[TagID]) -> FrozenSet[TagID]: 228 if tags in self.tags_by_itself: 229 return self.tags_by_itself[tags] 230 else: 231 return tags 232 233 def _check_tags(self, tags: FrozenSet[TagID]) -> FrozenSet[TagID]: 234 if not (tags == (tags & self.set_of_tags)): 235 raise TagNotFound(tags - (tags & self.set_of_tags)) 236 return tags 237 238 def _is_growth_allowed(self, is_search_request: bool): 239 return (not is_search_request) or (is_search_request and self.growth_during_the_search_is_allowed())
DynamicTagTree( growth_during_the_search_is_allowed: Union[Callable, NoneType] = None, scheduler: Union[Callable, NoneType] = None)
72 def __init__(self, 73 growth_during_the_search_is_allowed: Optional[Callable] = None, 74 scheduler: Optional[Callable] = None): 75 self.growth_during_the_search_is_allowed: Callable = \ 76 growth_during_the_search_is_allowed or default_growth_during_the_search_checker 77 self.scheduler: Callable = scheduler or default_scheduler 78 self.uiidg = UniqueIdGen() 79 self.item_by_uiid: Dict[UniqueItemID, Tuple[ItemID, FrozenSet[TagID]]] = dict() 80 self.uiid_by_item: Dict[Tuple[ItemID, FrozenSet[TagID]], UniqueItemID] = dict() 81 self.item_tags_by_tag: Dict[TagID, Set[FrozenSet[TagID]]] = dict() 82 self.neighbor_tags_of_a_tag: Dict[TagID, Set[TagID]] = dict() 83 self.set_of_tags: Set[TagID] = set() 84 self.direct_items_by_tags: Dict[FrozenSet[TagID], Set[ItemID]] = dict() 85 self.tags_by_number_of_tags: Dict[int, Set[FrozenSet[TagID]]] = dict() 86 87 # self.tags_by_number_of_tags_by_tag: Dict[TagID, Dict[int, Set[FrozenSet[TagID]]]] = dict() 88 self.tags_len_by_tag: Dict[TagID, int] = dict() # key: tag; value: keyb from self.tags_by_number_of_tags 89 90 self.tags_by_number_of_tags_by_tag_by_near_tag: Dict[TagID, Dict[TagID, Dict[int, Set[FrozenSet[TagID]]]]] = \ 91 dict() # Dict[interested tag, Dict[near tag, Dict[len of tags set, Set[tag sets]]]] 92 self.tags_len_by_tag_by_near_tag: Dict[TagID, Dict[TagID, int]] = \ 93 dict() # Dict[interested tag, Dict[near tag, len of tags set]] 94 95 self.tags_inclusion_by_tags: Dict[FrozenSet[TagID], Set[FrozenSet[TagID]]] = dict() # key: interested tags set; 96 # value: bigger tags sets which includes interested tags set 97 self.tag_weight_by_tag_by_tags: Dict[FrozenSet[TagID], Dict[TagID, int]] = dict() 98 self.tags_by_tag_weight_by_tags: Dict[FrozenSet[TagID], Dict[int, Set[TagID]]] = dict() 99 self.nearest_non_empty_tags_by_some_tags: Dict[FrozenSet[TagID], Set[FrozenSet[TagID]]] = dict() # key: any 100 # interested tags set (input from user); value: set of known tags sets which has an item(-s) and includes given 101 # interested tags set 102 self.tags_by_itself: Dict[FrozenSet[TagID], FrozenSet[TagID]] = dict() # key: tags; value: same tags object
def
put(self, item_id: Hashable, tags: FrozenSet[Hashable]) -> Hashable:
110 def put(self, item_id: ItemID, tags: FrozenSet[TagID]) -> UniqueItemID: 111 unique_item_id: UniqueItemID = self.uiidg() 112 item = (item_id, tags) 113 tags_len = len(tags) 114 115 self.item_by_uiid[unique_item_id] = item 116 117 self.uiid_by_item[item] = unique_item_id 118 119 for tag in tags: 120 # self.item_tags_by_tag 121 if tag not in self.item_tags_by_tag: 122 self.item_tags_by_tag[tag] = set() 123 124 self.item_tags_by_tag[tag].add(tags) 125 126 # self.neighbor_tags_of_a_tag 127 if tag not in self.neighbor_tags_of_a_tag: 128 self.neighbor_tags_of_a_tag[tag] = set() 129 130 self.neighbor_tags_of_a_tag[tag].update(tags - tag) 131 132 # self.set_of_tags 133 self.set_of_tags.add(tag) 134 135 # self.direct_items_by_tags 136 if tags not in self.direct_items_by_tags: 137 self.direct_items_by_tags[tags] = set() 138 139 self.direct_items_by_tags[tags].add(item_id) 140 141 # self.tags_by_number_of_tags 142 if tags_len not in self.tags_by_number_of_tags: 143 self.tags_by_number_of_tags[tags_len] = set() 144 145 self.tags_by_number_of_tags[tags_len].add(tags) 146 147 # self.tags_len_by_tag 148 for tag in tags: 149 self.tags_len_by_tag[tag] = tags_len