#!/usr/bin/env python
# coding=utf-8

# Copyright © 2012-2024 ButenkoMS. All rights reserved. Contacts: <gtalk@butenkoms.space>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""
Dynamic tag tree database.

Module path: cengal.data_containers.dynamic_tag_tree.versions.v_1.TagDB

``TagDB`` stores hashable items under sets of hashable tags and can present
the resulting tag network as several tree-like views (see the ``*_TREE_TYPE``
constants below).

Docstrings: http://www.python.org/dev/peps/pep-0257/
"""


__author__ = "ButenkoMS <gtalk@butenkoms.space>"
__copyright__ = "Copyright © 2012-2024 ButenkoMS. All rights reserved. Contacts: <gtalk@butenkoms.space>"
__credits__ = ["ButenkoMS <gtalk@butenkoms.space>", ]
__license__ = "Apache License, Version 2.0"
__version__ = "4.4.1"
__maintainer__ = "ButenkoMS <gtalk@butenkoms.space>"
__email__ = "gtalk@butenkoms.space"
# __status__ = "Prototype"
__status__ = "Development"
# __status__ = "Production"


__author__ = 'Mikhail Butenko <gtalk@mikhail-butenko.in.ua>'

from cengal.data_generation.id_generator import IDGenerator
from contextlib import contextmanager
from typing import Hashable, Tuple, Dict, Any
from cengal.data_containers.compound_dict_management.standard_library.key__hashable__to__value__set import AddToCompoundDict__Set
from cengal.data_containers.compound_dict_management.standard_library.key_counter import KeyCounter
from cengal.parallel_execution.coroutines.coro_standard_services.loop_yield import gly, CoroPriority
from cengal.parallel_execution.coroutines.coro_scheduler import EntityStatsMixin


# Smart tag tree: a tag network mapped onto a tree. Returns only the immediate
# sub-tags of the current path (not their sub-tags) and only the items of the
# current path (not the items of sub-paths).
SMART_TREE_TYPE = 0
# Smart tree with an internal menu. A menu is embedded into the tree which
# allows switching the output type right from the tree: SMART_TREE_TYPE,
# FULL_TREE_TYPE and PLAIN_PSEUDO_TREE_TYPE. Each output type gets its own
# sub-tag/sub-folder holding a normal item tree of the selected type.
SMART_TREE_TYPE_WITH_INTERNAL_MENU = 1
# Full tree with all tags - with repeats and without filtering. The item list
# is the same as for SMART_TREE_TYPE, the tag list is the same as for
# PLAIN_PSEUDO_TREE_TYPE.
FULL_TREE_TYPE = 2
# Plain tags and items set (will show all tags, sub-tags and items of the
# current "dir" - the current tag set). Shows all tags and sub-tags as one flat
# list - like primitive tag file systems do; shows all items of the current
# path plus all items of every sub-path.
PLAIN_PSEUDO_TREE_TYPE = 3

USUAL_TREE_TYPE = PLAIN_PSEUDO_TREE_TYPE

_ROOT_TAG = r'k{1+vdcY#m8t-4m9`)G2\b]/O\'Rzqyr@FEO~%./nGPzl)[^q 0RS!.bCPh ?fag{8~{SGj;Ss3U85Q-:'


class ToManyIdenticalItemsOnTheGivenTagPathError(Exception):
    """More than one identical item was found on a single tag path."""


class UnknownTreeTypeError(Exception):
    """The requested tree type is not supported by the called method."""


class LockableMixin:
    """Interface for objects exposing a boolean ``lock`` attribute.

    Subclasses must override the ``lock`` property (getter and setter);
    the default implementations raise ``NotImplementedError``.
    """

    @property
    def lock(self) -> bool:
        raise NotImplementedError

    @lock.setter
    def lock(self, value: bool) -> None:
        raise NotImplementedError


@contextmanager
def obj_locker(obj: LockableMixin):
    """Set ``obj.lock`` to True for the duration of the ``with`` body.

    Yields the value of ``obj.lock`` read right after it was set. The lock is
    reset to False on exit, even if the body raises.
    """
    obj.lock = True
    try:
        yield obj.lock
    finally:
        obj.lock = False


class Example(LockableMixin):
    """Minimal usage example of ``LockableMixin`` together with ``obj_locker``."""

    def __init__(self) -> None:
        super().__init__()
        # A backing field is required: assigning to ``self.lock`` without
        # overriding the property would hit the mixin setter, which raises
        # NotImplementedError.
        self._lock: bool = False

    @property
    def lock(self) -> bool:
        return self._lock

    @lock.setter
    def lock(self, value: bool) -> None:
        self._lock = value

    def write_coroutine(self):
        with obj_locker(self):
            pass

    def read_coroutine(self):
        if not self.lock:
            pass


class TagDB(EntityStatsMixin):
    """In-memory database of items addressed by sets of tags ("tag paths").

    Every item is stored under a set of tags; the root tag is implicitly added
    to every tag set. Items and tags are identified internally by the value of
    their ``__hash__()``.
    """

    def __init__(self, default_priority: CoroPriority = CoroPriority.normal):
        """Create an empty database.

        :param default_priority: priority passed to ``gly`` by methods that
            cooperatively yield inside their loops.
        """
        self.default_priority: CoroPriority = default_priority

        self.itemsID = IDGenerator()

        self.itemIDsForItem = {}  # key - item hash; data - set of itemIDs

        self.itemsSet = {}  # key - ItemID; data - binItem
        # TODO: replace the tag list with the hash of a tag list that is stored only once
        self.itemWithTags = {}  # key - ItemID; data - sorted common TagsTuple's hash

        # TODO: remove tagsNumPerItemID from the code. Replace this list of itemIDs with a list of
        # hashOfTheTagHashTuple
        self.tagsNumPerItemID = {}  # key - number of tags in this ItemID group; data - set of itemIDs which
        # have the needed number of tags

        self.tagsSet = {}  # key - tag hash; data - binTag
        self.tagWithItems = {}  # key - Tag hash; data - set of itemIDs
        self.tagsQnt = {}  # key - Tag hash; data - quantity of the items with this tag

        self.commonTagSets = {}  # key - sorted common TagsTuple's hash; data - sorted TagsTuple
        self.itemsOnTheCommonTagSets = {}  # key - sorted common TagsTuple's hash; data - set of itemIDs
        self.tagsQntPerCommonTagSet = {}  # key - number of tags; data - set of TagsTuple hashes
        self.setOfTagGroupQnt = set()  # {tagQntInGroup1, tagQntInGroup2, ..., tagQntInGroupN} where
        # each Group is a key of the self.tagsQntPerCommonTagSet

        # TODO: add a dict tagHashTuplesIDs of the form [tagHashTuple.__hash__():tagHashTuple]
        #   where tagHashSet is {tagHash_1, tagHash_2, ..., tagHash_3}
        # TODO: add a dict itemsOnThePath of the form [tagHashTuple.__hash__():{itemID_1, itemID_2, ..., itemID_3}]

        # TODO: add a dict of the form [tagQnt:{itemID_1, itemID_2, ..., itemID_3}] and read this from it
        #   and/or
        # TODO: add a dict of the form [tagQnt:{hashOfTheTagHashTuple_1, hashOfTheTagHashTuple_2, ...
        #   , hashOfTheTagHashTuple_N}] and read this from it, and only then from every tagHashSet
        #   where hashOfTheTagHashTuple is tagHashTuple.__hash__()

    def get_entity_stats(self, stats_level: 'EntityStatsMixin.StatsLevel' = EntityStatsMixin.StatsLevel.debug) -> Tuple[str, Dict[str, Any]]:
        """Return ``(class name, {'items num': ..., 'tags num': ...})``."""
        return type(self).__name__, {
            'items num': len(self.itemsSet),
            'tags num': len(self.tagsSet),
        }

    def get_root_tag(self):
        """Return the implicit root tag that is part of every tag path."""
        return str(_ROOT_TAG)

    def add_tag(self, binTag):
        """Register ``binTag``; bookkeeping of an already known tag is kept."""
        tagHash = binTag.__hash__()
        self.tagsSet[tagHash] = binTag
        # The counter must be checked against tagsQnt itself: tagsSet already
        # contains the hash at this point, so testing tagsSet would never
        # initialise the counter.
        if tagHash not in self.tagsQnt:
            self.tagsQnt[tagHash] = 0
        if tagHash not in self.tagWithItems:
            self.tagWithItems[tagHash] = set()

    def remove_tag(self, binTag):
        """Try to delete the given tag.

        :return: False if the tag is unknown or at least one item still uses
            it; True if the tag was deleted.
        """
        tagHash = binTag.__hash__()
        if tagHash not in self.tagsSet:
            return False

        if tagHash in self.tagsQnt:
            if self.tagsQnt[tagHash] != 0:
                return False
            del self.tagsQnt[tagHash]
        del self.tagsSet[tagHash]

        if tagHash in self.tagWithItems:
            del self.tagWithItems[tagHash]
        return True

    def add_item(self, binItem, binTags):
        """Add ``binItem`` under the tag path ``binTags``.

        :return: the new dynamic item ID, or None if an identical item already
            exists on exactly this tag path.
        """
        binTags = set(binTags)
        binTags.add(self.get_root_tag())

        if self.get_itemID_from_item_and_tags(binTags, binItem) is not None:
            return None

        itemID = self.itemsID()
        self.itemsSet[itemID] = binItem

        self.itemIDsForItem.setdefault(binItem.__hash__(), set()).add(itemID)
        self.tagsNumPerItemID.setdefault(len(binTags), set()).add(itemID)

        binTagHashes = set()
        for tag in binTags:
            self.add_tag(tag)
            tagHash = tag.__hash__()
            binTagHashes.add(tagHash)
            setOfItems = self.tagWithItems[tagHash]
            if itemID not in setOfItems:
                setOfItems.add(itemID)
                self.tagsQnt[tagHash] = self.tagsQnt.get(tagHash, 0) + 1

        sortedTagTuple = tuple(self.sort_tag_hash_list_by_hash(binTagHashes))
        hashOfTheSortedTagTuple = sortedTagTuple.__hash__()
        self.itemWithTags[itemID] = hashOfTheSortedTagTuple
        self.commonTagSets[hashOfTheSortedTagTuple] = sortedTagTuple
        self.itemsOnTheCommonTagSets.setdefault(hashOfTheSortedTagTuple, set()).add(itemID)

        lenOfTheSortedTagTuple = len(sortedTagTuple)
        self.tagsQntPerCommonTagSet.setdefault(lenOfTheSortedTagTuple, set()).add(hashOfTheSortedTagTuple)
        self.setOfTagGroupQnt.add(lenOfTheSortedTagTuple)

        return itemID

    def remove_item_by_itemID(self, itemID):
        """Remove the item with the given ID from every internal index.

        Tags whose item counter drops to zero are removed as well.
        """
        # NOTE(review): block nesting was reconstructed from data flow (the
        # source formatting was lost); confirm against the upstream file.
        ly = gly(self.default_priority)

        if itemID in self.itemsSet:
            itemHash = self.itemsSet[itemID].__hash__()
            del self.itemsSet[itemID]

            if itemHash in self.itemIDsForItem:
                IDsSet = self.itemIDsForItem[itemHash]
                IDsSet.discard(itemID)
                if not IDsSet:
                    del self.itemIDsForItem[itemHash]

        if itemID in self.itemWithTags:
            commonTagTupleHash = self.itemWithTags[itemID]
            tagTuple = self.commonTagSets[commonTagTupleHash]
            numberOfTags = len(tagTuple)

            if commonTagTupleHash in self.itemsOnTheCommonTagSets:
                IDsSet = self.itemsOnTheCommonTagSets[commonTagTupleHash]
                IDsSet.discard(itemID)
                if not IDsSet:
                    # Last item on this tag path: forget the path itself.
                    del self.itemsOnTheCommonTagSets[commonTagTupleHash]
                    del self.commonTagSets[commonTagTupleHash]
                    if numberOfTags in self.tagsQntPerCommonTagSet:
                        setOfTagTuplesHashes = self.tagsQntPerCommonTagSet[numberOfTags]
                        # The set holds tag-tuple hashes, so the tuple hash (not
                        # the tag count) must be removed; otherwise a stale hash
                        # stays behind and later lookups in commonTagSets fail.
                        setOfTagTuplesHashes.discard(commonTagTupleHash)
                        if not setOfTagTuplesHashes:
                            del self.tagsQntPerCommonTagSet[numberOfTags]
                            self.setOfTagGroupQnt.discard(numberOfTags)

            del self.itemWithTags[itemID]

            setOfTagHashes = set(tagTuple)

            tagsQnt = len(setOfTagHashes)
            if tagsQnt in self.tagsNumPerItemID:
                IDsSet = self.tagsNumPerItemID[tagsQnt]
                IDsSet.discard(itemID)
                if not IDsSet:
                    del self.tagsNumPerItemID[tagsQnt]

            for tagHash in setOfTagHashes:
                ly()
                if tagHash in self.tagsQnt:
                    tagsQuantity = self.tagsQnt[tagHash] - 1
                    if tagsQuantity < 1:
                        del self.tagsQnt[tagHash]
                        del self.tagsSet[tagHash]
                    else:
                        self.tagsQnt[tagHash] = tagsQuantity
                if tagHash in self.tagWithItems:
                    IDsSet = self.tagWithItems[tagHash]
                    IDsSet.discard(itemID)
                    if not IDsSet:
                        del self.tagWithItems[tagHash]

        self.itemsID.remove_id(itemID)

    def remove_item(self, binTags, binItem):
        """Remove ``binItem`` from the tag path ``binTags``.

        :return: the ID of the removed item, or None if there is no such item.
        """
        binTags = set(binTags)
        binTags.add(self.get_root_tag())
        itemID = self.get_itemID_from_item_and_tags(binTags, binItem)
        if itemID is not None:
            self.remove_item_by_itemID(itemID)
        return itemID

    def __OLD__get_itemID_from_item_and_tags(self, binTags, binItem):
        """Legacy lookup kept for reference; see get_itemID_from_item_and_tags.

        Expects ``binTags`` to be a list (it is extended in place with the
        root tag).

        :raises ToManyIdenticalItemsOnTheGivenTagPathError: if more than one
            identical item is found on the tag path.
        """
        if self.get_root_tag() not in binTags:
            binTags.append(self.get_root_tag())
        potentialIDs = set(self.get_potential_itemIDs_from_item(binItem))
        itemIDsSet = set(self.get_itemIDs_from_tags(binTags, SMART_TREE_TYPE))
        resultItemIDsList = potentialIDs & itemIDsSet
        if len(resultItemIDsList) == 0:
            return None
        elif len(resultItemIDsList) == 1:
            # We assume only one item lies in the intersection between the
            # potential IDs and the items which have exactly the given tag list
            # (without other tags in the path). This must be ensured when a new
            # item is added to the tag path.
            return resultItemIDsList.pop()
        else:
            raise ToManyIdenticalItemsOnTheGivenTagPathError()

    def get_itemID_from_item_and_tags(self, binTags, binItem):
        """Return the ID of ``binItem`` stored on exactly ``binTags``, or None."""
        ly = gly(self.default_priority)

        binTags = set(binTags)
        binTags.add(self.get_root_tag())

        potentialIDs = self.get_potential_itemIDs_from_item(binItem)
        setOfBinTagsHashes = set()
        for tag in binTags:
            ly()
            setOfBinTagsHashes.add(tag.__hash__())
        for itemID in potentialIDs:
            ly()
            currentItemTagsSet = self.get_tagsHashes_from_single_item(itemID, isWithoutRootHash=False)
            if setOfBinTagsHashes == currentItemTagsSet:
                return itemID
        return None

    def tag_hash_list_2_tag_list(self, tagHashList):
        """Map tag hashes to the stored tag objects, preserving order."""
        ly = gly(self.default_priority)

        tagList = list()
        for tagHash in tagHashList:
            ly()
            tagList.append(self.tagsSet[tagHash])
        return tagList

    def get_item_and_tags_from_itemID(self, itemID):
        """Return ``(item, tags)`` for ``itemID``.

        Tags are sorted by usage count (descending); the root tag is excluded.
        """
        commonTagTupleHash = self.itemWithTags[itemID]
        tagSet = set(self.commonTagSets[commonTagTupleHash]) - {self.get_root_tag().__hash__()}
        sortedTagHashList = self.sort_tag_hash_list_by_qnt(tagSet)
        return self.itemsSet[itemID], self.tag_hash_list_2_tag_list(sortedTagHashList)

    def get_top_tag_hash_list_by_qnt(self, tagHashSet, local_tags_qnt=None):
        """Return a tuple of the tag hashes which share the biggest counter.

        :param tagHashSet: iterable of tag hashes to choose from.
        :param local_tags_qnt: optional ``{tag hash: counter}`` mapping used
            instead of the global ``self.tagsQnt``.
        """
        ly = gly(self.default_priority)

        tagsQnt = self.tagsQnt if local_tags_qnt is None else local_tags_qnt
        tag_by_qnt = dict()
        tag_by_qnt__filler = AddToCompoundDict__Set(tag_by_qnt)
        biggest_qnt = 0
        for tag_hash in set(tagHashSet):
            ly()
            qnt = tagsQnt[tag_hash]
            if qnt > biggest_qnt:
                biggest_qnt = qnt
            tag_by_qnt__filler.add(qnt, tag_hash)

        if len(tag_by_qnt) > 0:
            return tuple(tag_by_qnt[biggest_qnt])
        return tuple()

    def sort_tag_hash_list_by_qnt(self, tagHashSet):
        """Return the tag hashes sorted by their global item counter, descending."""
        ly = gly(self.default_priority)

        rawTagList = list()
        for tagHash in set(tagHashSet):
            ly()
            rawTagList.append((tagHash, self.tagsQnt[tagHash]))
        return self.sort_raw_tag_list(rawTagList)

    def sort_tag_list_by_qnt(self, binTags):
        """Return the tags sorted by their global item counter, descending."""
        ly = gly(self.default_priority)

        rawTagList = list()
        for tag in set(binTags):
            ly()
            rawTagList.append((tag, self.tagsQnt[tag.__hash__()]))
        return self.sort_raw_tag_list(rawTagList)

    def sort_tag_hash_list_by_hash(self, tagHashSet):
        """Return the tag hashes sorted by their own value, descending."""
        ly = gly(self.default_priority)

        rawTagList = list()
        for tagHash in set(tagHashSet):
            ly()
            rawTagList.append((tagHash, tagHash))
        return self.sort_raw_tag_list(rawTagList)

    def sort_tag_list_by_hash(self, binTags):
        """Return the tags sorted by their hash value, descending."""
        ly = gly(self.default_priority)

        rawTagList = list()
        for tag in set(binTags):
            ly()
            rawTagList.append((tag, tag.__hash__()))
        return self.sort_raw_tag_list(rawTagList)

    def sort_raw_tag_list(self, rawTagList):
        """Sort ``(value, weight)`` pairs by weight (descending); return the values."""
        ly = gly(self.default_priority)

        rawTagList = sorted(rawTagList, key=lambda tagAndWeight: tagAndWeight[1], reverse=True)
        tagList = list()
        for rawTag in rawTagList:
            ly()
            tagList.append(rawTag[0])
        return tagList

    def get_itemIDs_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                              isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
        """Return the IDs of the items visible on the tag path ``binTags``.

        :param treeType: SMART_TREE_TYPE / FULL_TREE_TYPE - only items stored on
            exactly this tag path; PLAIN_PSEUDO_TREE_TYPE - items of this path
            and of every sub-path.
        :param isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags: if True,
            return ``(result IDs, IDs of all items of this path and sub-paths)``.
        :return: a new set of item IDs (or a tuple of two new sets).
        :raises UnknownTreeTypeError: for any other ``treeType``.
        """
        # TODO: fix the error: SMART_TREE_TYPE returns not only the list of files of the current
        # directory but also the files of the immediate sub-folders of this folder
        ly = gly(self.default_priority)

        binTags = set(binTags)
        binTags.add(self.get_root_tag())

        tagHashSet = set()
        for binTag in binTags:
            ly()
            tagHashSet.add(binTag.__hash__())

        binTagHashTuple = tuple(self.sort_tag_hash_list_by_hash(tagHashSet))
        hashOfTheBinTagHashTuple = binTagHashTuple.__hash__()

        # PLAIN_PSEUDO_TREE_TYPE part: items of this path and of all sub-paths.
        interceptionOfItemsWithTags = set()
        if (treeType == PLAIN_PSEUDO_TREE_TYPE) or isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            binTagsQnt = len(tagHashSet)
            commonTagGroupHashSet = set()
            # Copy the stored set: it is extended below and must not alias the
            # index entry, otherwise sub-path items leak into the exact path.
            itemIDSet = set(self.itemsOnTheCommonTagSets.get(hashOfTheBinTagHashTuple, ()))
            for commonTagQnt in self.setOfTagGroupQnt:
                ly()
                # Only longer tag paths can be sub-paths of the requested one.
                if commonTagQnt > binTagsQnt:
                    commonTagGroupHashSet.update(self.tagsQntPerCommonTagSet[commonTagQnt])
            for commonTagGroupHash in commonTagGroupHashSet:
                ly()
                commonTagHashSet = set(self.commonTagSets[commonTagGroupHash])
                if tagHashSet.issubset(commonTagHashSet):
                    itemIDSet.update(self.itemsOnTheCommonTagSets[commonTagGroupHash])
            interceptionOfItemsWithTags = itemIDSet

        resultItemIDSet = interceptionOfItemsWithTags
        setOfAllInternalItemIDsForThisSetOfTags = interceptionOfItemsWithTags

        if (treeType == SMART_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
            resultItemIDSet = self.itemsOnTheCommonTagSets.get(hashOfTheBinTagHashTuple, set())
        elif treeType == PLAIN_PSEUDO_TREE_TYPE:
            # Already computed above.
            pass
        else:
            raise UnknownTreeTypeError()

        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            return set(resultItemIDSet), set(setOfAllInternalItemIDsForThisSetOfTags)
        return set(resultItemIDSet)

    def get_items_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                            isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
        """Return the items visible on the tag path ``binTags``.

        :param treeType: same meaning as in ``get_itemIDs_from_tags``.
        :return: a tuple of items, or - when
            ``isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags`` is True -
            ``(tuple of items, tuple of all internal item IDs)``.
        """
        ly = gly(self.default_priority)

        binTags = set(binTags)
        itemIDsSet = self.get_itemIDs_from_tags(
            binTags, treeType=treeType,
            isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags)

        visibleItemIDs = itemIDsSet[0] if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags else itemIDsSet
        itemSet = set()
        for itemID in visibleItemIDs:
            ly()
            itemSet.add(self.itemsSet[itemID])

        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            # result == (usual items set, additional set of all internal itemIDs)
            return tuple(itemSet), tuple(itemIDsSet[1])
        return tuple(itemSet)

    def get_tagHashes_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                                prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
        """Return hashes of the sub-tags visible on the tag path ``binTags``.

        :param treeType: same meaning as in ``get_items_from_tags``.
        :param prePreparedSetOfAllInternalItemIDsForThisSetOfTags: IDs of all
            items inside the "folder" ``binTags`` (including "sub-folders").
            Can be produced by:
            a) get_itemIDs_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE)
            b) get_itemIDs_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
            c) get_items_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
            (get_items_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE) is NOT
            suitable: it returns items, not item IDs.)
            Computed internally when None.
        :return: list of tag hashes sorted by usage count, root tag excluded.
        :raises UnknownTreeTypeError: for an unsupported ``treeType``.
        """
        ly = gly(self.default_priority)

        binTags = set(binTags)

        if prePreparedSetOfAllInternalItemIDsForThisSetOfTags is None:
            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(binTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
        else:
            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDsForThisSetOfTags

        resultTagHashSet = set()
        if (treeType == PLAIN_PSEUDO_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
            binTagHashes = set()
            for tag in binTags:
                ly()
                binTagHashes.add(tag.__hash__())
            tagHashSet = set()
            for itemID in setOfAllInternalItemIDs:
                ly()
                if itemID in self.itemWithTags:
                    commonTagTupleHash = self.itemWithTags[itemID]
                    tagHashSet.update(self.commonTagSets[commonTagTupleHash])
            resultTagHashSet = tagHashSet - binTagHashes
        elif treeType == SMART_TREE_TYPE:
            # Looking only at items with exactly one extra tag would hide
            # folders which directly contain nothing but other sub-folders, so
            # the smart tree is built instead and its top level is used. The
            # number of levels depends on the actual nesting depth of the items
            # inside tag-folders, not on the number of items.
            smartTree = self.build_smart_tree(binTags, prePreparedSetOfAllInternalItemIDs=setOfAllInternalItemIDs,
                                              zeroSliceOnly=True)
            if 0 in smartTree:
                resultTagHashSet = smartTree[0]
        else:
            raise UnknownTreeTypeError()

        sortedTagHashList = self.sort_tag_hash_list_by_qnt(resultTagHashSet - {self.get_root_tag().__hash__()})
        return list(sortedTagHashList)

    def get_tags_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                           prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
        """Same as ``get_tagHashes_from_tags`` but returns a tuple of tag objects."""
        result = self.get_tagHashes_from_tags(
            binTags, treeType=treeType,
            prePreparedSetOfAllInternalItemIDsForThisSetOfTags=prePreparedSetOfAllInternalItemIDsForThisSetOfTags)
        return tuple(self.tag_hash_list_2_tag_list(result))

    def _build_smart_tree(self, startingBinTags, prePreparedSetOfAllInternalItemIDs, zeroSliceOnly):
        """Shared implementation of build_smart_tree and build_smart_tree_2."""
        ly = gly(self.default_priority)

        startingBinTags = set(startingBinTags)
        startingBinTags.add(self.get_root_tag())

        startingTagHashes = set()
        for tag in startingBinTags:
            ly()
            startingTagHashes.add(tag.__hash__())

        if prePreparedSetOfAllInternalItemIDs is None:
            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(startingBinTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
        else:
            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDs

        smartTree = {0: set()}
        smartTree__filler = AddToCompoundDict__Set(smartTree)
        local_tags_qnt = dict()
        local_tags_qnt__filler = KeyCounter(local_tags_qnt)

        # Pass 1: count how many of the internal items carry every extra tag.
        for itemID in setOfAllInternalItemIDs:
            ly()
            commonTagTupleHash = self.itemWithTags[itemID]
            setOfTags = set(self.commonTagSets[commonTagTupleHash]) - startingTagHashes
            for tag_hash in setOfTags:
                ly()
                local_tags_qnt__filler.add(tag_hash)

        # Pass 2: spread the extra tags of every item over the tree levels. A
        # new level starts each time the local counter changes between
        # neighbouring tags of the ordered tag list.
        for itemID in setOfAllInternalItemIDs:
            ly()
            commonTagTupleHash = self.itemWithTags[itemID]
            setOfTags = set(self.commonTagSets[commonTagTupleHash]) - startingTagHashes
            if zeroSliceOnly:
                listOfTagHashes = self.get_top_tag_hash_list_by_qnt(setOfTags, local_tags_qnt)
            else:
                listOfTagHashes = self.sort_tag_hash_list_by_qnt(setOfTags)

            isFirstTag = True
            lastTagHashQnt = None
            treeLevel = 0
            for tagHash in listOfTagHashes:
                ly()
                currentTagHashQnt = local_tags_qnt[tagHash]
                if (not isFirstTag) and (currentTagHashQnt != lastTagHashQnt):
                    treeLevel += 1
                smartTree__filler.add(treeLevel, tagHash)
                isFirstTag = False
                lastTagHashQnt = currentTagHashQnt
        return dict(smartTree)

    def build_smart_tree(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
        """Build the smart tree of sub-tags below ``startingBinTags``.

        :param prePreparedSetOfAllInternalItemIDs: IDs of all items of this tag
            path and its sub-paths; computed internally when None.
        :param zeroSliceOnly: if True, only the most frequent tags of every
            item are considered.
        :return: ``{tree level: set of tag hashes}``; level 0 is always present.
        """
        return self._build_smart_tree(startingBinTags, prePreparedSetOfAllInternalItemIDs, zeroSliceOnly)

    def build_smart_tree_2(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
        """Alias of ``build_smart_tree`` (the two implementations were identical)."""
        return self._build_smart_tree(startingBinTags, prePreparedSetOfAllInternalItemIDs, zeroSliceOnly)

    def get_all_from_tags(self, binTags, treeType=USUAL_TREE_TYPE):
        """Return ``(tuple of sub-tags, set of items)`` for the tag path ``binTags``.

        Both parts are empty when the path and its sub-paths hold no items.
        """
        binTags = set(binTags)
        items = self.get_items_from_tags(binTags, treeType=treeType,
                                         isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
        if len(items[1]) > 0:
            tags = self.get_tags_from_tags(binTags, treeType=treeType,
                                           prePreparedSetOfAllInternalItemIDsForThisSetOfTags=items[1])
            return tuple(tags), set(items[0])
        return tuple(), set()

    def get_tagsHashes_from_single_item(self, itemID, isWithoutRootHash=True):
        """Return a new set with the tag hashes of ``itemID`` (empty if unknown).

        :param isWithoutRootHash: if True, the root tag hash is excluded.
        """
        if itemID not in self.itemWithTags:
            return set()
        tagSet = set(self.commonTagSets[self.itemWithTags[itemID]])
        if isWithoutRootHash:
            tagSet.discard(self.get_root_tag().__hash__())
        return tagSet

    def get_potential_itemIDs_from_item(self, binItem):
        """Return a new set with the IDs of all stored items sharing ``binItem``'s hash."""
        return set(self.itemIDsForItem.get(binItem.__hash__(), ()))

    def is_smart_redirection_for_a_tag_path_reduction_needed(self, binTags):
        """Find the tags which every sub-path of ``binTags`` has in common.

        :return: list of tag hashes (sorted by usage count, root excluded)
            shared by all longer tag paths containing ``binTags``; an empty
            list when there is no such longer path.
        """
        ly = gly(self.default_priority)

        binTags = set(binTags)
        binTags.add(self.get_root_tag())

        tagHashSet = set()
        for binTag in binTags:
            ly()
            tagHashSet.add(binTag.__hash__())

        binTagsQnt = len(tagHashSet)
        commonTagGroupHashSet = set()
        for commonTagQnt in self.setOfTagGroupQnt:
            ly()
            if commonTagQnt > binTagsQnt:
                commonTagGroupHashSet.update(self.tagsQntPerCommonTagSet[commonTagQnt])

        setOfTheTagsIntersection = None
        for commonTagGroupHash in commonTagGroupHashSet:
            ly()
            commonTagHashSet = set(self.commonTagSets[commonTagGroupHash])
            if tagHashSet.issubset(commonTagHashSet) and (tagHashSet != commonTagHashSet):
                if setOfTheTagsIntersection is None:
                    setOfTheTagsIntersection = commonTagHashSet
                else:
                    setOfTheTagsIntersection = setOfTheTagsIntersection & commonTagHashSet

        if setOfTheTagsIntersection is None:
            return list()

        setOfTheTagsForAReduction = setOfTheTagsIntersection - tagHashSet
        sortedTagHashList = self.sort_tag_hash_list_by_qnt(
            setOfTheTagsForAReduction - {self.get_root_tag().__hash__()})
        return list(sortedTagHashList)

    def get_tags_for_a_smart_redirection(self, binTags):
        """Same as the ``is_smart_redirection_...`` method but returns tag objects."""
        result = self.is_smart_redirection_for_a_tag_path_reduction_needed(binTags)
        return tuple(self.tag_hash_list_2_tag_list(result))
SMART_TREE_TYPE =
0
SMART_TREE_TYPE_WITH_INTERNAL_MENU =
1
FULL_TREE_TYPE =
2
PLAIN_PSEUDO_TREE_TYPE =
3
USUAL_TREE_TYPE =
3
class
ToManyIdenticalItemsOnTheGivenTagPathError(builtins.Exception):
Raised when more than one identical item is found on the same tag path (see the legacy item-ID lookup in TagDB).
Inherited Members
- builtins.Exception
- Exception
- builtins.BaseException
- with_traceback
- args
class
UnknownTreeTypeError(builtins.Exception):
Raised by TagDB query methods when the given treeType is not one of the tree types the method supports.
Inherited Members
- builtins.Exception
- Exception
- builtins.BaseException
- with_traceback
- args
class
LockableMixin:
class
TagDB(cengal.parallel_execution.coroutines.coro_scheduler.versions.v_0.coro_scheduler.EntityStatsMixin):
class TagDB(EntityStatsMixin):
    """In-memory index of hashable items addressed by sets of hashable tags.

    Every item is stored under a "tag path": the set of its tags plus the
    implicit root tag returned by get_root_tag(). Internally tags and items
    are referenced by the value of their ``__hash__()``, and every distinct
    tag path is stored once as a tuple of tag hashes ("common tag set")
    keyed by the hash of that tuple.

    Methods that iterate call the callable returned by
    ``gly(self.default_priority)`` once per iteration (presumably a
    cooperative yield to the coroutine loop - see the loop_yield service).
    """

    def __init__(self, default_priority: CoroPriority = CoroPriority.normal):
        """Create an empty database.

        default_priority is the priority passed to gly() by the iterating
        methods of this class.
        """
        self.default_priority: CoroPriority = default_priority

        # Called to obtain a new itemID; remove_id() is called when an item is removed.
        self.itemsID = IDGenerator()

        self.itemIDsForItem = {}  # key - item hash; data - set of itemIDs

        self.itemsSet = {}  # key - itemID; data - binItem
        self.itemWithTags = {}  # key - itemID; data - hash of the sorted common tag-hash tuple

        # TODO: remove tagsNumPerItemID from the code; replace this collection of itemIDs with
        # a collection of tag-hash-tuple hashes.
        self.tagsNumPerItemID = {}  # key - number of tags; data - set of itemIDs which have that number of tags

        self.tagsSet = {}  # key - tag hash; data - binTag
        self.tagWithItems = {}  # key - tag hash; data - set of itemIDs
        self.tagsQnt = {}  # key - tag hash; data - quantity of the items with this tag

        self.commonTagSets = {}  # key - hash of the sorted tag-hash tuple; data - the sorted tag-hash tuple
        self.itemsOnTheCommonTagSets = {}  # key - hash of the sorted tag-hash tuple; data - set of itemIDs
        self.tagsQntPerCommonTagSet = {}  # key - number of tags; data - set of tag-hash-tuple hashes
        self.setOfTagGroupQnt = set()  # the keys of self.tagsQntPerCommonTagSet

    def get_entity_stats(self, stats_level: 'EntityStatsMixin.StatsLevel' = EntityStatsMixin.StatsLevel.debug) -> Tuple[str, Dict[str, Any]]:
        """Return (class name, {'items num': ..., 'tags num': ...}); stats_level is not used."""
        return type(self).__name__, {
            'items num': len(self.itemsSet),
            'tags num': len(self.tagsSet),
        }

    def get_root_tag(self):
        """Return the root tag that is implicitly part of every tag path."""
        return str(_ROOT_TAG)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _root_tag_hash(self):
        """Return the hash under which the root tag is indexed."""
        return self.get_root_tag().__hash__()

    def _with_root_tag(self, binTags):
        """Return a new set holding binTags plus the root tag (the argument is not mutated)."""
        tags = set(binTags)
        tags.add(self.get_root_tag())
        return tags

    def _hash_set(self, objects):
        """Return the set of ``__hash__()`` values of the given objects."""
        ly = gly(self.default_priority)

        hashes = set()
        for obj in objects:
            ly()
            # __hash__() is called directly (not hash()) because every index of this class is
            # keyed by exactly this value.
            hashes.add(obj.__hash__())
        return hashes

    def _common_tag_set_hashes_longer_than(self, tagsQnt):
        """Return hashes of all stored tag paths that hold more than tagsQnt tags."""
        ly = gly(self.default_priority)

        result = set()
        for commonTagQnt in self.setOfTagGroupQnt:
            ly()
            if commonTagQnt > tagsQnt:
                result.update(self.tagsQntPerCommonTagSet[commonTagQnt])
        return result

    # ------------------------------------------------------------------
    # Tags
    # ------------------------------------------------------------------

    def add_tag(self, binTag):
        """Register binTag (overwriting a tag stored under the same hash) with zero items if it is new."""
        tagHash = binTag.__hash__()
        self.tagsSet[tagHash] = binTag
        # FIX: the counter used to be initialised only when the hash was missing from
        # self.tagsSet - which was never true right after the assignment above - so a tag
        # added directly had no counter and could not be sorted by quantity.
        if tagHash not in self.tagsQnt:
            self.tagsQnt[tagHash] = 0
        if tagHash not in self.tagWithItems:
            self.tagWithItems[tagHash] = set()

    def remove_tag(self, binTag):
        """Try to delete the given tag.

        Returns False when the tag is unknown or at least one item still has it;
        otherwise deletes the tag and returns True.
        """
        tagHash = binTag.__hash__()
        if tagHash not in self.tagsSet:
            return False

        if tagHash in self.tagsQnt:
            if self.tagsQnt[tagHash] != 0:
                return False
            del self.tagsQnt[tagHash]
        del self.tagsSet[tagHash]
        self.tagWithItems.pop(tagHash, None)
        return True

    # ------------------------------------------------------------------
    # Items
    # ------------------------------------------------------------------

    def add_item(self, binItem, binTags):
        """Add binItem on the tag path binTags (the root tag is added implicitly).

        Returns the new itemID, or None when an identical item (same hash) already
        exists on exactly this tag path.
        """
        binTags = self._with_root_tag(binTags)

        if self.get_itemID_from_item_and_tags(binTags, binItem) is not None:
            return None

        itemID = self.itemsID()
        self.itemsSet[itemID] = binItem
        self.itemIDsForItem.setdefault(binItem.__hash__(), set()).add(itemID)

        binTagHashes = set()
        for tag in binTags:
            self.add_tag(tag)
            tagHash = tag.__hash__()
            binTagHashes.add(tagHash)
            setOfItems = self.tagWithItems[tagHash]
            if itemID not in setOfItems:
                setOfItems.add(itemID)
                self.tagsQnt[tagHash] = self.tagsQnt.get(tagHash, 0) + 1

        # Keyed by the number of distinct tag hashes so that remove_item_by_itemID(), which
        # derives the key from the stored tag-hash tuple, always finds the same bucket.
        self.tagsNumPerItemID.setdefault(len(binTagHashes), set()).add(itemID)

        sortedTagTuple = tuple(self.sort_tag_hash_list_by_hash(binTagHashes))
        hashOfTheSortedTagTuple = sortedTagTuple.__hash__()
        self.itemWithTags[itemID] = hashOfTheSortedTagTuple
        self.commonTagSets[hashOfTheSortedTagTuple] = sortedTagTuple
        self.itemsOnTheCommonTagSets.setdefault(hashOfTheSortedTagTuple, set()).add(itemID)

        lenOfTheSortedTagTuple = len(sortedTagTuple)
        self.tagsQntPerCommonTagSet.setdefault(lenOfTheSortedTagTuple, set()).add(hashOfTheSortedTagTuple)
        self.setOfTagGroupQnt.add(lenOfTheSortedTagTuple)

        return itemID

    def remove_item_by_itemID(self, itemID):
        """Remove the item with the given itemID from every index.

        Unknown IDs are ignored. The ID is handed back to the ID generator only when
        the item was actually registered.
        NOTE(review): the original nesting of the remove_id() call could not be
        recovered from the collapsed listing - confirm against the real source.
        """
        ly = gly(self.default_priority)
        wasRegistered = False

        if itemID in self.itemsSet:
            wasRegistered = True
            itemHash = self.itemsSet.pop(itemID).__hash__()
            IDsSet = self.itemIDsForItem.get(itemHash)
            if IDsSet is not None:
                IDsSet.discard(itemID)
                if not IDsSet:
                    del self.itemIDsForItem[itemHash]

        if itemID in self.itemWithTags:
            wasRegistered = True
            commonTagTupleHash = self.itemWithTags.pop(itemID)
            tagTuple = self.commonTagSets[commonTagTupleHash]
            numberOfTags = len(tagTuple)

            IDsSet = self.itemsOnTheCommonTagSets.get(commonTagTupleHash)
            if IDsSet is not None:
                IDsSet.discard(itemID)
                if not IDsSet:
                    # It was the last item on this tag path: forget the path itself.
                    del self.itemsOnTheCommonTagSets[commonTagTupleHash]
                    del self.commonTagSets[commonTagTupleHash]
                    setOfTagTuplesHashes = self.tagsQntPerCommonTagSet.get(numberOfTags)
                    if setOfTagTuplesHashes is not None:
                        # FIX: the tuple hash must be removed here. The old code removed
                        # numberOfTags, leaving a stale hash that later raised KeyError on
                        # self.commonTagSets[...] in the query methods.
                        setOfTagTuplesHashes.discard(commonTagTupleHash)
                        if not setOfTagTuplesHashes:
                            del self.tagsQntPerCommonTagSet[numberOfTags]
                            self.setOfTagGroupQnt.discard(numberOfTags)

            setOfTagHashes = set(tagTuple)

            IDsSet = self.tagsNumPerItemID.get(len(setOfTagHashes))
            if IDsSet is not None:
                IDsSet.discard(itemID)
                if not IDsSet:
                    del self.tagsNumPerItemID[len(setOfTagHashes)]

            for tagHash in setOfTagHashes:
                ly()
                if tagHash in self.tagsQnt:
                    tagsQuantity = self.tagsQnt[tagHash] - 1
                    if tagsQuantity < 1:
                        # No items left with this tag: the tag itself is dropped.
                        del self.tagsQnt[tagHash]
                        del self.tagsSet[tagHash]
                    else:
                        self.tagsQnt[tagHash] = tagsQuantity
                IDsSet = self.tagWithItems.get(tagHash)
                if IDsSet is not None:
                    IDsSet.discard(itemID)
                    if not IDsSet:
                        del self.tagWithItems[tagHash]

        if wasRegistered:
            self.itemsID.remove_id(itemID)

    def remove_item(self, binTags, binItem):
        """Remove binItem from the tag path binTags.

        Returns the itemID of the deleted item or None when no such item exists.
        """
        itemID = self.get_itemID_from_item_and_tags(self._with_root_tag(binTags), binItem)
        if itemID is not None:
            self.remove_item_by_itemID(itemID)
        return itemID

    def __OLD__get_itemID_from_item_and_tags(self, binTags, binItem):
        """Legacy lookup kept for reference; see get_itemID_from_item_and_tags().

        Raises ToManyIdenticalItemsOnTheGivenTagPathError when more than one identical
        item sits on the given tag path.
        """
        # FIX: a copy is used - the old code called binTags.append(), which failed for
        # sets/tuples and mutated the caller's list.
        binTags = self._with_root_tag(binTags)
        potentialIDs = set(self.get_potential_itemIDs_from_item(binItem))
        itemIDsSet = set(self.get_itemIDs_from_tags(binTags, SMART_TREE_TYPE))
        resultItemIDs = potentialIDs & itemIDsSet
        if not resultItemIDs:
            return None
        if len(resultItemIDs) > 1:
            raise ToManyIdenticalItemsOnTheGivenTagPathError()
        # Exactly one item is expected in the intersection between the potential IDs and the
        # items that have exactly the given tags; add_item() is responsible for keeping it so.
        return resultItemIDs.pop()

    def get_itemID_from_item_and_tags(self, binTags, binItem):
        """Return the itemID of binItem stored on exactly the tag path binTags, or None."""
        ly = gly(self.default_priority)

        setOfBinTagsHashes = self._hash_set(self._with_root_tag(binTags))
        for itemID in self.get_potential_itemIDs_from_item(binItem):
            ly()
            if setOfBinTagsHashes == self.get_tagsHashes_from_single_item(itemID, isWithoutRootHash=False):
                return itemID
        return None

    def tag_hash_list_2_tag_list(self, tagHashList):
        """Translate an iterable of tag hashes into the list of stored tags (same order)."""
        ly = gly(self.default_priority)

        tagList = list()
        for tagHash in tagHashList:
            ly()
            tagList.append(self.tagsSet[tagHash])
        return tagList

    def get_item_and_tags_from_itemID(self, itemID):
        """Return (item, list of its tags without the root tag, most used tags first).

        Raises KeyError for an unknown itemID.
        """
        commonTagTupleHash = self.itemWithTags[itemID]
        tagSet = set(self.commonTagSets[commonTagTupleHash]) - {self._root_tag_hash()}
        sortedTagHashList = self.sort_tag_hash_list_by_qnt(tagSet)
        return (self.itemsSet[itemID], self.tag_hash_list_2_tag_list(sortedTagHashList))

    # ------------------------------------------------------------------
    # Sorting helpers
    # ------------------------------------------------------------------

    def get_top_tag_hash_list_by_qnt(self, tagHashSet, local_tags_qnt=None):
        """Return a tuple of the tag hashes from tagHashSet that share the biggest quantity.

        Quantities are read from local_tags_qnt when it is given, otherwise from
        self.tagsQnt. An empty input gives an empty tuple.
        """
        ly = gly(self.default_priority)

        tagsQnt = self.tagsQnt if local_tags_qnt is None else local_tags_qnt
        tag_by_qnt = dict()
        tag_by_qnt__filler = AddToCompoundDict__Set(tag_by_qnt)
        biggest_qnt = 0
        for tag_hash in set(tagHashSet):
            ly()
            qnt = tagsQnt[tag_hash]
            if qnt > biggest_qnt:
                biggest_qnt = qnt
            tag_by_qnt__filler.add(qnt, tag_hash)

        if tag_by_qnt:
            return tuple(tag_by_qnt[biggest_qnt])
        return tuple()

    def sort_tag_hash_list_by_qnt(self, tagHashSet):
        """Return the tag hashes as a list ordered by descending self.tagsQnt value."""
        ly = gly(self.default_priority)

        rawTagList = list()
        for tagHash in set(tagHashSet):
            ly()
            rawTagList.append((tagHash, self.tagsQnt[tagHash]))
        return self.sort_raw_tag_list(rawTagList)

    def sort_tag_list_by_qnt(self, binTags):
        """Return the tags as a list ordered by descending self.tagsQnt value."""
        ly = gly(self.default_priority)

        rawTagList = list()
        for tag in set(binTags):
            ly()
            rawTagList.append((tag, self.tagsQnt[tag.__hash__()]))
        return self.sort_raw_tag_list(rawTagList)

    def sort_tag_hash_list_by_hash(self, tagHashSet):
        """Return the tag hashes as a list ordered by descending hash value."""
        ly = gly(self.default_priority)

        rawTagList = list()
        for tagHash in set(tagHashSet):
            ly()
            rawTagList.append((tagHash, tagHash))
        return self.sort_raw_tag_list(rawTagList)

    def sort_tag_list_by_hash(self, binTags):
        """Return the tags as a list ordered by descending hash value."""
        ly = gly(self.default_priority)

        rawTagList = list()
        for tag in set(binTags):
            ly()
            rawTagList.append((tag, tag.__hash__()))
        return self.sort_raw_tag_list(rawTagList)

    def sort_raw_tag_list(self, rawTagList):
        """Sort (value, weight) pairs by descending weight and return the list of values."""
        ly = gly(self.default_priority)

        rawTagList = sorted(rawTagList, key=lambda tagAndWeight: tagAndWeight[1], reverse=True)
        tagList = list()
        for rawTag in rawTagList:
            ly()
            tagList.append(rawTag[0])
        return tagList

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    def get_itemIDs_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                              isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
        """Return the itemIDs found under the tag path binTags.

        treeType selects the representation:
          * PLAIN_PSEUDO_TREE_TYPE - items on this exact path and on every longer path
            that contains it;
          * SMART_TREE_TYPE / FULL_TREE_TYPE - only items on this exact path.
        Any other value raises UnknownTreeTypeError.

        Returns a new set of itemIDs, or - when
        isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags is true - the tuple
        (set of result itemIDs, set of itemIDs the plain representation would give).
        """
        # TODO: fix a bug: SMART_TREE_TYPE returns not only the items of the current "folder" but
        # also items from its immediate "subfolders".
        # NOTE(review): the shared-set mutation fixed below is a likely cause - confirm and drop
        # this TODO.
        ly = gly(self.default_priority)

        tagHashSet = self._hash_set(self._with_root_tag(binTags))
        binTagHashTuple = tuple(self.sort_tag_hash_list_by_hash(tagHashSet))
        hashOfTheBinTagHashTuple = binTagHashTuple.__hash__()
        itemIDsOnTheExactPath = self.itemsOnTheCommonTagSets.get(hashOfTheBinTagHashTuple, set())

        setOfAllInternalItemIDsForThisSetOfTags = set()
        if (treeType == PLAIN_PSEUDO_TREE_TYPE) or isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            # FIX: start from a copy. The old code updated the set stored in
            # self.itemsOnTheCommonTagSets in place, permanently adding sub-path items to the
            # exact-path index.
            setOfAllInternalItemIDsForThisSetOfTags = set(itemIDsOnTheExactPath)
            for commonTagGroupHash in self._common_tag_set_hashes_longer_than(len(tagHashSet)):
                ly()
                if tagHashSet.issubset(self.commonTagSets[commonTagGroupHash]):
                    setOfAllInternalItemIDsForThisSetOfTags.update(
                        self.itemsOnTheCommonTagSets[commonTagGroupHash])

        if (treeType == SMART_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
            resultItemIDSet = itemIDsOnTheExactPath
        elif treeType == PLAIN_PSEUDO_TREE_TYPE:
            resultItemIDSet = setOfAllInternalItemIDsForThisSetOfTags
        else:
            raise UnknownTreeTypeError()

        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            return (set(resultItemIDSet), set(setOfAllInternalItemIDsForThisSetOfTags))
        return set(resultItemIDSet)

    def get_items_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                            isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
        """Return the items found under the tag path binTags.

        treeType has the same meaning as in get_itemIDs_from_tags(). Returns a tuple of
        distinct items, or - when isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags is
        true - (tuple of items, tuple of all internal itemIDs).
        """
        ly = gly(self.default_priority)

        itemIDsSet = self.get_itemIDs_from_tags(
            set(binTags), treeType=treeType,
            isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags)
        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            resultItemIDs, allInternalItemIDs = itemIDsSet
        else:
            resultItemIDs, allInternalItemIDs = itemIDsSet, None

        itemSet = set()
        for itemID in resultItemIDs:
            ly()
            itemSet.add(self.itemsSet[itemID])

        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            return (tuple(itemSet), tuple(allInternalItemIDs))
        return tuple(itemSet)

    def get_tagHashes_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                                prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
        """Return hashes of the tags that can be chosen next after the tag path binTags.

        The result is a list without the given tags and without the root tag, most used
        tags first. PLAIN_PSEUDO_TREE_TYPE and FULL_TREE_TYPE give every tag of every
        item under the path; SMART_TREE_TYPE gives level 0 of build_smart_tree(); any
        other treeType raises UnknownTreeTypeError.

        prePreparedSetOfAllInternalItemIDsForThisSetOfTags may hold the itemIDs of all
        items under the path (including "subfolders") to avoid recomputing them. It can
        be produced by:
          a) get_itemIDs_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE)
          b) get_itemIDs_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
          c) get_items_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
        """
        ly = gly(self.default_priority)

        binTags = set(binTags)

        if prePreparedSetOfAllInternalItemIDsForThisSetOfTags is None:
            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(binTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
        else:
            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDsForThisSetOfTags

        resultTagHashSet = set()
        if (treeType == PLAIN_PSEUDO_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
            binTagHashes = self._hash_set(binTags)
            tagHashSet = set()
            for itemID in setOfAllInternalItemIDs:
                ly()
                if itemID in self.itemWithTags:
                    tagHashSet.update(self.commonTagSets[self.itemWithTags[itemID]])
            resultTagHashSet = tagHashSet - binTagHashes
        elif treeType == SMART_TREE_TYPE:
            smartTree = self.build_smart_tree(binTags, prePreparedSetOfAllInternalItemIDs=setOfAllInternalItemIDs,
                                              zeroSliceOnly=True)
            if 0 in smartTree:
                resultTagHashSet = smartTree[0]
        else:
            raise UnknownTreeTypeError()

        return list(self.sort_tag_hash_list_by_qnt(resultTagHashSet - {self._root_tag_hash()}))

    def get_tags_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                           prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
        """Same as get_tagHashes_from_tags() but returns a tuple of the tags themselves."""
        result = self.get_tagHashes_from_tags(
            binTags, treeType=treeType,
            prePreparedSetOfAllInternalItemIDsForThisSetOfTags=prePreparedSetOfAllInternalItemIDsForThisSetOfTags)
        return tuple(self.tag_hash_list_2_tag_list(result))

    def build_smart_tree(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
        """Distribute the tags found under the path startingBinTags over tree levels.

        For every item under the path its remaining tags (those not in the starting
        path) are ordered by quantity; the level is increased each time the quantity
        changes while walking that order. With zeroSliceOnly only the tags sharing the
        biggest local quantity of each item are considered, so only level 0 is filled.

        Returns a dict {level: set of tag hashes}; level 0 is always present.
        Raises KeyError when prePreparedSetOfAllInternalItemIDs holds an unknown itemID.
        """
        ly = gly(self.default_priority)

        startingBinTags = self._with_root_tag(startingBinTags)
        startingTagHashes = self._hash_set(startingBinTags)

        if prePreparedSetOfAllInternalItemIDs is None:
            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(startingBinTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
        else:
            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDs

        smartTree = {0: set()}
        smartTree__filler = AddToCompoundDict__Set(smartTree)

        # First pass: how many of the items under the path carry each remaining tag.
        local_tags_qnt = dict()
        local_tags_qnt__filler = KeyCounter(local_tags_qnt)
        for itemID in setOfAllInternalItemIDs:
            ly()
            setOfTags = set(self.commonTagSets[self.itemWithTags[itemID]]) - startingTagHashes
            for tag_hash in setOfTags:
                ly()
                local_tags_qnt__filler.add(tag_hash)

        # Second pass: place every item's remaining tags on the levels.
        for itemID in setOfAllInternalItemIDs:
            ly()
            setOfTags = set(self.commonTagSets[self.itemWithTags[itemID]]) - startingTagHashes
            if zeroSliceOnly:
                listOfTagHashes = self.get_top_tag_hash_list_by_qnt(setOfTags, local_tags_qnt)
            else:
                # NOTE(review): the order comes from the global self.tagsQnt while the level
                # boundaries below use the local quantities - confirm this is intended.
                listOfTagHashes = self.sort_tag_hash_list_by_qnt(setOfTags)

            lastTagHashQnt = None
            treeLevel = 0
            isFirstTag = True
            for tagHash in listOfTagHashes:
                ly()
                currentTagHashQnt = local_tags_qnt[tagHash]
                if (not isFirstTag) and (currentTagHashQnt != lastTagHashQnt):
                    treeLevel += 1
                smartTree__filler.add(treeLevel, tagHash)
                isFirstTag = False
                lastTagHashQnt = currentTagHashQnt
        return dict(smartTree)

    def build_smart_tree_2(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
        """Alias of build_smart_tree(); the two implementations were identical copies."""
        return self.build_smart_tree(startingBinTags,
                                     prePreparedSetOfAllInternalItemIDs=prePreparedSetOfAllInternalItemIDs,
                                     zeroSliceOnly=zeroSliceOnly)

    def get_all_from_tags(self, binTags, treeType=USUAL_TREE_TYPE):
        """Return (tuple of next tags, set of items) for the tag path binTags.

        Both parts are empty when there are no items under the path at all.
        """
        binTags = set(binTags)
        items = self.get_items_from_tags(binTags, treeType=treeType,
                                         isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
        if len(items[1]) == 0:
            return (tuple(), set())

        tags = self.get_tags_from_tags(binTags, treeType=treeType,
                                       prePreparedSetOfAllInternalItemIDsForThisSetOfTags=items[1])
        return (tuple(tags), set(items[0]))

    def get_tagsHashes_from_single_item(self, itemID, isWithoutRootHash=True):
        """Return a new set with the tag hashes of the item; empty for an unknown itemID.

        The root tag hash is excluded unless isWithoutRootHash is false.
        """
        if itemID not in self.itemWithTags:
            return set()

        tagSet = set(self.commonTagSets[self.itemWithTags[itemID]])
        if isWithoutRootHash:
            tagSet.discard(self._root_tag_hash())
        return tagSet

    def get_potential_itemIDs_from_item(self, binItem):
        """Return a new set with the itemIDs of all stored items that have the hash of binItem."""
        return set(self.itemIDsForItem.get(binItem.__hash__(), ()))

    def is_smart_redirection_for_a_tag_path_reduction_needed(self, binTags):
        """Return hashes of the tags shared by every stored path that extends binTags.

        Considers all stored tag paths that strictly contain the given path (root tag
        included) and intersects them. The result is the list of the common tag hashes
        that are not part of the given path (root tag excluded), most used first; it is
        empty when no stored path extends the given one.
        """
        ly = gly(self.default_priority)

        tagHashSet = self._hash_set(self._with_root_tag(binTags))

        setOfTheTagsIntersection = None
        for commonTagGroupHash in self._common_tag_set_hashes_longer_than(len(tagHashSet)):
            ly()
            commonTagHashSet = set(self.commonTagSets[commonTagGroupHash])
            if tagHashSet < commonTagHashSet:  # proper subset: contains the path and is not equal to it
                if setOfTheTagsIntersection is None:
                    setOfTheTagsIntersection = commonTagHashSet
                else:
                    setOfTheTagsIntersection = setOfTheTagsIntersection & commonTagHashSet

        if setOfTheTagsIntersection is None:
            return list()

        setOfTheTagsForAReduction = setOfTheTagsIntersection - tagHashSet
        return list(self.sort_tag_hash_list_by_qnt(setOfTheTagsForAReduction - {self._root_tag_hash()}))

    def get_tags_for_a_smart_redirection(self, binTags):
        """Same as is_smart_redirection_for_a_tag_path_reduction_needed() but returns a tuple of tags."""
        result = self.is_smart_redirection_for_a_tag_path_reduction_needed(binTags)
        return tuple(self.tag_hash_list_2_tag_list(result))
TagDB( default_priority: cengal.parallel_execution.coroutines.coro_standard_services.loop_yield.versions.v_0.loop_yield.CoroPriority = <CoroPriority.normal: 1>)
def __init__(self, default_priority: CoroPriority = CoroPriority.normal):
    """Create an empty tag database.

    Args:
        default_priority: stored as ``self.default_priority``.
            NOTE(review): other methods of this class pass it to ``gly()``
            to obtain their per-iteration yield callable - confirm the
            exact semantics against the loop_yield service.
    """
    self.default_priority: CoroPriority = default_priority

    self.itemsID = IDGenerator()

    self.itemIDsForItem = {}  # key - item hash; data - set of itemIDs

    self.itemsSet = {}  # key - ItemID; data - binItem
    # TODO: replace the list of tags with the hash of a tag list that is stored only once
    self.itemWithTags = {}  # key - ItemID; data - sorted common TagsTuple's hash

    # TODO: remove tagsNumPerItemID from the code. Replace this list of itemIDs with a list of
    # hashOfTheTagHashTuple
    self.tagsNumPerItemID = {}  # key - number of tags in this ItemID group; data - set of itemIDs which have
    # exactly that number of tags

    self.tagsSet = {}  # key - tag hash; data - binTag
    self.tagWithItems = {}  # key - Tag hash; data - set of itemIDs
    self.tagsQnt = {}  # key - Tag hash; data - quantity of the items with this tag


    self.commonTagSets = {}  # key - sorted common TagsTuple's hash; data - sorted TagsTuple
    self.itemsOnTheCommonTagSets = {}  # key - sorted common TagsTuple's hash; data - set of itemIDs
    self.tagsQntPerCommonTagSet = {}  # key - number of tags; data - set of TagsTuple hashes
    self.setOfTagGroupQnt = set()  # {tagQntInGroup1, tagQntInGroup2, ..., tagQntInGroupN} where
    # each Group is a key of the self.tagsQntPerCommonTagSet

    # TODO: add a dict tagHashTuplesIDs of the form [tagHashTuple.__hash__():tagHashTuple]
    # where tagHashSet is {tagHash_1, tagHash_2, ..., tagHash_3}
    # TODO: add a dict itemsOnThePath of the form [tagHashTuple.__hash__():{itemID_1, itemID_2, ..., itemID_3}]

    # TODO: add a dict of the form [tagQnt:{itemID_1, itemID_2, ..., itemID_3}] and read this from it
    # and/or
    # TODO: add a dict of the form [tagQnt:{hashOfTheTagHashTuple_1, hashOfTheTagHashTuple_2, ...
    # , hashOfTheTagHashTuple_N}] and read this from it, and only after that from each tagHashSet
    # where hashOfTheTagHashTuple is tagHashTuple.__hash__()
default_priority: cengal.parallel_execution.coroutines.coro_standard_services.loop_yield.versions.v_0.loop_yield.CoroPriority
def
get_entity_stats( self, stats_level: cengal.parallel_execution.coroutines.coro_scheduler.versions.v_0.coro_scheduler.EntityStatsMixin.StatsLevel = <StatsLevel.debug: 1>) -> Tuple[str, Dict[str, Any]]:
def
remove_tag(self, binTag):
def remove_tag(self, binTag):
    """Try to delete ``binTag`` from the database.

    The tag is deleted only when it is known (present in ``self.tagsSet``)
    and either has no usage counter at all or its counter equals zero.

    Args:
        binTag: the tag object; it is looked up by ``binTag.__hash__()``.

    Returns:
        True when the tag was deleted, False when it is unknown or at least
        one item still uses it.
    """
    tag_key = binTag.__hash__()

    if tag_key not in self.tagsSet:
        return False

    if tag_key in self.tagsQnt:
        # A non-zero counter means some item still carries this tag.
        if self.tagsQnt[tag_key] != 0:
            return False
        del self.tagsSet[tag_key]
        del self.tagsQnt[tag_key]
    else:
        del self.tagsSet[tag_key]

    # The tag is gone: drop its (possibly empty) item index as well.
    if tag_key in self.tagWithItems:
        del self.tagWithItems[tag_key]

    return True
def
add_item(self, binItem, binTags):
def add_item(self, binItem, binTags):
    """Register ``binItem`` on the tag path ``binTags`` and index it.

    The root tag (``self.get_root_tag()``) is added to the path when it is
    missing. Every tag of the path is passed to ``self.add_tag`` and the
    per-tag, per-item and per-tag-set indexes are updated.

    Args:
        binItem: the item to store; indexed by ``binItem.__hash__()``.
        binTags: iterable of tags forming the item's tag path.

    Returns:
        The newly generated item ID, or None when
        ``self.get_itemID_from_item_and_tags`` already reports an ID for this
        item on this tag path.
    """
    tag_path = set(binTags)
    root_tag = self.get_root_tag()
    if root_tag not in tag_path:
        tag_path.add(root_tag)

    already_there = self.get_itemID_from_item_and_tags(tag_path, binItem)
    if already_there is not None:
        return None

    new_id = self.itemsID()
    self.itemsSet[new_id] = binItem

    # item hash -> IDs of all items sharing that hash
    self.itemIDsForItem.setdefault(binItem.__hash__(), set()).add(new_id)
    # number of tags on the path -> IDs of items having that many tags
    self.tagsNumPerItemID.setdefault(len(tag_path), set()).add(new_id)

    path_hashes = set()
    for one_tag in tag_path:
        self.add_tag(one_tag)
        one_hash = one_tag.__hash__()
        path_hashes.add(one_hash)
        holders = self.tagWithItems[one_hash]
        if new_id in holders:
            # Already accounted for under this hash; do not count it twice.
            continue
        holders.add(new_id)
        self.tagsQnt[one_hash] = self.tagsQnt.get(one_hash, 0) + 1

    path_tuple = tuple(self.sort_tag_hash_list_by_hash(path_hashes))
    path_key = path_tuple.__hash__()
    self.itemWithTags[new_id] = path_key
    self.commonTagSets[path_key] = path_tuple
    self.itemsOnTheCommonTagSets.setdefault(path_key, set()).add(new_id)

    path_len = len(path_tuple)
    self.tagsQntPerCommonTagSet.setdefault(path_len, set()).add(path_key)
    self.setOfTagGroupQnt.add(path_len)

    return new_id
def
remove_item_by_itemID(self, itemID):
def remove_item_by_itemID(self, itemID):
    """Remove the item stored under ``itemID`` and clean up every index.

    Does nothing when ``itemID`` is not present in ``self.itemsSet``.
    A tag whose usage counter drops below one is removed from
    ``self.tagsQnt`` and ``self.tagsSet``; empty index sets are deleted.

    Changes relative to the previous revision:
      * when the last item of a common tag set is removed, the *hash of that
        tag tuple* is now discarded from ``self.tagsQntPerCommonTagSet``.
        Previously the tag *count* was discarded from that set of hashes, so
        a stale hash stayed behind without a matching ``self.commonTagSets``
        entry and later lookups by that hash raised ``KeyError``;
      * a tag already missing from ``self.tagsSet`` no longer aborts the
        clean-up with ``KeyError``.

    Args:
        itemID: identifier of the item to remove (as returned by
            ``add_item``).
    """
    ly = gly(self.default_priority)

    if itemID not in self.itemsSet:
        return

    itemHash = self.itemsSet[itemID].__hash__()
    del self.itemsSet[itemID]

    if itemHash in self.itemIDsForItem:
        IDsSet = self.itemIDsForItem[itemHash]
        IDsSet.discard(itemID)
        if not IDsSet:
            del self.itemIDsForItem[itemHash]

    if itemID in self.itemWithTags:
        commonTagTupleHash = self.itemWithTags[itemID]
        tagTuple = self.commonTagSets[commonTagTupleHash]
        numberOfTags = len(tagTuple)

        if commonTagTupleHash in self.itemsOnTheCommonTagSets:
            IDsSet = self.itemsOnTheCommonTagSets[commonTagTupleHash]
            IDsSet.discard(itemID)
            if not IDsSet:
                # No item is left on this exact tag set: forget the tag set
                # itself and its entry in the "tuples per tag count" index.
                del self.itemsOnTheCommonTagSets[commonTagTupleHash]
                del self.commonTagSets[commonTagTupleHash]
                if numberOfTags in self.tagsQntPerCommonTagSet:
                    setOfTagTuplesHashes = self.tagsQntPerCommonTagSet[numberOfTags]
                    # The set holds tuple hashes, so the hash is what must go.
                    setOfTagTuplesHashes.discard(commonTagTupleHash)
                    if not setOfTagTuplesHashes:
                        del self.tagsQntPerCommonTagSet[numberOfTags]
                        self.setOfTagGroupQnt.discard(numberOfTags)

        del self.itemWithTags[itemID]

        setOfTagHashes = set(tagTuple)

        tagsQnt = len(setOfTagHashes)
        if tagsQnt in self.tagsNumPerItemID:
            IDsSet = self.tagsNumPerItemID[tagsQnt]
            IDsSet.discard(itemID)
            if not IDsSet:
                del self.tagsNumPerItemID[tagsQnt]

        for tagHash in setOfTagHashes:
            ly()
            if tagHash in self.tagsQnt:
                remaining = self.tagsQnt[tagHash] - 1
                if remaining < 1:
                    # Last user of the tag is gone: drop the tag entirely.
                    del self.tagsQnt[tagHash]
                    self.tagsSet.pop(tagHash, None)
                else:
                    self.tagsQnt[tagHash] = remaining
            if tagHash in self.tagWithItems:
                IDsSet = self.tagWithItems[tagHash]
                IDsSet.discard(itemID)
                if not IDsSet:
                    del self.tagWithItems[tagHash]

    self.itemsID.remove_id(itemID)
def
remove_item(self, binTags, binItem):
def remove_item(self, binTags, binItem):
    """Remove ``binItem`` from the tag path ``binTags``.

    The root tag is added to the path when missing, the item ID is resolved
    with ``self.get_itemID_from_item_and_tags`` and, when found, the item is
    removed through ``self.remove_item_by_itemID``.

    Args:
        binTags: iterable of tags forming the item's tag path.
        binItem: the item to remove.

    Returns:
        The ID of the removed item, or None when no such item was found.
    """
    tag_path = set(binTags)
    root_tag = self.get_root_tag()
    if root_tag not in tag_path:
        tag_path.add(root_tag)

    found_id = self.get_itemID_from_item_and_tags(tag_path, binItem)
    if found_id is None:
        return found_id

    self.remove_item_by_itemID(found_id)
    return found_id
def
get_top_tag_hash_list_by_qnt(self, tagHashSet, local_tags_qnt=None):
def get_top_tag_hash_list_by_qnt(self, tagHashSet, local_tags_qnt=None):
    """Return the tag hashes from ``tagHashSet`` that share the largest count.

    Args:
        tagHashSet: iterable of tag hashes to inspect.
        local_tags_qnt: optional mapping "tag hash -> count" used instead of
            ``self.tagsQnt``.

    Returns:
        A tuple with every hash whose count equals the largest count seen
        (the running maximum starts at 0), or an empty tuple when
        ``tagHashSet`` is empty.
    """
    ly = gly(self.default_priority)

    counts = local_tags_qnt if local_tags_qnt is not None else self.tagsQnt

    hashes_by_count = dict()
    grouper = AddToCompoundDict__Set(hashes_by_count)
    top_count = 0
    for one_hash in set(tagHashSet):
        ly()
        current = counts[one_hash]
        top_count = current if current > top_count else top_count
        grouper.add(current, one_hash)

    if not hashes_by_count:
        return tuple()
    return tuple(hashes_by_count[top_count])
def
sort_tag_hash_list_by_qnt(self, tagHashSet):
def sort_tag_hash_list_by_qnt(self, tagHashSet):
    """Return the given tag hashes as a list ordered by their item count.

    Each distinct hash is paired with ``self.tagsQnt[hash]`` and the pairs are
    handed to ``self.sort_raw_tag_list``, which produces the final order.

    Args:
        tagHashSet: iterable of tag hashes; duplicates are collapsed.

    Returns:
        List of tag hashes (not tag objects).
    """
    ly = gly(self.default_priority)

    weighted_hashes = []
    for one_hash in set(tagHashSet):
        ly()
        weighted_hashes.append((one_hash, self.tagsQnt[one_hash]))

    return self.sort_raw_tag_list(weighted_hashes)
def
sort_tag_list_by_qnt(self, binTags):
def sort_tag_list_by_qnt(self, binTags):
    """Return the given tags as a list ordered by their item count.

    Each distinct tag is paired with ``self.tagsQnt[tag.__hash__()]`` and the
    pairs are handed to ``self.sort_raw_tag_list``, which produces the final
    order.

    Args:
        binTags: iterable of tag objects; duplicates are collapsed.

    Returns:
        List of tag objects.
    """
    ly = gly(self.default_priority)

    weighted_tags = []
    for one_tag in set(binTags):
        ly()
        weight = self.tagsQnt[one_tag.__hash__()]
        weighted_tags.append((one_tag, weight))

    return self.sort_raw_tag_list(weighted_tags)
def
sort_tag_hash_list_by_hash(self, tagHashSet):
def sort_tag_hash_list_by_hash(self, tagHashSet):
    """Return the given tag hashes as a list ordered by the hash value itself.

    Each distinct hash is used as its own weight and the pairs are handed to
    ``self.sort_raw_tag_list``, which produces the final order.

    Args:
        tagHashSet: iterable of tag hashes; duplicates are collapsed.

    Returns:
        List of tag hashes.
    """
    ly = gly(self.default_priority)

    self_weighted = []
    for one_hash in set(tagHashSet):
        ly()
        self_weighted.append((one_hash, one_hash))

    return self.sort_raw_tag_list(self_weighted)
def
sort_tag_list_by_hash(self, binTags):
def sort_tag_list_by_hash(self, binTags):
    """Return the given tags as a list ordered by their hash value.

    Each distinct tag is paired with ``tag.__hash__()`` and the pairs are
    handed to ``self.sort_raw_tag_list``, which produces the final order.

    Args:
        binTags: iterable of tag objects; duplicates are collapsed.

    Returns:
        List of tag objects.
    """
    ly = gly(self.default_priority)

    weighted_tags = []
    for one_tag in set(binTags):
        ly()
        weighted_tags.append((one_tag, one_tag.__hash__()))

    return self.sort_raw_tag_list(weighted_tags)
def
sort_raw_tag_list(self, rawTagList):
def sort_raw_tag_list(self, rawTagList):
    """Order ``(payload, weight)`` pairs by weight and return the payloads.

    Args:
        rawTagList: iterable of two-element sequences; element 0 is the value
            to return, element 1 is the sort weight.

    Returns:
        List of the payloads, heaviest weight first. Pairs with equal weights
        keep their incoming relative order (the sort is stable).
    """
    ly = gly(self.default_priority)

    ordered_pairs = list(rawTagList)
    ordered_pairs.sort(key=lambda pair: pair[1], reverse=True)

    payloads = []
    for pair in ordered_pairs:
        ly()
        payloads.append(pair[0])
    return payloads
def
build_smart_tree( self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
def build_smart_tree(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
    """Build a "level -> set of tag hashes" mapping below ``startingBinTags``.

    Args:
        startingBinTags: iterable of tags that form the starting path; the
            root tag is added when it is missing. Their hashes are excluded
            from the result.
        prePreparedSetOfAllInternalItemIDs: optional item IDs to work on. When
            None, they are obtained from ``self.get_itemIDs_from_tags`` with
            ``treeType=PLAIN_PSEUDO_TREE_TYPE``.
        zeroSliceOnly: when true, each item contributes only the hashes
            returned by ``self.get_top_tag_hash_list_by_qnt`` (using the local
            counters); otherwise all of its remaining hashes, in the order
            given by ``self.sort_tag_hash_list_by_qnt``.

    Returns:
        A dict copy of the mapping. Key 0 is always present (possibly with an
        empty set).
    """
    ly = gly(self.default_priority)

    startingBinTags = set(startingBinTags)

    if self.get_root_tag() not in startingBinTags:
        startingBinTags.add(self.get_root_tag())

    startingTagHashes = set()
    for tag in startingBinTags:
        ly()
        startingTagHashes.add(tag.__hash__())

    if prePreparedSetOfAllInternalItemIDs is None:
        setOfAllInternalItemIDs = self.get_itemIDs_from_tags(startingBinTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
    else:
        setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDs

    smartTree = {0: set()}
    smartTree__filler = AddToCompoundDict__Set(smartTree)
    local_tags_qnt = dict()
    local_tags_qnt__filler = KeyCounter(local_tags_qnt)
    # First pass: count, per tag hash, its occurrences among the selected items.
    # NOTE(review): KeyCounter replaced an inline "create with 0, then += 1"
    # snippet, so it presumably increments local_tags_qnt[tag_hash] - confirm.
    for itemID in setOfAllInternalItemIDs:
        ly()
        commonTagTupleHash = self.itemWithTags[itemID]
        tagSet = set(self.commonTagSets[commonTagTupleHash])
        setOfTags = tagSet
        setOfTags = setOfTags - startingTagHashes
        for tag_hash in setOfTags:
            ly()
            local_tags_qnt__filler.add(tag_hash)

    # Second pass: walk each item's remaining hashes and assign tree levels.
    for itemID in setOfAllInternalItemIDs:
        ly()
        commonTagTupleHash = self.itemWithTags[itemID]
        tagSet = set(self.commonTagSets[commonTagTupleHash])
        setOfTags = tagSet
        setOfTags = setOfTags - startingTagHashes
        listOfTagHashes = None
        if zeroSliceOnly:
            listOfTagHashes = self.get_top_tag_hash_list_by_qnt(setOfTags, local_tags_qnt)
        else:
            # NOTE(review): this ordering uses the global self.tagsQnt while the
            # level comparison below uses local_tags_qnt - confirm it is intended.
            listOfTagHashes = self.sort_tag_hash_list_by_qnt(setOfTags)

        lastTagHash = None
        lastTagHashQnt = None
        treeLevel = 0
        for tagHash in listOfTagHashes:
            ly()
            currentTagHashQnt = local_tags_qnt[tagHash]
            if (lastTagHash is None) or (currentTagHashQnt == lastTagHashQnt):
                # First hash of the item, or same local count as the previous
                # hash: stay on the current level.
                pass
            else:
                # The local count changed: move one level deeper.
                treeLevel += 1
                lastTagHash = None
                lastTagHashQnt = None
            smartTree__filler.add(treeLevel, tagHash)
            lastTagHash = tagHash
            lastTagHashQnt = currentTagHashQnt
    return dict(smartTree)
def
build_smart_tree_2( self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
def build_smart_tree_2(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
    """Build a "level -> set of tag hashes" mapping below ``startingBinTags``.

    NOTE(review): in this view the statements of this method are the same as
    those of ``build_smart_tree``; if the two differ, the difference lies in
    nesting that is not recoverable here - confirm before merging them.

    Args:
        startingBinTags: iterable of tags that form the starting path; the
            root tag is added when it is missing. Their hashes are excluded
            from the result.
        prePreparedSetOfAllInternalItemIDs: optional item IDs to work on. When
            None, they are obtained from ``self.get_itemIDs_from_tags`` with
            ``treeType=PLAIN_PSEUDO_TREE_TYPE``.
        zeroSliceOnly: when true, each item contributes only the hashes
            returned by ``self.get_top_tag_hash_list_by_qnt`` (using the local
            counters); otherwise all of its remaining hashes, in the order
            given by ``self.sort_tag_hash_list_by_qnt``.

    Returns:
        A dict copy of the mapping. Key 0 is always present (possibly with an
        empty set).
    """
    ly = gly(self.default_priority)

    startingBinTags = set(startingBinTags)

    if self.get_root_tag() not in startingBinTags:
        startingBinTags.add(self.get_root_tag())

    startingTagHashes = set()
    for tag in startingBinTags:
        ly()
        startingTagHashes.add(tag.__hash__())

    if prePreparedSetOfAllInternalItemIDs is None:
        setOfAllInternalItemIDs = self.get_itemIDs_from_tags(startingBinTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
    else:
        setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDs

    smartTree = {0: set()}
    smartTree__filler = AddToCompoundDict__Set(smartTree)
    local_tags_qnt = dict()
    local_tags_qnt__filler = KeyCounter(local_tags_qnt)
    # First pass: count, per tag hash, its occurrences among the selected items.
    # NOTE(review): KeyCounter replaced an inline "create with 0, then += 1"
    # snippet, so it presumably increments local_tags_qnt[tag_hash] - confirm.
    for itemID in setOfAllInternalItemIDs:
        ly()
        commonTagTupleHash = self.itemWithTags[itemID]
        tagSet = set(self.commonTagSets[commonTagTupleHash])
        setOfTags = tagSet
        setOfTags = setOfTags - startingTagHashes
        for tag_hash in setOfTags:
            ly()
            local_tags_qnt__filler.add(tag_hash)

    # Second pass: walk each item's remaining hashes and assign tree levels.
    for itemID in setOfAllInternalItemIDs:
        ly()
        commonTagTupleHash = self.itemWithTags[itemID]
        tagSet = set(self.commonTagSets[commonTagTupleHash])
        setOfTags = tagSet
        setOfTags = setOfTags - startingTagHashes
        listOfTagHashes = None
        if zeroSliceOnly:
            listOfTagHashes = self.get_top_tag_hash_list_by_qnt(setOfTags, local_tags_qnt)
        else:
            # NOTE(review): this ordering uses the global self.tagsQnt while the
            # level comparison below uses local_tags_qnt - confirm it is intended.
            listOfTagHashes = self.sort_tag_hash_list_by_qnt(setOfTags)

        lastTagHash = None
        lastTagHashQnt = None
        treeLevel = 0
        for tagHash in listOfTagHashes:
            ly()
            currentTagHashQnt = local_tags_qnt[tagHash]
            if (lastTagHash is None) or (currentTagHashQnt == lastTagHashQnt):
                # First hash of the item, or same local count as the previous
                # hash: stay on the current level.
                pass
            else:
                # The local count changed: move one level deeper.
                treeLevel += 1
                lastTagHash = None
                lastTagHashQnt = None
            smartTree__filler.add(treeLevel, tagHash)
            lastTagHash = tagHash
            lastTagHashQnt = currentTagHashQnt
    return dict(smartTree)
def
is_smart_redirection_for_a_tag_path_reduction_needed(self, binTags):
def is_smart_redirection_for_a_tag_path_reduction_needed(self, binTags):
    """Find the tags that every longer stored tag path containing ``binTags`` shares.

    All stored common tag sets with more tags than the given path (root tag
    included) are inspected. Those that contain the whole given path are
    intersected, and whatever the intersection adds on top of the given path
    (root tag excluded) is returned.

    Args:
        binTags: iterable of tags forming the path to check.

    Returns:
        List of tag hashes ordered by ``self.sort_tag_hash_list_by_qnt``;
        an empty list when no longer stored path contains the given one.
    """
    ly = gly(self.default_priority)

    path_tags = set(binTags)
    root_tag = self.get_root_tag()
    if root_tag not in path_tags:
        path_tags.add(root_tag)

    path_hashes = set()
    for one_tag in path_tags:
        ly()
        path_hashes.add(one_tag.__hash__())
    path_len = len(path_hashes)

    # Collect the hashes of every stored tag tuple longer than the given path.
    candidate_keys = set()
    for group_len in self.setOfTagGroupQnt:
        ly()
        if group_len <= path_len:
            continue
        candidate_keys.update(self.tagsQntPerCommonTagSet[group_len])

    shared = None
    for candidate_key in candidate_keys:
        ly()
        candidate = set(self.commonTagSets[candidate_key])
        if not path_hashes.issubset(candidate):
            continue
        if path_hashes == candidate:
            continue
        shared = candidate if shared is None else (shared & candidate)

    if shared is None:
        return list()

    extra_hashes = shared - path_hashes
    root_hash = self.get_root_tag().__hash__()
    return list(self.sort_tag_hash_list_by_qnt(extra_hashes - {root_hash}))
Inherited Members
- cengal.parallel_execution.coroutines.coro_scheduler.versions.v_0.coro_scheduler.EntityStatsMixin
- StatsLevel