cengal.data_containers.dynamic_tag_tree.versions.v_0.TagDB
Module Docstring Docstrings: http://www.python.org/dev/peps/pep-0257/
#!/usr/bin/env python
# coding=utf-8

# Copyright © 2012-2024 ButenkoMS. All rights reserved. Contacts: <gtalk@butenkoms.space>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""
In-memory tag database.

``TagDB`` stores hashable items under sets of hashable tags ("tag paths") and
exposes several tree-like views over them (see the ``*_TREE_TYPE`` constants).
Docstrings: http://www.python.org/dev/peps/pep-0257/
"""


__author__ = "ButenkoMS <gtalk@butenkoms.space>"
__copyright__ = "Copyright © 2012-2024 ButenkoMS. All rights reserved. Contacts: <gtalk@butenkoms.space>"
__credits__ = ["ButenkoMS <gtalk@butenkoms.space>", ]
__license__ = "Apache License, Version 2.0"
__version__ = "4.4.1"
__maintainer__ = "ButenkoMS <gtalk@butenkoms.space>"
__email__ = "gtalk@butenkoms.space"
# __status__ = "Prototype"
__status__ = "Development"
# __status__ = "Production"


__author__ = 'Mikhail Butenko <gtalk@mikhail-butenko.in.ua>'

from cengal.data_generation.id_generator import IDGenerator
# from help_tools import AddToCompoundDict, ResultExistence
from cengal.code_flow_control.smart_values.versions.v_0 import ResultExistence
from cengal.data_containers.compound_dict_management import AddToCompoundDict


# Smart tag tree: a network mapped onto a tree. Returns only the direct subtags of the current path (not their
# subtags); returns the items of the current path, but not the items from sub-paths.
SMART_TREE_TYPE = 0
# Smart tree with internal menu. A menu is built into the tree that allows switching the output type right from
# the tree: SMART_TREE_TYPE, FULL_TREE_TYPE and PLAIN_PSEUDO_TREE_TYPE. For each output type a subtag/subfolder is
# available, inside of which there is a normal tree of items of the chosen type.
# NOTE: no method in this module implements this type; passing it raises UnknownTreeTypeError.
SMART_TREE_TYPE_WITH_INTERNAL_MENU = 1
# Full tree with all tags - with repeats and without filtering. The item list is the same as for SMART_TREE_TYPE,
# but the tag list is the same as for PLAIN_PSEUDO_TREE_TYPE.
FULL_TREE_TYPE = 2
# Plain tags and items set (will show all tags, subtags and items of the current "dir" - the current tag set).
# Shows all tags and subtags as a single list - like primitive tag-based file systems; shows all items of the
# current path + all items of all sub-paths.
PLAIN_PSEUDO_TREE_TYPE = 3

USUAL_TREE_TYPE = PLAIN_PSEUDO_TREE_TYPE

# Implicit tag that is added to every tag path; deliberately an unlikely string.
_ROOT_TAG = r'k{1+vdcY#m8t-4m9`)G2\b]/O\'Rzqyr@FEO~%./nGPzl)[^q 0RS!.bCPh ?fag{8~{SGj;Ss3U85Q-:'


class ToManyIdenticalItemsOnTheGivenTagPathError(Exception):
    """Raised when more than one identical item is found on the same tag path."""
    pass


class UnknownTreeTypeError(Exception):
    """Raised when a ``treeType`` argument is not supported by the called method."""
    pass


class AddToCompoundDict__Set(AddToCompoundDict):
    """``AddToCompoundDict`` preset whose default value is an empty set and whose mediator adds to that set."""

    def __init__(self, original_dict):
        super().__init__(
            original_dict,
            lambda: set(),
            lambda original_dict, key, value: original_dict[key].add(value)
        )


class TagDB:
    """Index of items by tag sets.

    Tags and items are identified by the value of their ``__hash__()``. Every tag path implicitly contains the
    root tag returned by :meth:`get_root_tag`.
    """

    def __init__(self):
        super().__init__()

        self.itemsID = IDGenerator()

        self.itemIDsForItem = {}  # key - item hash; data - set of itemIDs

        self.itemsSet = {}  # key - ItemID; data - binItem
        # TODO: replace the tag list with the hash of a tag list that is stored only once
        self.itemWithTags = {}  # key - ItemID; data - sorted common TagsTuple's hash

        # TODO: remove tagsNumPerItemID from the code. Replace this list of itemIDs with a list of
        # hashOfTheTagHashTuple
        self.tagsNumPerItemID = {}  # key - number of tags in this ItemID group; data - set of itemIDs which have
        # that number of tags

        self.tagsSet = {}  # key - tag hash; data - binTag
        self.tagWithItems = {}  # key - Tag hash; data - set of itemIDs
        self.tagsQnt = {}  # key - Tag hash; data - quantity of the items with this tag

        self.commonTagSets = {}  # key - sorted common TagsTuple's hash; data - sorted TagsTuple
        self.itemsOnTheCommonTagSets = {}  # key - sorted common TagsTuple's hash; data - set of itemIDs
        self.tagsQntPerCommonTagSet = {}  # key - number of tags; data - set of TagsTuple hashes
        self.setOfTagGroupQnt = set()  # {tagQntInGroup1, tagQntInGroup2, ..., tagQntInGroupN} where
        # each Group is a key of the self.tagsQntPerCommonTagSet

        # TODO: add a dict tagHashTuplesIDs of the form [tagHashTuple.__hash__():tagHashTuple]
        #   where tagHashSet is {tagHash_1, tagHash_2, ..., tagHash_3}
        # TODO: add a dict itemsOnThePath of the form [tagHashTuple.__hash__():{itemID_1, itemID_2, ..., itemID_3}]

        # TODO: add a dict of the form [tagQnt:{itemID_1, itemID_2, ..., itemID_3}] and read this from it
        #   and/or
        # TODO: add a dict of the form [tagQnt:{hashOfTheTagHashTuple_1, hashOfTheTagHashTuple_2, ...
        #   , hashOfTheTagHashTuple_N}] and read this from it, and then from each tagHashSet
        #   where hashOfTheTagHashTuple is tagHashTuple.__hash__()

    def get_root_tag(self):
        """Return the implicit root tag (a ``str``) that belongs to every tag path."""
        return str(_ROOT_TAG)

    def add_tag(self, binTag):
        """Register ``binTag`` (or refresh its stored object) without attaching any item to it."""
        tagHash = binTag.__hash__()
        self.tagsSet[tagHash] = binTag
        # The counter dict (not tagsSet, which was just written) decides whether the tag is new.
        if tagHash not in self.tagsQnt:
            self.tagsQnt[tagHash] = 0
        if tagHash not in self.tagWithItems:
            self.tagWithItems[tagHash] = set()

    def remove_tag(self, binTag):
        """Try to delete ``binTag``.

        Returns ``False`` when the tag is unknown or at least one item still carries it; otherwise deletes the
        tag and returns ``True``.
        """
        tagHash = binTag.__hash__()
        if tagHash not in self.tagsSet:
            return False
        if self.tagsQnt.get(tagHash, 0) != 0:
            return False

        del self.tagsSet[tagHash]
        self.tagsQnt.pop(tagHash, None)
        self.tagWithItems.pop(tagHash, None)
        return True

    def add_item(self, binItem, binTags):
        """Add ``binItem`` on the tag path ``binTags``.

        Returns the new item's dynamic ID, or ``None`` if an item with the same hash already exists on exactly
        this tag path.
        """
        binTags = set(binTags)
        binTags.add(self.get_root_tag())

        if self.get_itemID_from_item_and_tags(binTags, binItem) is not None:
            return None

        itemID = self.itemsID()
        self.itemsSet[itemID] = binItem
        self.itemIDsForItem.setdefault(binItem.__hash__(), set()).add(itemID)

        binTagHashes = set()
        for tag in binTags:
            self.add_tag(tag)
            tagHash = tag.__hash__()
            binTagHashes.add(tagHash)
            setOfItems = self.tagWithItems[tagHash]
            if itemID not in setOfItems:
                setOfItems.add(itemID)
                self.tagsQnt[tagHash] = self.tagsQnt.get(tagHash, 0) + 1

        # Keyed by the number of distinct tag hashes: that is the value remove_item_by_itemID() looks up.
        self.tagsNumPerItemID.setdefault(len(binTagHashes), set()).add(itemID)

        sortedTagTuple = tuple(self.sort_tag_hash_list_by_hash(binTagHashes))
        hashOfTheSortedTagTuple = sortedTagTuple.__hash__()
        self.itemWithTags[itemID] = hashOfTheSortedTagTuple
        self.commonTagSets[hashOfTheSortedTagTuple] = sortedTagTuple
        self.itemsOnTheCommonTagSets.setdefault(hashOfTheSortedTagTuple, set()).add(itemID)

        lenOfTheSortedTagTuple = len(sortedTagTuple)
        self.tagsQntPerCommonTagSet.setdefault(lenOfTheSortedTagTuple, set()).add(hashOfTheSortedTagTuple)
        self.setOfTagGroupQnt.add(lenOfTheSortedTagTuple)

        return itemID

    def remove_item_by_itemID(self, itemID):
        """Remove the item with the given ID from every index; unknown IDs are ignored."""
        isKnownItem = False

        if itemID in self.itemsSet:
            isKnownItem = True
            itemHash = self.itemsSet[itemID].__hash__()
            del self.itemsSet[itemID]

            IDsSet = self.itemIDsForItem.get(itemHash)
            if IDsSet is not None:
                IDsSet.discard(itemID)
                if not IDsSet:
                    del self.itemIDsForItem[itemHash]

        if itemID in self.itemWithTags:
            isKnownItem = True
            commonTagTupleHash = self.itemWithTags[itemID]
            tagTuple = self.commonTagSets[commonTagTupleHash]
            numberOfTags = len(tagTuple)

            IDsSet = self.itemsOnTheCommonTagSets.get(commonTagTupleHash)
            if IDsSet is not None:
                IDsSet.discard(itemID)
                if not IDsSet:
                    # Last item on this tag path: forget the path itself.
                    del self.itemsOnTheCommonTagSets[commonTagTupleHash]
                    del self.commonTagSets[commonTagTupleHash]
                    setOfTagTuplesHashes = self.tagsQntPerCommonTagSet.get(numberOfTags)
                    if setOfTagTuplesHashes is not None:
                        # The set holds tag-tuple hashes, so the hash (not the tag count) must be removed;
                        # otherwise a stale hash would later be looked up in commonTagSets and fail.
                        setOfTagTuplesHashes.discard(commonTagTupleHash)
                        if not setOfTagTuplesHashes:
                            del self.tagsQntPerCommonTagSet[numberOfTags]
                            self.setOfTagGroupQnt.discard(numberOfTags)

            del self.itemWithTags[itemID]

            setOfTagHashes = set(tagTuple)

            tagsQnt = len(setOfTagHashes)
            IDsSet = self.tagsNumPerItemID.get(tagsQnt)
            if IDsSet is not None:
                IDsSet.discard(itemID)
                if not IDsSet:
                    del self.tagsNumPerItemID[tagsQnt]

            for tagHash in setOfTagHashes:
                if tagHash in self.tagsQnt:
                    tagsQuantity = self.tagsQnt[tagHash] - 1
                    if tagsQuantity < 1:
                        # No items left with this tag: the tag disappears together with its last item.
                        del self.tagsQnt[tagHash]
                        self.tagsSet.pop(tagHash, None)
                    else:
                        self.tagsQnt[tagHash] = tagsQuantity
                IDsSet = self.tagWithItems.get(tagHash)
                if IDsSet is not None:
                    IDsSet.discard(itemID)
                    if not IDsSet:
                        del self.tagWithItems[tagHash]

        # NOTE(review): the ID is released only for items that were actually registered - confirm that
        # IDGenerator.remove_id() is not expected to be called for unknown IDs.
        if isKnownItem:
            self.itemsID.remove_id(itemID)

    def remove_item(self, binTags, binItem):
        """Remove ``binItem`` from the tag path ``binTags``.

        Returns the ID of the removed item, or ``None`` if there is no such item on this tag path.
        """
        binTags = set(binTags)
        binTags.add(self.get_root_tag())
        itemID = self.get_itemID_from_item_and_tags(binTags, binItem)
        if itemID is not None:
            self.remove_item_by_itemID(itemID)
        return itemID

    def __OLD__get_itemID_from_item_and_tags(self, binTags, binItem):
        """Legacy lookup kept for reference; see :meth:`get_itemID_from_item_and_tags`.

        Raises ``ToManyIdenticalItemsOnTheGivenTagPathError`` if more than one matching item is found.
        """
        # Work on a private set: callers pass sets as well as lists, and the argument must not be mutated.
        binTags = set(binTags)
        binTags.add(self.get_root_tag())
        potentialIDs = set(self.get_potential_itemIDs_from_item(binItem))
        itemIDsSet = set(self.get_itemIDs_from_tags(binTags, SMART_TREE_TYPE))
        resultItemIDs = potentialIDs & itemIDsSet
        if not resultItemIDs:
            return None
        if len(resultItemIDs) > 1:
            raise ToManyIdenticalItemsOnTheGivenTagPathError()
        # Exactly one item lies in the intersection between the potential IDs and the items that have
        # exactly the given tag list; add_item() is responsible for keeping it that way.
        return resultItemIDs.pop()

    def get_itemID_from_item_and_tags(self, binTags, binItem):
        """Return the ID of the item with ``binItem``'s hash that sits on exactly ``binTags``, else ``None``."""
        binTags = set(binTags)
        binTags.add(self.get_root_tag())

        setOfBinTagsHashes = {tag.__hash__() for tag in binTags}
        for itemID in self.get_potential_itemIDs_from_item(binItem):
            if setOfBinTagsHashes == self.get_tagsHashes_from_single_item(itemID, isWithoutRootHash=False):
                return itemID
        return None

    def tag_hash_list_2_tag_list(self, tagHashList):
        """Map tag hashes to the stored tag objects, preserving order."""
        return [self.tagsSet[tagHash] for tagHash in tagHashList]

    def get_item_and_tags_from_itemID(self, itemID):
        """Return ``(item, tags)`` for ``itemID``; tags exclude the root tag, most used tags first."""
        commonTagTupleHash = self.itemWithTags[itemID]
        tagSet = set(self.commonTagSets[commonTagTupleHash]) - {self.get_root_tag().__hash__()}
        sortedTagHashList = self.sort_tag_hash_list_by_qnt(tagSet)
        return (self.itemsSet[itemID], self.tag_hash_list_2_tag_list(sortedTagHashList))

    # @profile
    def get_top_tag_hash_list_by_qnt(self, tagHashSet, local_tags_qnt=None):
        """Return a tuple of the tag hashes from ``tagHashSet`` that share the biggest quantity.

        Quantities are read from ``local_tags_qnt`` when given, otherwise from ``self.tagsQnt``.
        """
        tagsQnt = self.tagsQnt if local_tags_qnt is None else local_tags_qnt
        tag_by_qnt = dict()
        tag_by_qnt__filler = AddToCompoundDict__Set(tag_by_qnt)
        biggest_qnt = 0
        for tag_hash in set(tagHashSet):
            qnt = tagsQnt[tag_hash]
            if qnt > biggest_qnt:
                biggest_qnt = qnt
            tag_by_qnt__filler.add(qnt, tag_hash)

        if len(tag_by_qnt) > 0:
            return tuple(tag_by_qnt[biggest_qnt])
        return tuple()

    def sort_tag_hash_list_by_qnt(self, tagHashSet):
        """Return the tag hashes ordered by item quantity, biggest first."""
        rawTagList = [(tagHash, self.tagsQnt[tagHash]) for tagHash in set(tagHashSet)]
        return self.sort_raw_tag_list(rawTagList)

    def sort_tag_list_by_qnt(self, binTags):
        """Return the tags ordered by item quantity, biggest first."""
        rawTagList = [(tag, self.tagsQnt[tag.__hash__()]) for tag in set(binTags)]
        return self.sort_raw_tag_list(rawTagList)

    def sort_tag_hash_list_by_hash(self, tagHashSet):
        """Return the tag hashes ordered by their own value, biggest first."""
        rawTagList = [(tagHash, tagHash) for tagHash in set(tagHashSet)]
        return self.sort_raw_tag_list(rawTagList)

    def sort_tag_list_by_hash(self, binTags):
        """Return the tags ordered by their hash, biggest first."""
        rawTagList = [(tag, tag.__hash__()) for tag in set(binTags)]
        return self.sort_raw_tag_list(rawTagList)

    def sort_raw_tag_list(self, rawTagList):
        """Sort ``(tag, weight)`` pairs by weight in descending order and return the list of tags."""
        sortedPairs = sorted(rawTagList, key=lambda tagAndWeight: tagAndWeight[1], reverse=True)
        return [rawTag[0] for rawTag in sortedPairs]

    def get_itemIDs_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                              isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
        """Return the set of item IDs visible on the tag path ``binTags``.

        ``treeType`` selects the view: ``SMART_TREE_TYPE`` and ``FULL_TREE_TYPE`` return the items that sit on
        exactly this path, ``PLAIN_PSEUDO_TREE_TYPE`` also returns the items of all sub-paths. Any other value
        raises ``UnknownTreeTypeError``.

        When ``isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags`` is true the result is a tuple
        ``(item IDs for treeType, item IDs of this path and all sub-paths)``.
        """
        # TODO: fix bug: SMART_TREE_TYPE returns not only the list of files in the current directory, but also
        #   those from the direct subfolders of this folder

        if treeType not in (SMART_TREE_TYPE, FULL_TREE_TYPE, PLAIN_PSEUDO_TREE_TYPE):
            raise UnknownTreeTypeError()

        binTags = set(binTags)
        binTags.add(self.get_root_tag())
        tagHashSet = {binTag.__hash__() for binTag in binTags}

        binTagHashTuple = tuple(self.sort_tag_hash_list_by_hash(tagHashSet))
        hashOfTheBinTagHashTuple = binTagHashTuple.__hash__()
        # Read-only view of the stored index entry; it is always copied before being extended or returned.
        itemIDsOnTheExactPath = self.itemsOnTheCommonTagSets.get(hashOfTheBinTagHashTuple, ())

        setOfAllInternalItemIDsForThisSetOfTags = set()
        if (treeType == PLAIN_PSEUDO_TREE_TYPE) or isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            setOfAllInternalItemIDsForThisSetOfTags.update(itemIDsOnTheExactPath)
            binTagsQnt = len(tagHashSet)
            # Only longer tag paths can be sub-paths of the requested one.
            for commonTagQnt in self.setOfTagGroupQnt:
                if commonTagQnt > binTagsQnt:
                    for commonTagGroupHash in self.tagsQntPerCommonTagSet[commonTagQnt]:
                        if tagHashSet.issubset(self.commonTagSets[commonTagGroupHash]):
                            setOfAllInternalItemIDsForThisSetOfTags.update(
                                self.itemsOnTheCommonTagSets[commonTagGroupHash])

        if treeType == PLAIN_PSEUDO_TREE_TYPE:
            resultItemIDSet = set(setOfAllInternalItemIDsForThisSetOfTags)
        else:
            resultItemIDSet = set(itemIDsOnTheExactPath)

        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            return (resultItemIDSet, set(setOfAllInternalItemIDsForThisSetOfTags))
        return resultItemIDSet

    def get_items_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                            isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
        """Return a tuple of the items visible on the tag path ``binTags`` (see :meth:`get_itemIDs_from_tags`).

        When ``isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags`` is true the result is
        ``(tuple of items, tuple of all internal item IDs)``.
        """
        binTags = set(binTags)
        itemIDsSet = self.get_itemIDs_from_tags(
            binTags, treeType=treeType,
            isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags)
        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            itemSet = {self.itemsSet[itemID] for itemID in itemIDsSet[0]}
            # result == (usual items set, additional set of all internal itemIDs)
            return (tuple(itemSet), tuple(itemIDsSet[1]))
        itemSet = {self.itemsSet[itemID] for itemID in itemIDsSet}
        return tuple(itemSet)

    def get_tagHashes_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                                prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
        """Return the list of sub-tag hashes of the tag path ``binTags``, most used first, without the root tag.

        ``treeType`` has the same meaning as in :meth:`get_items_from_tags`; ``SMART_TREE_TYPE``,
        ``FULL_TREE_TYPE`` and ``PLAIN_PSEUDO_TREE_TYPE`` are supported, anything else raises
        ``UnknownTreeTypeError``.

        ``prePreparedSetOfAllInternalItemIDsForThisSetOfTags`` is the collection of IDs of all items inside the
        "folder" ``binTags`` (including items from "subfolders"). It can be generated by:
            a) get_itemIDs_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE)
            b) get_itemIDs_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
            c) get_items_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
        (get_items_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE) is NOT suitable: it returns items, not IDs.)
        When omitted it is computed here.
        """
        binTags = set(binTags)

        if prePreparedSetOfAllInternalItemIDsForThisSetOfTags is None:
            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(binTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
        else:
            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDsForThisSetOfTags

        resultTagHashSet = set()
        if (treeType == PLAIN_PSEUDO_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
            binTagHashes = {tag.__hash__() for tag in binTags}
            tagHashSet = set()
            for itemID in setOfAllInternalItemIDs:
                if itemID in self.itemWithTags:
                    tagHashSet.update(self.commonTagSets[self.itemWithTags[itemID]])
            resultTagHashSet = tagHashSet - binTagHashes
        elif treeType == SMART_TREE_TYPE:
            # Taking only the tags of items that have exactly one extra tag would hide folders that contain
            # nothing but other subfolders; the smart tree's zero slice covers those as well.
            smartTree = self.build_smart_tree(binTags, prePreparedSetOfAllInternalItemIDs=setOfAllInternalItemIDs,
                                              zeroSliceOnly=True)
            if 0 in smartTree:
                resultTagHashSet = smartTree[0]
        else:
            raise UnknownTreeTypeError()

        sortedTagHashList = self.sort_tag_hash_list_by_qnt(resultTagHashSet - {self.get_root_tag().__hash__()})
        return list(sortedTagHashList)

    def get_tags_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                           prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
        """Same as :meth:`get_tagHashes_from_tags`, but returns a tuple of tag objects instead of hashes."""
        result = self.get_tagHashes_from_tags(
            binTags, treeType=treeType,
            prePreparedSetOfAllInternalItemIDsForThisSetOfTags=prePreparedSetOfAllInternalItemIDsForThisSetOfTags)
        return tuple(self.tag_hash_list_2_tag_list(result))

    def build_smart_tree(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
        """Build the smart tree below the tag path ``startingBinTags``.

        Returns a dict ``{tree level: set of tag hashes}``; level ``0`` is always present. For every item below
        the path, its remaining tags are walked in order and the level is increased each time the local
        quantity (number of items below the path carrying the tag) changes. With ``zeroSliceOnly`` only the
        locally most used tags of each item are taken into account.

        ``prePreparedSetOfAllInternalItemIDs`` is the collection of IDs of all items on the path and its
        sub-paths; when omitted it is computed here.
        """
        startingBinTags = set(startingBinTags)
        startingBinTags.add(self.get_root_tag())
        startingTagHashes = {tag.__hash__() for tag in startingBinTags}

        if prePreparedSetOfAllInternalItemIDs is None:
            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(startingBinTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
        else:
            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDs

        smartTree = {0: set()}
        smartTree__filler = AddToCompoundDict__Set(smartTree)
        local_tags_qnt = dict()
        local_tags_qnt__filler = AddToCompoundDict(
            local_tags_qnt,
            lambda: 0,
            lambda container, key, value: (True, container[key] + 1)
        )

        # First pass: count, per tag, the items below the starting path that carry it.
        for itemID in setOfAllInternalItemIDs:
            setOfTags = set(self.commonTagSets[self.itemWithTags[itemID]]) - startingTagHashes
            for tag_hash in setOfTags:
                local_tags_qnt__filler.add(tag_hash)

        # Second pass: distribute every item's tags over the tree levels.
        for itemID in setOfAllInternalItemIDs:
            setOfTags = set(self.commonTagSets[self.itemWithTags[itemID]]) - startingTagHashes
            if zeroSliceOnly:
                listOfTagHashes = self.get_top_tag_hash_list_by_qnt(setOfTags, local_tags_qnt)
            else:
                listOfTagHashes = self.sort_tag_hash_list_by_qnt(setOfTags)

            isFirstTag = True
            lastTagHashQnt = None
            treeLevel = 0
            for tagHash in listOfTagHashes:
                currentTagHashQnt = local_tags_qnt[tagHash]
                if (not isFirstTag) and (currentTagHashQnt != lastTagHashQnt):
                    treeLevel += 1
                smartTree__filler.add(treeLevel, tagHash)
                isFirstTag = False
                lastTagHashQnt = currentTagHashQnt
        return dict(smartTree)

    def build_smart_tree_2(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
        """Alias of :meth:`build_smart_tree`, kept for backward compatibility (it was a verbatim copy)."""
        return self.build_smart_tree(startingBinTags,
                                     prePreparedSetOfAllInternalItemIDs=prePreparedSetOfAllInternalItemIDs,
                                     zeroSliceOnly=zeroSliceOnly)

    def get_all_from_tags(self, binTags, treeType=USUAL_TREE_TYPE):
        """Return ``(tuple of sub-tags, set of items)`` for the tag path ``binTags``.

        Returns ``(tuple(), set())`` when there are no items on the path or below it.
        """
        binTags = set(binTags)
        items = self.get_items_from_tags(binTags, treeType=treeType,
                                         isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
        if len(items[1]) > 0:
            tags = self.get_tags_from_tags(binTags, treeType=treeType,
                                           prePreparedSetOfAllInternalItemIDsForThisSetOfTags=items[1])
            return (tuple(tags), set(items[0]))
        return (tuple(), set())

    def get_tagsHashes_from_single_item(self, itemID, isWithoutRootHash=True):
        """Return the set of tag hashes of ``itemID`` (empty set for an unknown ID).

        The root tag's hash is excluded unless ``isWithoutRootHash`` is false.
        """
        if itemID not in self.itemWithTags:
            return set()
        tagSet = set(self.commonTagSets[self.itemWithTags[itemID]])
        if isWithoutRootHash:
            tagSet.discard(self.get_root_tag().__hash__())
        return tagSet

    def get_potential_itemIDs_from_item(self, binItem):
        """Return a copy of the set of IDs of all stored items whose hash equals ``binItem``'s hash."""
        return set(self.itemIDsForItem.get(binItem.__hash__(), ()))

    def is_smart_redirection_for_a_tag_path_reduction_needed(self, binTags):
        """Return the tag hashes that every longer tag path containing ``binTags`` has in common.

        The result is a list without the given tags and the root tag, most used first; it is empty when no
        longer tag path contains ``binTags``.
        """
        binTags = set(binTags)
        binTags.add(self.get_root_tag())
        tagHashSet = {binTag.__hash__() for binTag in binTags}

        binTagsQnt = len(tagHashSet)
        setOfTheTagsIntersection = None
        for commonTagQnt in self.setOfTagGroupQnt:
            if commonTagQnt > binTagsQnt:
                for commonTagGroupHash in self.tagsQntPerCommonTagSet[commonTagQnt]:
                    commonTagHashSet = set(self.commonTagSets[commonTagGroupHash])
                    # A longer path that contains all requested tags is necessarily a proper superset.
                    if tagHashSet.issubset(commonTagHashSet):
                        if setOfTheTagsIntersection is None:
                            setOfTheTagsIntersection = commonTagHashSet
                        else:
                            setOfTheTagsIntersection = setOfTheTagsIntersection & commonTagHashSet
        if setOfTheTagsIntersection is None:
            return list()

        setOfTheTagsForAReduction = setOfTheTagsIntersection - tagHashSet

        sortedTagHashList = self.sort_tag_hash_list_by_qnt(setOfTheTagsForAReduction - {self.get_root_tag().__hash__()})
        return list(sortedTagHashList)

    def get_tags_for_a_smart_redirection(self, binTags):
        """Same as :meth:`is_smart_redirection_for_a_tag_path_reduction_needed`, but returns a tuple of tags."""
        result = self.is_smart_redirection_for_a_tag_path_reduction_needed(binTags)
        return tuple(self.tag_hash_list_2_tag_list(result))
SMART_TREE_TYPE =
0
SMART_TREE_TYPE_WITH_INTERNAL_MENU =
1
FULL_TREE_TYPE =
2
PLAIN_PSEUDO_TREE_TYPE =
3
USUAL_TREE_TYPE =
3
class
ToManyIdenticalItemsOnTheGivenTagPathError(builtins.Exception):
Common base class for all non-exit exceptions.
Inherited Members
- builtins.Exception
- Exception
- builtins.BaseException
- with_traceback
- args
class
UnknownTreeTypeError(builtins.Exception):
Common base class for all non-exit exceptions.
Inherited Members
- builtins.Exception
- Exception
- builtins.BaseException
- with_traceback
- args
class
AddToCompoundDict__Set(cengal.data_containers.compound_dict_management.manager.versions.v_1.manager.AddToCompoundDict):
class AddToCompoundDict__Set(AddToCompoundDict):
    """``AddToCompoundDict`` preconfigured for dictionaries whose values are sets.

    The default-value functor produces an empty ``set`` and the mediator adds
    the given value to the set stored under ``key``.
    """

    def __init__(self, original_dict):
        """Bind the helper to ``original_dict`` (the dict that will be filled)."""

        def new_bucket():
            # Default value functor: a fresh, empty set.
            return set()

        def put_into_bucket(original_dict, key, value):
            # Mediator: ``set.add`` returns None, and that None is passed through.
            return original_dict[key].add(value)

        super().__init__(original_dict, new_bucket, put_into_bucket)
AddToCompoundDict__Set(original_dict)
def __init__(self, original_dict):
    """Bind the helper to ``original_dict`` with set-valued defaults."""

    def new_bucket():
        # Default value functor: a fresh, empty set.
        return set()

    def put_into_bucket(original_dict, key, value):
        # Mediator: ``set.add`` returns None, and that None is passed through.
        return original_dict[key].add(value)

    super().__init__(original_dict, new_bucket, put_into_bucket)
:param original_dict:
:param default_value: functor. Examples: list(); {1:set(), 2:[set(), set(), list()]}; etc.
:param mediator: functor. Examples: original_dict[index].add(y), original_dict[index] += y, etc. Should return ValueExistence(True, ...) or None/nothing.
:return:
Inherited Members
- cengal.data_containers.compound_dict_management.manager.versions.v_1.manager.AddToCompoundDict
- original_dict
- add
class
TagDB:
class TagDB:
    """In-memory index of items addressed by sets of tags ("tag paths").

    Every stored item gets an ID from ``IDGenerator`` and is bound to a set of
    tag hashes; the hash of the root tag is always part of that set. Tags and
    items are keyed by the value of their ``__hash__()`` method, so they must
    be hashable.
    """

    def __init__(self):
        super().__init__()

        self.itemsID = IDGenerator()

        self.itemIDsForItem = {}  # key - item hash; data - set of itemIDs

        self.itemsSet = {}  # key - ItemID; data - binItem
        # TODO: replace the tag list with the hash of a tag list that is stored only once
        self.itemWithTags = {}  # key - ItemID; data - sorted common TagsTuple's hash

        # TODO: remove tagsNumPerItemID from the code. Replace this collection of itemIDs with a collection of
        # hashOfTheTagHashTuple
        self.tagsNumPerItemID = {}  # key - number of tags in this ItemID group; data - set of itemIDs which
        # have that number of tags

        self.tagsSet = {}  # key - tag hash; data - binTag
        self.tagWithItems = {}  # key - Tag hash; data - set of itemIDs
        self.tagsQnt = {}  # key - Tag hash; data - quantity of the items with this tag

        self.commonTagSets = {}  # key - sorted common TagsTuple's hash; data - sorted TagsTuple
        self.itemsOnTheCommonTagSets = {}  # key - sorted common TagsTuple's hash; data - set of itemIDs
        self.tagsQntPerCommonTagSet = {}  # key - number of tags; data - set of TagsTuple hashes
        self.setOfTagGroupQnt = set()  # {tagQntInGroup1, tagQntInGroup2, ..., tagQntInGroupN} where
        # each Group is a key of the self.tagsQntPerCommonTagSet

        # TODO: add a dict tagHashTuplesIDs of the form [tagHashTuple.__hash__():tagHashTuple]
        # where tagHashSet is {tagHash_1, tagHash_2, ..., tagHash_3}
        # TODO: add a dict itemsOnThePath of the form [tagHashTuple.__hash__():{itemID_1, itemID_2, ..., itemID_3}]

        # TODO: add a dict of the form [tagQnt:{itemID_1, itemID_2, ..., itemID_3}] and read from it
        # and/or
        # TODO: add a dict of the form [tagQnt:{hashOfTheTagHashTuple_1, hashOfTheTagHashTuple_2, ...
        # , hashOfTheTagHashTuple_N}] and read from it, and only then from each tagHashSet
        # where hashOfTheTagHashTuple is tagHashTuple.__hash__()

    # ------------------------------------------------------------------ helpers

    def _tag_set_with_root(self, binTags):
        """Return a new set with the given tags plus the root tag."""
        tags = set(binTags)
        tags.add(self.get_root_tag())
        return tags

    @staticmethod
    def _tag_hashes(tags):
        """Return the set of ``__hash__()`` values of the given tags."""
        return {tag.__hash__() for tag in tags}

    # --------------------------------------------------------------------- tags

    def get_root_tag(self):
        """Return the root tag (``str(_ROOT_TAG)``) that is part of every tag path."""
        return str(_ROOT_TAG)

    def add_tag(self, binTag):
        """Register ``binTag`` without attaching any item to it.

        Existing counters and item sets of an already known tag are kept.
        """
        tagHash = binTag.__hash__()
        self.tagsSet[tagHash] = binTag
        # The counter must be tested on its own dict: testing ``tagsSet`` right after the
        # assignment above could never be true, so the zero counter was never created.
        if tagHash not in self.tagsQnt:
            self.tagsQnt[tagHash] = 0
        if tagHash not in self.tagWithItems:
            self.tagWithItems[tagHash] = set()

    def remove_tag(self, binTag):
        """Delete ``binTag`` if no item uses it.

        :return: True when the tag was deleted; False when the tag is unknown or its item
            counter is not zero.
        """
        tagHash = binTag.__hash__()
        if tagHash not in self.tagsSet:
            return False
        if tagHash in self.tagsQnt:
            if self.tagsQnt[tagHash] != 0:
                return False
            del self.tagsQnt[tagHash]
        del self.tagsSet[tagHash]
        self.tagWithItems.pop(tagHash, None)
        return True

    # -------------------------------------------------------------------- items

    def add_item(self, binItem, binTags):
        """Store ``binItem`` on the tag path ``binTags`` (the root tag is added automatically).

        :return: the new item ID, or None when an item with the same hash already exists on
            exactly this tag path.
        """
        binTags = self._tag_set_with_root(binTags)

        if self.get_itemID_from_item_and_tags(binTags, binItem) is not None:
            return None

        itemID = self.itemsID()
        self.itemsSet[itemID] = binItem
        self.itemIDsForItem.setdefault(binItem.__hash__(), set()).add(itemID)
        self.tagsNumPerItemID.setdefault(len(binTags), set()).add(itemID)

        binTagHashes = set()
        for tag in binTags:
            self.add_tag(tag)
            tagHash = tag.__hash__()
            binTagHashes.add(tagHash)
            setOfItems = self.tagWithItems[tagHash]
            if itemID not in setOfItems:
                setOfItems.add(itemID)
                self.tagsQnt[tagHash] = self.tagsQnt.get(tagHash, 0) + 1

        sortedTagTuple = tuple(self.sort_tag_hash_list_by_hash(binTagHashes))
        hashOfTheSortedTagTuple = sortedTagTuple.__hash__()
        self.itemWithTags[itemID] = hashOfTheSortedTagTuple
        self.commonTagSets[hashOfTheSortedTagTuple] = sortedTagTuple
        self.itemsOnTheCommonTagSets.setdefault(hashOfTheSortedTagTuple, set()).add(itemID)

        lenOfTheSortedTagTuple = len(sortedTagTuple)
        self.tagsQntPerCommonTagSet.setdefault(lenOfTheSortedTagTuple, set()).add(hashOfTheSortedTagTuple)
        self.setOfTagGroupQnt.add(lenOfTheSortedTagTuple)

        return itemID

    def remove_item_by_itemID(self, itemID):
        """Remove the item with the given ID from every index.

        Tags whose item counter drops below one are removed together with the item.
        """
        wasKnown = False

        if itemID in self.itemsSet:
            wasKnown = True
            itemHash = self.itemsSet.pop(itemID).__hash__()
            IDsSet = self.itemIDsForItem.get(itemHash)
            if IDsSet is not None:
                IDsSet.discard(itemID)
                if not IDsSet:
                    del self.itemIDsForItem[itemHash]

        if itemID in self.itemWithTags:
            wasKnown = True
            commonTagTupleHash = self.itemWithTags[itemID]
            tagTuple = self.commonTagSets[commonTagTupleHash]
            numberOfTags = len(tagTuple)

            IDsSet = self.itemsOnTheCommonTagSets.get(commonTagTupleHash)
            if IDsSet is not None:
                IDsSet.discard(itemID)
                if not IDsSet:
                    # Last item on this tag path: forget the path itself.
                    del self.itemsOnTheCommonTagSets[commonTagTupleHash]
                    del self.commonTagSets[commonTagTupleHash]
                    setOfTagTuplesHashes = self.tagsQntPerCommonTagSet.get(numberOfTags)
                    if setOfTagTuplesHashes is not None:
                        # The set holds tuple hashes, so the tuple hash (not the tag count)
                        # is what has to be discarded; otherwise a stale hash stays behind
                        # and later lookups in commonTagSets fail with KeyError.
                        setOfTagTuplesHashes.discard(commonTagTupleHash)
                        if not setOfTagTuplesHashes:
                            del self.tagsQntPerCommonTagSet[numberOfTags]
                            self.setOfTagGroupQnt.discard(numberOfTags)

            del self.itemWithTags[itemID]

            setOfTagHashes = set(tagTuple)

            IDsSet = self.tagsNumPerItemID.get(len(setOfTagHashes))
            if IDsSet is not None:
                IDsSet.discard(itemID)
                if not IDsSet:
                    del self.tagsNumPerItemID[len(setOfTagHashes)]

            for tagHash in setOfTagHashes:
                if tagHash in self.tagsQnt:
                    tagsQuantity = self.tagsQnt[tagHash] - 1
                    if tagsQuantity < 1:
                        del self.tagsQnt[tagHash]
                        del self.tagsSet[tagHash]
                    else:
                        self.tagsQnt[tagHash] = tagsQuantity
                IDsSet = self.tagWithItems.get(tagHash)
                if IDsSet is not None:
                    IDsSet.discard(itemID)
                    if not IDsSet:
                        del self.tagWithItems[tagHash]

        # NOTE(review): the ID is released only when the item was actually known here;
        # behaviour of IDGenerator.remove_id() for foreign IDs is not visible - confirm.
        if wasKnown:
            self.itemsID.remove_id(itemID)

    def remove_item(self, binTags, binItem):
        """Remove ``binItem`` from the tag path ``binTags``.

        :return: the ID of the removed item, or None when no such item exists on that path.
        """
        itemID = self.get_itemID_from_item_and_tags(binTags, binItem)
        if itemID is not None:
            self.remove_item_by_itemID(itemID)
        return itemID

    def __OLD__get_itemID_from_item_and_tags(self, binTags, binItem):
        """Legacy lookup kept for reference; see ``get_itemID_from_item_and_tags``.

        :raises ToManyIdenticalItemsOnTheGivenTagPathError: when more than one candidate
            item sits on the given tag path.
        """
        # Work on a private set: the argument may be any iterable and must not be mutated.
        binTags = self._tag_set_with_root(binTags)
        potentialIDs = set(self.get_potential_itemIDs_from_item(binItem))
        itemIDsSet = set(self.get_itemIDs_from_tags(binTags, SMART_TREE_TYPE))
        resultItemIDs = potentialIDs & itemIDsSet
        if not resultItemIDs:
            return None
        if len(resultItemIDs) > 1:
            raise ToManyIdenticalItemsOnTheGivenTagPathError()
        return resultItemIDs.pop()

    def get_itemID_from_item_and_tags(self, binTags, binItem):
        """Return the ID of the item with ``binItem``'s hash stored on exactly ``binTags``, or None."""
        setOfBinTagsHashes = self._tag_hashes(self._tag_set_with_root(binTags))
        for itemID in self.get_potential_itemIDs_from_item(binItem):
            if setOfBinTagsHashes == self.get_tagsHashes_from_single_item(itemID, isWithoutRootHash=False):
                return itemID
        return None

    def tag_hash_list_2_tag_list(self, tagHashList):
        """Translate tag hashes into the stored tag objects, keeping the order."""
        return [self.tagsSet[tagHash] for tagHash in tagHashList]

    def get_item_and_tags_from_itemID(self, itemID):
        """Return ``(item, tags)`` for ``itemID``; tags exclude the root and are sorted by quantity."""
        commonTagTupleHash = self.itemWithTags[itemID]
        tagSet = set(self.commonTagSets[commonTagTupleHash]) - {self.get_root_tag().__hash__()}
        sortedTagHashList = self.sort_tag_hash_list_by_qnt(tagSet)
        return self.itemsSet[itemID], self.tag_hash_list_2_tag_list(sortedTagHashList)

    # ------------------------------------------------------------------ sorting

    def get_top_tag_hash_list_by_qnt(self, tagHashSet, local_tags_qnt=None):
        """Return a tuple of the tag hashes that share the biggest quantity.

        Quantities are read from ``local_tags_qnt`` when given, otherwise from ``self.tagsQnt``.
        """
        tagsQnt = self.tagsQnt if local_tags_qnt is None else local_tags_qnt
        tag_by_qnt = dict()
        tag_by_qnt__filler = AddToCompoundDict__Set(tag_by_qnt)
        biggest_qnt = 0
        for tag_hash in set(tagHashSet):
            qnt = tagsQnt[tag_hash]
            if qnt > biggest_qnt:
                biggest_qnt = qnt
            tag_by_qnt__filler.add(qnt, tag_hash)

        if not tag_by_qnt:
            return tuple()
        return tuple(tag_by_qnt[biggest_qnt])

    def sort_tag_hash_list_by_qnt(self, tagHashSet):
        """Return the tag hashes sorted by ``self.tagsQnt``, biggest quantity first."""
        rawTagList = [(tagHash, self.tagsQnt[tagHash]) for tagHash in set(tagHashSet)]
        return self.sort_raw_tag_list(rawTagList)

    def sort_tag_list_by_qnt(self, binTags):
        """Return the tags sorted by ``self.tagsQnt``, biggest quantity first."""
        rawTagList = [(tag, self.tagsQnt[tag.__hash__()]) for tag in set(binTags)]
        return self.sort_raw_tag_list(rawTagList)

    def sort_tag_hash_list_by_hash(self, tagHashSet):
        """Return the tag hashes sorted by their own value, biggest first."""
        rawTagList = [(tagHash, tagHash) for tagHash in set(tagHashSet)]
        return self.sort_raw_tag_list(rawTagList)

    def sort_tag_list_by_hash(self, binTags):
        """Return the tags sorted by their hash, biggest first."""
        rawTagList = [(tag, tag.__hash__()) for tag in set(binTags)]
        return self.sort_raw_tag_list(rawTagList)

    def sort_raw_tag_list(self, rawTagList):
        """Sort ``(value, weight)`` pairs by weight (descending) and return the values."""
        orderedPairs = sorted(rawTagList, key=lambda tagAndWeight: tagAndWeight[1], reverse=True)
        return [rawTag[0] for rawTag in orderedPairs]

    # ------------------------------------------------------------------ queries

    def get_itemIDs_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                              isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
        """Return the IDs of the items found under the tag path ``binTags``.

        :param treeType: SMART_TREE_TYPE / FULL_TREE_TYPE return only the items stored on
            exactly this path; PLAIN_PSEUDO_TREE_TYPE returns the items of this path and of
            every longer path that contains it.
        :param isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags: when True, return a pair
            ``(result IDs, IDs of all items on this path and on longer paths containing it)``.
        :return: a set of item IDs, or a tuple of two sets (see above).
        :raises UnknownTreeTypeError: for any other ``treeType``.
        """
        # TODO: fix the bug: SMART_TREE_TYPE returns not only the files of the current directory but also
        # those of its immediate subfolders.
        # NOTE(review): the set aliasing fixed below produced exactly this symptom - confirm and drop the TODO.
        tagHashSet = self._tag_hashes(self._tag_set_with_root(binTags))
        hashOfTheBinTagHashTuple = tuple(self.sort_tag_hash_list_by_hash(tagHashSet)).__hash__()
        itemIDsOnTheExactPath = self.itemsOnTheCommonTagSets.get(hashOfTheBinTagHashTuple, ())

        setOfAllInternalItemIDsForThisSetOfTags = set()
        if (treeType == PLAIN_PSEUDO_TREE_TYPE) or isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            # Copy first: updating the stored set in place would leak the items of longer
            # paths into the index of the exact path.
            setOfAllInternalItemIDsForThisSetOfTags = set(itemIDsOnTheExactPath)
            binTagsQnt = len(tagHashSet)
            for commonTagQnt in self.setOfTagGroupQnt:
                if commonTagQnt > binTagsQnt:
                    for commonTagGroupHash in self.tagsQntPerCommonTagSet[commonTagQnt]:
                        if tagHashSet.issubset(self.commonTagSets[commonTagGroupHash]):
                            setOfAllInternalItemIDsForThisSetOfTags.update(
                                self.itemsOnTheCommonTagSets[commonTagGroupHash])

        if (treeType == SMART_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
            resultItemIDSet = set(itemIDsOnTheExactPath)
        elif treeType == PLAIN_PSEUDO_TREE_TYPE:
            resultItemIDSet = setOfAllInternalItemIDsForThisSetOfTags
        else:
            raise UnknownTreeTypeError()

        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            return set(resultItemIDSet), set(setOfAllInternalItemIDsForThisSetOfTags)
        return set(resultItemIDSet)

    def get_items_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                            isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
        """Like ``get_itemIDs_from_tags`` but return the items themselves.

        :return: a tuple of distinct items, or ``(tuple of items, tuple of all internal item IDs)``
            when ``isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags`` is True.
        """
        itemIDsSet = self.get_itemIDs_from_tags(
            set(binTags), treeType=treeType,
            isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags)
        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
            itemSet = {self.itemsSet[itemID] for itemID in itemIDsSet[0]}
            return tuple(itemSet), tuple(itemIDsSet[1])
        return tuple({self.itemsSet[itemID] for itemID in itemIDsSet})

    def get_tagHashes_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                                prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
        """Return the hashes of the sub-tags visible under ``binTags``, sorted by quantity.

        :param treeType: PLAIN_PSEUDO_TREE_TYPE / FULL_TREE_TYPE collect every tag of every
            internal item; SMART_TREE_TYPE takes level 0 of ``build_smart_tree``.
        :param prePreparedSetOfAllInternalItemIDsForThisSetOfTags: item IDs of everything under
            ``binTags``. It can be produced by
            a) get_itemIDs_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE)
            b) get_itemIDs_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
            c) get_items_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
            When None it is computed here.
        :return: list of tag hashes without the given tags and without the root tag.
        :raises UnknownTreeTypeError: for any other ``treeType``.
        """
        binTags = set(binTags)

        if prePreparedSetOfAllInternalItemIDsForThisSetOfTags is None:
            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(binTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
        else:
            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDsForThisSetOfTags

        resultTagHashSet = set()
        if (treeType == PLAIN_PSEUDO_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
            tagHashSet = set()
            for itemID in setOfAllInternalItemIDs:
                if itemID in self.itemWithTags:
                    tagHashSet.update(self.commonTagSets[self.itemWithTags[itemID]])
            resultTagHashSet = tagHashSet - self._tag_hashes(binTags)
        elif treeType == SMART_TREE_TYPE:
            smartTree = self.build_smart_tree(binTags, prePreparedSetOfAllInternalItemIDs=setOfAllInternalItemIDs,
                                              zeroSliceOnly=True)
            if 0 in smartTree:
                resultTagHashSet = smartTree[0]
        else:
            raise UnknownTreeTypeError()

        sortedTagHashList = self.sort_tag_hash_list_by_qnt(resultTagHashSet - {self.get_root_tag().__hash__()})
        return list(sortedTagHashList)

    def get_tags_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                           prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
        """Like ``get_tagHashes_from_tags`` but return a tuple of the tag objects."""
        result = self.get_tagHashes_from_tags(
            binTags, treeType=treeType,
            prePreparedSetOfAllInternalItemIDsForThisSetOfTags=prePreparedSetOfAllInternalItemIDsForThisSetOfTags)
        return tuple(self.tag_hash_list_2_tag_list(result))

    def _build_smart_tree(self, startingBinTags, prePreparedSetOfAllInternalItemIDs, zeroSliceOnly):
        """Shared implementation of ``build_smart_tree`` and ``build_smart_tree_2``."""
        startingBinTags = self._tag_set_with_root(startingBinTags)
        startingTagHashes = self._tag_hashes(startingBinTags)

        if prePreparedSetOfAllInternalItemIDs is None:
            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(startingBinTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
        else:
            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDs

        smartTree = {0: set()}
        smartTree__filler = AddToCompoundDict__Set(smartTree)
        local_tags_qnt = dict()
        # Intended as a per-tag usage counter among the internal items (starts at 0, +1 per add).
        local_tags_qnt__filler = AddToCompoundDict(
            local_tags_qnt,
            lambda: 0,
            lambda container, key, value: (True, container[key] + 1)
        )

        # Tags of every internal item, without the tags of the starting path.
        tagsPerItem = []
        for itemID in setOfAllInternalItemIDs:
            setOfTags = set(self.commonTagSets[self.itemWithTags[itemID]]) - startingTagHashes
            tagsPerItem.append(setOfTags)
            for tag_hash in setOfTags:
                local_tags_qnt__filler.add(tag_hash)

        for setOfTags in tagsPerItem:
            if zeroSliceOnly:
                listOfTagHashes = self.get_top_tag_hash_list_by_qnt(setOfTags, local_tags_qnt)
            else:
                listOfTagHashes = self.sort_tag_hash_list_by_qnt(setOfTags)

            lastTagHash = None
            lastTagHashQnt = None
            treeLevel = 0
            for tagHash in listOfTagHashes:
                currentTagHashQnt = local_tags_qnt[tagHash]
                # A change of the local quantity between neighbours opens the next level.
                if (lastTagHash is not None) and (currentTagHashQnt != lastTagHashQnt):
                    treeLevel += 1
                smartTree__filler.add(treeLevel, tagHash)
                lastTagHash = tagHash
                lastTagHashQnt = currentTagHashQnt
        return dict(smartTree)

    def build_smart_tree(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
        """Group the sub-tags found under ``startingBinTags`` into levels.

        :param prePreparedSetOfAllInternalItemIDs: IDs of all items under the starting path;
            computed with PLAIN_PSEUDO_TREE_TYPE when None.
        :param zeroSliceOnly: when True only the most used tags of each item are considered.
        :return: dict ``{level: set of tag hashes}``; level 0 is always present.
        """
        return self._build_smart_tree(startingBinTags, prePreparedSetOfAllInternalItemIDs, zeroSliceOnly)

    def build_smart_tree_2(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
        """Same contract and result as ``build_smart_tree``."""
        return self._build_smart_tree(startingBinTags, prePreparedSetOfAllInternalItemIDs, zeroSliceOnly)

    def get_all_from_tags(self, binTags, treeType=USUAL_TREE_TYPE):
        """Return ``(tuple of sub-tags, set of items)`` for the tag path ``binTags``.

        Both parts are empty when nothing is stored under the path.
        """
        binTags = set(binTags)
        items = self.get_items_from_tags(binTags, treeType=treeType,
                                         isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
        if len(items[1]) == 0:
            return tuple(), set()
        tags = self.get_tags_from_tags(binTags, treeType=treeType,
                                       prePreparedSetOfAllInternalItemIDsForThisSetOfTags=items[1])
        return tuple(tags), set(items[0])

    def get_tagsHashes_from_single_item(self, itemID, isWithoutRootHash=True):
        """Return the set of tag hashes of ``itemID`` (empty set for an unknown ID)."""
        if itemID not in self.itemWithTags:
            return set()
        tagSet = set(self.commonTagSets[self.itemWithTags[itemID]])
        if isWithoutRootHash:
            tagSet.discard(self.get_root_tag().__hash__())
        return tagSet

    def get_potential_itemIDs_from_item(self, binItem):
        """Return a copy of the set of item IDs stored under ``binItem``'s hash."""
        return set(self.itemIDsForItem.get(binItem.__hash__(), ()))

    def is_smart_redirection_for_a_tag_path_reduction_needed(self, binTags):
        """Return the tag hashes shared by every longer tag path that contains ``binTags``.

        :return: list of tag hashes (without the given tags and the root tag) sorted by
            quantity; an empty list when no longer path contains ``binTags``.
        """
        tagHashSet = self._tag_hashes(self._tag_set_with_root(binTags))
        binTagsQnt = len(tagHashSet)

        setOfTheTagsIntersection = None
        for commonTagQnt in self.setOfTagGroupQnt:
            if commonTagQnt > binTagsQnt:
                for commonTagGroupHash in self.tagsQntPerCommonTagSet[commonTagQnt]:
                    commonTagHashSet = set(self.commonTagSets[commonTagGroupHash])
                    if tagHashSet < commonTagHashSet:  # proper subset
                        if setOfTheTagsIntersection is None:
                            setOfTheTagsIntersection = commonTagHashSet
                        else:
                            setOfTheTagsIntersection = setOfTheTagsIntersection & commonTagHashSet

        if setOfTheTagsIntersection is None:
            return list()

        setOfTheTagsForAReduction = setOfTheTagsIntersection - tagHashSet
        sortedTagHashList = self.sort_tag_hash_list_by_qnt(
            setOfTheTagsForAReduction - {self.get_root_tag().__hash__()})
        return list(sortedTagHashList)

    def get_tags_for_a_smart_redirection(self, binTags):
        """Return the result of the redirection check as a tuple of tag objects."""
        result = self.is_smart_redirection_for_a_tag_path_reduction_needed(binTags)
        return tuple(self.tag_hash_list_2_tag_list(result))
def
remove_tag(self, binTag):
def remove_tag(self, binTag):
    """Try to delete a tag that is not used by any item.

    The tag is identified by ``binTag.__hash__()``. It is deleted only when
    it is present in ``self.tagsSet`` and either has no usage counter in
    ``self.tagsQnt`` or that counter equals zero. After a successful
    deletion the tag's entry in ``self.tagWithItems`` (if any) is dropped
    as well.

    Returns:
        True if the tag was deleted; False if the tag is unknown or its
        usage counter is non-zero.
    """
    tag_key = binTag.__hash__()
    if tag_key not in self.tagsSet:
        return False

    if tag_key in self.tagsQnt:
        if self.tagsQnt[tag_key] != 0:
            # The counter says the tag is still in use: refuse to delete.
            return False
        del self.tagsSet[tag_key]
        del self.tagsQnt[tag_key]
    else:
        # No counter at all: the tag was registered but never counted.
        del self.tagsSet[tag_key]

    if tag_key in self.tagWithItems:
        del self.tagWithItems[tag_key]
    return True
def
add_item(self, binItem, binTags):
def add_item(self, binItem, binTags):
    """Store ``binItem`` under the tag path ``binTags`` and index it.

    The root tag (``self.get_root_tag()``) is added to the path when it is
    missing. If ``self.get_itemID_from_item_and_tags`` already reports an
    ID for this item on this path, nothing is stored.

    Returns:
        The ID obtained from ``self.itemsID()`` for the new item, or None
        when the item already exists on the given tag path.
    """
    tag_path = set(binTags)
    if self.get_root_tag() not in tag_path:
        tag_path.add(self.get_root_tag())

    if self.get_itemID_from_item_and_tags(tag_path, binItem) is not None:
        return None

    new_id = self.itemsID()
    self.itemsSet[new_id] = binItem

    # item hash -> IDs of every stored item with that hash
    self.itemIDsForItem.setdefault(binItem.__hash__(), set()).add(new_id)

    # number of tags on the path -> IDs of items stored with that many tags
    self.tagsNumPerItemID.setdefault(len(tag_path), set()).add(new_id)

    path_hashes = set()
    for tag in tag_path:
        self.add_tag(tag)
        tag_key = tag.__hash__()
        path_hashes.add(tag_key)
        holders = self.tagWithItems[tag_key]
        if new_id not in holders:
            holders.add(new_id)
            # Per-tag usage counter: one more item carries this tag.
            self.tagsQnt[tag_key] = self.tagsQnt.get(tag_key, 0) + 1
        self.tagWithItems[tag_key] = holders

    # The ordered tuple of tag hashes identifies the "common tag set"
    # (tag group) this item belongs to; the group is keyed by the
    # tuple's own hash.
    tag_group = tuple(self.sort_tag_hash_list_by_hash(path_hashes))
    group_key = tag_group.__hash__()
    self.itemWithTags[new_id] = group_key
    self.commonTagSets[group_key] = tag_group
    self.itemsOnTheCommonTagSets.setdefault(group_key, set()).add(new_id)

    # group size -> hashes of all groups of that size
    group_size = len(tag_group)
    if group_size in self.tagsQntPerCommonTagSet:
        self.tagsQntPerCommonTagSet[group_size].add(group_key)
    else:
        self.tagsQntPerCommonTagSet[group_size] = {group_key}
        self.setOfTagGroupQnt.add(group_size)

    return new_id
def
remove_item_by_itemID(self, itemID):
def remove_item_by_itemID(self, itemID):
    """Remove the item with the given ID from every index of the database.

    IDs that are not present in ``self.itemsSet`` are ignored. For a known
    ID the method:

    * drops the item from ``itemsSet`` and ``itemIDsForItem``;
    * drops it from its tag group (``itemsOnTheCommonTagSets``); when the
      group becomes empty the group itself is removed from
      ``commonTagSets``, ``tagsQntPerCommonTagSet`` and, if it was the last
      group of that size, from ``setOfTagGroupQnt``;
    * decrements the per-tag counters in ``tagsQnt``; a tag whose counter
      reaches zero is deleted from ``tagsQnt`` and ``tagsSet``;
    * drops the ID from ``tagWithItems`` and ``tagsNumPerItemID``;
    * finally releases the ID through ``self.itemsID.remove_id``.

    Returns:
        None.
    """
    if itemID not in self.itemsSet:
        return

    itemHash = self.itemsSet[itemID].__hash__()
    del self.itemsSet[itemID]

    if itemHash in self.itemIDsForItem:
        sameHashIDs = self.itemIDsForItem[itemHash]
        sameHashIDs.discard(itemID)
        if not sameHashIDs:
            del self.itemIDsForItem[itemHash]

    if itemID in self.itemWithTags:
        commonTagTupleHash = self.itemWithTags[itemID]
        tagTuple = self.commonTagSets[commonTagTupleHash]
        numberOfTags = len(tagTuple)

        if commonTagTupleHash in self.itemsOnTheCommonTagSets:
            groupItemIDs = self.itemsOnTheCommonTagSets[commonTagTupleHash]
            groupItemIDs.discard(itemID)
            if not groupItemIDs:
                # Last item of this tag group: forget the group everywhere.
                del self.itemsOnTheCommonTagSets[commonTagTupleHash]
                del self.commonTagSets[commonTagTupleHash]
                if numberOfTags in self.tagsQntPerCommonTagSet:
                    groupHashes = self.tagsQntPerCommonTagSet[numberOfTags]
                    # The bucket holds group HASHES (see add_item), so the
                    # group's hash must be removed here - not its length.
                    # Leaving the hash behind would make later lookups in
                    # commonTagSets fail with KeyError.
                    groupHashes.discard(commonTagTupleHash)
                    if not groupHashes:
                        del self.tagsQntPerCommonTagSet[numberOfTags]
                        self.setOfTagGroupQnt.discard(numberOfTags)

        del self.itemWithTags[itemID]

        setOfTagHashes = set(tagTuple)

        tagsQnt = len(setOfTagHashes)
        if tagsQnt in self.tagsNumPerItemID:
            sameSizeIDs = self.tagsNumPerItemID[tagsQnt]
            sameSizeIDs.discard(itemID)
            if not sameSizeIDs:
                del self.tagsNumPerItemID[tagsQnt]

        for tagHash in setOfTagHashes:
            if tagHash in self.tagsQnt:
                remaining = max(self.tagsQnt[tagHash] - 1, 0)
                if remaining < 1:
                    # No item carries this tag any more: drop the tag.
                    del self.tagsQnt[tagHash]
                    del self.tagsSet[tagHash]
                else:
                    self.tagsQnt[tagHash] = remaining
            if tagHash in self.tagWithItems:
                holders = self.tagWithItems[tagHash]
                holders.discard(itemID)
                if not holders:
                    del self.tagWithItems[tagHash]

    self.itemsID.remove_id(itemID)
def
remove_item(self, binTags, binItem):
def remove_item(self, binTags, binItem):
    """Remove ``binItem`` from the tag path ``binTags``.

    The root tag is added to the path when it is missing, the item's ID is
    looked up with ``self.get_itemID_from_item_and_tags`` and, when found,
    the item is removed via ``self.remove_item_by_itemID``.

    Returns:
        The ID of the removed item, or None if no such item exists on the
        given tag path.

    NOTE(review): whether the lookup can raise for duplicate items on one
    path depends on ``get_itemID_from_item_and_tags`` - confirm there.
    """
    tag_path = set(binTags)
    if self.get_root_tag() not in tag_path:
        tag_path.add(self.get_root_tag())

    found_id = self.get_itemID_from_item_and_tags(tag_path, binItem)
    if found_id is None:
        return None

    self.remove_item_by_itemID(found_id)
    return found_id
def
get_top_tag_hash_list_by_qnt(self, tagHashSet, local_tags_qnt=None):
def get_top_tag_hash_list_by_qnt(self, tagHashSet, local_tags_qnt=None):
    """Return the tag hashes that have the highest usage count.

    Args:
        tagHashSet: iterable of tag hashes to choose from.
        local_tags_qnt: optional mapping ``tag hash -> count`` used instead
            of ``self.tagsQnt``.

    Returns:
        A tuple with every hash from ``tagHashSet`` whose count equals the
        largest count found (order is the set's iteration order), or an
        empty tuple when ``tagHashSet`` is empty.
    """
    counts = self.tagsQnt
    if local_tags_qnt is not None:
        counts = local_tags_qnt

    # count -> set of tag hashes with that count
    hashes_by_count = dict()
    top_count = 0
    for tag_hash in set(tagHashSet):
        count = counts[tag_hash]
        top_count = max(top_count, count)
        hashes_by_count.setdefault(count, set()).add(tag_hash)

    if not hashes_by_count:
        return tuple()
    return tuple(hashes_by_count[top_count])
def
sort_tag_hash_list_by_qnt(self, tagHashSet):
def sort_tag_hash_list_by_qnt(self, tagHashSet):
    """Order tag hashes using their usage count as the sort weight.

    Each distinct hash from ``tagHashSet`` is paired with its counter from
    ``self.tagsQnt`` and the list of ``(hash, count)`` pairs is handed to
    ``self.sort_raw_tag_list``, whose result is returned unchanged.

    NOTE(review): callers in this file treat the result as a sequence of
    tag hashes; the exact ordering is decided by ``sort_raw_tag_list``.
    """
    weighted = [(tag_hash, self.tagsQnt[tag_hash]) for tag_hash in set(tagHashSet)]
    return self.sort_raw_tag_list(weighted)
def
sort_tag_hash_list_by_hash(self, tagHashSet):
def sort_tag_hash_list_by_hash(self, tagHashSet):
    """Order tag hashes using the hash value itself as the sort weight.

    Each distinct hash from ``tagHashSet`` is turned into a
    ``(hash, hash)`` pair and the list of pairs is handed to
    ``self.sort_raw_tag_list``, whose result is returned unchanged.

    NOTE(review): callers in this file treat the result as a sequence of
    tag hashes; the exact ordering is decided by ``sort_raw_tag_list``.
    """
    weighted = [(tag_hash, tag_hash) for tag_hash in set(tagHashSet)]
    return self.sort_raw_tag_list(weighted)
def
build_smart_tree( self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
def build_smart_tree(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
    """Group the sub-tags found below a tag path into numbered levels.

    Args:
        startingBinTags: tags of the starting path; the root tag is added
            when missing.
        prePreparedSetOfAllInternalItemIDs: item IDs to inspect. When None
            they are fetched with ``self.get_itemIDs_from_tags`` using
            ``PLAIN_PSEUDO_TREE_TYPE``.
        zeroSliceOnly: when true, only the top-count tags of each item
            (``get_top_tag_hash_list_by_qnt`` with the local counts) are
            considered; otherwise every remaining tag of the item, ordered
            by ``sort_tag_hash_list_by_qnt``.

    Returns:
        A dict ``level -> set of tag hashes``; level 0 is always present.
    """
    path_tags = set(startingBinTags)
    if self.get_root_tag() not in path_tags:
        path_tags.add(self.get_root_tag())
    path_hashes = {tag.__hash__() for tag in path_tags}

    item_ids = prePreparedSetOfAllInternalItemIDs
    if item_ids is None:
        item_ids = self.get_itemIDs_from_tags(path_tags, treeType=PLAIN_PSEUDO_TREE_TYPE)

    levels = {0: set()}
    levels_filler = AddToCompoundDict__Set(levels)
    local_counts = dict()
    # presumably counts, per tag hash, how many of the selected items carry
    # it (starts at 0, +1 per add) - confirm against AddToCompoundDict
    local_counts_filler = AddToCompoundDict(
        local_counts,
        lambda: 0,
        lambda container, key, value: (True, container[key] + 1)
    )

    def tags_below_path(item_id):
        # Tag hashes of the item that are not already part of the path.
        group_key = self.itemWithTags[item_id]
        return set(self.commonTagSets[group_key]) - path_hashes

    # Pass 1: usage counts restricted to the selected items.
    for item_id in item_ids:
        for tag_hash in tags_below_path(item_id):
            local_counts_filler.add(tag_hash)

    # Pass 2: walk each item's ordered tags and assign them to levels.
    for item_id in item_ids:
        remaining = tags_below_path(item_id)
        if zeroSliceOnly:
            ordered_hashes = self.get_top_tag_hash_list_by_qnt(remaining, local_counts)
        else:
            ordered_hashes = self.sort_tag_hash_list_by_qnt(remaining)

        level = 0
        previous_hash = None
        previous_count = None
        for tag_hash in ordered_hashes:
            count = local_counts[tag_hash]
            # A change of count relative to the previous tag opens a new level.
            if previous_hash is not None and count != previous_count:
                level += 1
            levels_filler.add(level, tag_hash)
            previous_hash = tag_hash
            previous_count = count
    return dict(levels)
def
build_smart_tree_2( self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
def build_smart_tree_2(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
    """Build the smart tree for a tag path; alias of ``build_smart_tree``.

    The previous body was a token-for-token copy of ``build_smart_tree``.
    It now delegates so the two entry points cannot drift apart.

    Args:
        startingBinTags: tags of the starting path.
        prePreparedSetOfAllInternalItemIDs: optional pre-computed item IDs.
        zeroSliceOnly: forwarded unchanged to ``build_smart_tree``.

    Returns:
        Whatever ``build_smart_tree`` returns for the same arguments
        (a dict ``level -> set of tag hashes``).
    """
    return self.build_smart_tree(
        startingBinTags,
        prePreparedSetOfAllInternalItemIDs=prePreparedSetOfAllInternalItemIDs,
        zeroSliceOnly=zeroSliceOnly,
    )
def
is_smart_redirection_for_a_tag_path_reduction_needed(self, binTags):
def is_smart_redirection_for_a_tag_path_reduction_needed(self, binTags):
    """Find the tags that every longer tag group sharing this path also has.

    The path (``binTags`` plus the root tag) is compared with every stored
    tag group that has more tags than the path. Groups that contain the
    whole path are intersected; the tags of that intersection which are not
    on the path (and are not the root tag) are returned.

    Returns:
        A list built from ``self.sort_tag_hash_list_by_qnt`` applied to the
        extra tag hashes, or an empty list when no longer group contains
        the path.
    """
    path_tags = set(binTags)
    if self.get_root_tag() not in path_tags:
        path_tags.add(self.get_root_tag())
    path_hashes = {tag.__hash__() for tag in path_tags}
    path_len = len(path_hashes)

    # Hashes of all tag groups that are longer than the path.
    candidate_groups = set()
    for group_size in self.setOfTagGroupQnt:
        if group_size > path_len:
            candidate_groups.update(self.tagsQntPerCommonTagSet[group_size])

    # Keep only the groups that strictly contain the whole path.
    supersets = []
    for group_key in candidate_groups:
        group_hashes = set(self.commonTagSets[group_key])
        if path_hashes < group_hashes:
            supersets.append(group_hashes)

    if not supersets:
        return list()

    shared_hashes = set.intersection(*supersets)
    extra_hashes = shared_hashes - path_hashes
    ordered = self.sort_tag_hash_list_by_qnt(extra_hashes - {self.get_root_tag().__hash__()})
    return list(ordered)