cengal.data_containers.dynamic_tag_tree.versions.v_0.TagDB

Module Docstring Docstrings: http://www.python.org/dev/peps/pep-0257/

  1#!/usr/bin/env python
  2# coding=utf-8
  3
  4# Copyright © 2012-2024 ButenkoMS. All rights reserved. Contacts: <gtalk@butenkoms.space>
  5# 
  6# Licensed under the Apache License, Version 2.0 (the "License");
  7# you may not use this file except in compliance with the License.
  8# You may obtain a copy of the License at
  9# 
 10#     http://www.apache.org/licenses/LICENSE-2.0
 11# 
 12# Unless required by applicable law or agreed to in writing, software
 13# distributed under the License is distributed on an "AS IS" BASIS,
 14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15# See the License for the specific language governing permissions and
 16# limitations under the License.
 17
 18
 19"""
 20Module Docstring
 21Docstrings: http://www.python.org/dev/peps/pep-0257/
 22"""
 23
 24
 25__author__ = "ButenkoMS <gtalk@butenkoms.space>"
 26__copyright__ = "Copyright © 2012-2024 ButenkoMS. All rights reserved. Contacts: <gtalk@butenkoms.space>"
 27__credits__ = ["ButenkoMS <gtalk@butenkoms.space>", ]
 28__license__ = "Apache License, Version 2.0"
 29__version__ = "4.4.1"
 30__maintainer__ = "ButenkoMS <gtalk@butenkoms.space>"
 31__email__ = "gtalk@butenkoms.space"
 32# __status__ = "Prototype"
 33__status__ = "Development"
 34# __status__ = "Production"
 35
 36
 37__author__ = 'Mikhail Butenko <gtalk@mikhail-butenko.in.ua>'
 38
 39from cengal.data_generation.id_generator import IDGenerator
 40# from help_tools import AddToCompoundDict, ResultExistence
 41from cengal.code_flow_control.smart_values.versions.v_0 import ResultExistence
 42from cengal.data_containers.compound_dict_management import AddToCompoundDict
 43
 44
 45SMART_TREE_TYPE = 0  # smart tree. Smart tag tree: a network mapped onto a tree. Returns only the list of
 46    # direct subtags of the current path, but not their subtags; returns the items of the current path, but not
 47    # the items from subpaths
 48SMART_TREE_TYPE_WITH_INTERNAL_MENU = 1   # smart tree with internal menu. A menu is built into the tree that lets
 49    # the output type be switched right from the tree: SMART_TREE_TYPE, FULL_TREE_TYPE and PLAIN_PSEUDO_TREE_TYPE.
 50    # For each output type a subtag/subfolder is available, inside which there is a normal tree of items, but of
 51    # the chosen type
 52FULL_TREE_TYPE = 2  # full tree with all tags - with repeats and without filtering. The item list is the same as
 53    # for SMART_TREE_TYPE, while the tag list is the same as for PLAIN_PSEUDO_TREE_TYPE
 54PLAIN_PSEUDO_TREE_TYPE = 3   # plain tags and items set (will show all tags, subtags and items of
 55    # current hm... dir - current tag set). Shows all tags and subtags as a single list - as in primitive tag-based
 56    # file systems; shows all items of the current path + all items of all subpaths
 57
 58USUAL_TREE_TYPE = PLAIN_PSEUDO_TREE_TYPE
 59
 60_ROOT_TAG = r'k{1+vdcY#m8t-4m9`)G2\b]/O\'Rzqyr@FEO~%./nGPzl)[^q 0RS!.bCPh ?fag{8~{SGj;Ss3U85Q-:'
 61
 62
 63class ToManyIdenticalItemsOnTheGivenTagPathError(Exception):
 64    pass
 65
 66
 67class UnknownTreeTypeError(Exception):
 68    pass
 69
 70
 71class AddToCompoundDict__Set(AddToCompoundDict):
 72    def __init__(self, original_dict):
 73        super().__init__(
 74            original_dict,
 75            lambda: set(),
 76            lambda original_dict, key, value: original_dict[key].add(value)
 77        )
 78
 79
 80class TagDB:
 81
 82    def __init__(self):
 83        super().__init__()
 84
 85        self.itemsID = IDGenerator()
 86
 87        self.itemIDsForItem = {}  # key - item hash; data - set of itemIDs
 88
 89        self.itemsSet = {}  # key - ItemID; data - binItem
 90        # TODO: заменить список тегов на хеш единожды сохраненного списка тегов
 91        self.itemWithTags = {}  # key - ItemID; data - sorted common TagsTuple's hash
 92
 93        # TODO: убрать tagsNumPerItemID из кода. Заменить этот список itemID - на список hashOfTheTagHashTuple
 94        self.tagsNumPerItemID = {}  # key - number of tags in this ItemID group; data - set of itemIDs which are have
 95            # needed number of tags
 96
 97        self.tagsSet = {}  # key - tag hash; data - binTag
 98        self.tagWithItems = {}  # key - Tag hash; data - set of itemIDs
 99        self.tagsQnt = {}    # key - Tag hash; data - quantity of the items with this tag
100
101
102        self.commonTagSets = {}  # key - sorted common TagsTuple's hash; data - sorted TagsTuple
103        self.itemsOnTheCommonTagSets = {}   # key - sorted common TagsTuple's hash; data - set of itemIDs
104        self.tagsQntPerCommonTagSet = {}    # key - number of tags; data - set of TagsTuple hashes
105        self.setOfTagGroupQnt = set()   # {tagQntInGroup1, tagQntInGroup2, ..., tagQntInGroupN} where
106            # each Group is an key of the self.tagsQntPerCommonTagSet
107
108        # TODO: добавить словарь tagHashTuplesIDs вида [tagHashTuple.__hash__():tagHashTuple]
109        # где tagHashSet - это {tagHash_1, tagHash_2, ..., tagHash_3}
110        # TODO: добавить словарь itemsOnThePath вида [tagHashTuple.__hash__():{itemID_1, itemID_2, ..., itemID_3}]
111
112        # TODO: добавить словарь вида [tagQnt:{itemID_1, itemID_2, ..., itemID_3}] и вычитывать это из него
113        # и/или
114        # TODO: добавить словарь вида [tagQnt:{hashOfTheTagHashTuple_1, hashOfTheTagHashTuple_2, ...
115        # , hashOfTheTagHashTuple_N}] и вычитывать это из него, а потом уже и из каждого tagHashSet
116        # где hashOfTheTagHashTuple - это tagHashTuple.__hash__()
117
118    def get_root_tag(self):
119        return str(_ROOT_TAG)
120
121    def add_tag(self, binTag):
122        tagHash = binTag.__hash__()
123        self.tagsSet[tagHash] = binTag
124        if tagHash not in self.tagsSet:
125            self.tagsQnt[tagHash] = 0
126        if tagHash not in self.tagWithItems:
127            self.tagWithItems[tagHash] = set()
128
129    def remove_tag(self, binTag):
130        # will try to delete given tag. If there is at least one item with this tag, than function will fail
131        # and will return False; otherwise it will delete given tag and will return True.
132        functionResult = False
133        tagHash = binTag.__hash__()
134        if tagHash in self.tagsSet:
135            if tagHash in self.tagsQnt:
136                if self.tagsQnt[tagHash] == 0:
137                    del self.tagsSet[tagHash]
138                    del self.tagsQnt[tagHash]
139                    functionResult = True
140                else:
141                    functionResult = False
142            else:
143                del self.tagsSet[tagHash]
144                functionResult = True
145
146        if functionResult:
147            if tagHash in self.tagWithItems:
148                del self.tagWithItems[tagHash]
149
150        return functionResult
151
152    def add_item(self, binItem, binTags):
153        # will add new item and return it's dynamic ID or None object If this Item already exist on the given tag path
154        # Or will raise an exception if we already have more than one binItem (another item that is identical to
155        # the given binItem)  on this tag path
156
157        binTags = set(binTags)
158
159        if self.get_root_tag() not in binTags:
160            binTags.add(self.get_root_tag())
161
162        # may raise an exception in this place. Nope - from now it will be not
163        if self.get_itemID_from_item_and_tags(binTags, binItem) is not None:
164            return None
165
166        itemID = self.itemsID()
167        self.itemsSet[itemID] = binItem
168
169        itemHash = binItem.__hash__()
170        if itemHash in self.itemIDsForItem:
171            IDsSet = self.itemIDsForItem[itemHash]
172            IDsSet.add(itemID)
173            # self.itemIDsForItem[itemHash] = IDsSet
174        else:
175            self.itemIDsForItem[itemHash] = {itemID}
176
177        tagQnt = len(binTags)
178        if tagQnt in self.tagsNumPerItemID:
179            itemIDsSet = self.tagsNumPerItemID[tagQnt]
180            itemIDsSet.add(itemID)
181            # self.tagsNumPerItemID[tagQnt] = itemIDsSet
182        else:
183            self.tagsNumPerItemID[tagQnt] = {itemID}
184
185        binTagHashes = set()
186
187        for tag in binTags:
188            self.add_tag(tag)
189            tagHash = tag.__hash__()
190            binTagHashes.add(tagHash)
191            setOfItems = self.tagWithItems[tagHash]
192            if itemID not in setOfItems:
193                setOfItems.add(itemID)
194                if tagHash in self.tagsQnt:
195                    self.tagsQnt[tagHash] += 1
196                else:
197                    self.tagsQnt[tagHash] = 1
198            self.tagWithItems[tagHash] = setOfItems
199
200        sortedTagTuple = tuple(self.sort_tag_hash_list_by_hash(binTagHashes))
201        hashOfTheSortedTagTuple = sortedTagTuple.__hash__()
202        self.itemWithTags[itemID] = hashOfTheSortedTagTuple
203
204        self.commonTagSets[hashOfTheSortedTagTuple] = sortedTagTuple
205
206        if hashOfTheSortedTagTuple in self.itemsOnTheCommonTagSets:
207            itemIDsSet = self.itemsOnTheCommonTagSets[hashOfTheSortedTagTuple]
208            itemIDsSet.add(itemID)
209            # self.itemsOnTheCommonTagSets[tagQnt] = itemIDsSet
210        else:
211            self.itemsOnTheCommonTagSets[hashOfTheSortedTagTuple] = {itemID}
212
213        lenOfTheSortedTagTuple = len(sortedTagTuple)
214        if lenOfTheSortedTagTuple in self.tagsQntPerCommonTagSet:
215            itemIDsSet = self.tagsQntPerCommonTagSet[lenOfTheSortedTagTuple]
216            itemIDsSet.add(hashOfTheSortedTagTuple)
217            # self.tagsQntPerCommonTagSet[lenOfTheSortedTagTuple] = itemIDsSet
218        else:
219            self.tagsQntPerCommonTagSet[lenOfTheSortedTagTuple] = {hashOfTheSortedTagTuple}
220            self.setOfTagGroupQnt.add(lenOfTheSortedTagTuple)
221
222        return itemID
223
224    def remove_item_by_itemID(self, itemID):
225        if itemID in self.itemsSet:
226            itemHash = self.itemsSet[itemID].__hash__()
227            del self.itemsSet[itemID]
228
229            if itemHash in self.itemIDsForItem:
230                IDsSet = self.itemIDsForItem[itemHash]
231                IDsSet.difference_update({itemID})
232                # self.itemIDsForItem[itemHash] = IDsSet
233                if len(IDsSet) < 1:
234                    del self.itemIDsForItem[itemHash]
235
236        if itemID in self.itemWithTags:
237            commonTagTupleHash = self.itemWithTags[itemID]
238            tagTuple = self.commonTagSets[commonTagTupleHash]
239            numberOfTags = len(tagTuple)
240
241            if commonTagTupleHash in self.itemsOnTheCommonTagSets:
242                IDsSet = self.itemsOnTheCommonTagSets[commonTagTupleHash]
243                IDsSet.difference_update({itemID})
244                # self.itemsOnTheCommonTagSets[commonTagTupleHash] = IDsSet
245                if len(IDsSet) < 1:
246                    del self.itemsOnTheCommonTagSets[commonTagTupleHash]
247                    del self.commonTagSets[commonTagTupleHash]
248                    if numberOfTags in self.tagsQntPerCommonTagSet:
249                        setOfTagTuplesHashes = self.tagsQntPerCommonTagSet[numberOfTags]
250                        setOfTagTuplesHashes.difference_update({numberOfTags})
251                        # self.tagsQntPerCommonTagSet[numberOfTags] = setOfTagTuplesHashes
252                        if len(setOfTagTuplesHashes) < 1:
253                            del self.tagsQntPerCommonTagSet[numberOfTags]
254                            self.setOfTagGroupQnt.difference_update({numberOfTags})
255
256            del self.itemWithTags[itemID]
257
258            setOfTagHashes = set(tagTuple)
259
260            tagsQnt = len(setOfTagHashes)
261            if tagsQnt in self.tagsNumPerItemID:
262                IDsSet = self.tagsNumPerItemID[tagsQnt]
263                IDsSet.difference_update({itemID})
264                # self.tagsNumPerItemID[tagsQnt] = IDsSet
265                if len(IDsSet) < 1:
266                    del self.tagsNumPerItemID[tagsQnt]
267
268            for tagHash in setOfTagHashes:
269                if tagHash in self.tagsQnt:
270                    tagsQuantity = self.tagsQnt[tagHash]
271                    tagsQuantity -= 1
272                    if tagsQuantity < 1:
273                        tagsQuantity = 0
274                    self.tagsQnt[tagHash] = tagsQuantity
275                    if tagsQuantity < 1:
276                        del self.tagsQnt[tagHash]
277                        del self.tagsSet[tagHash]
278                if tagHash in self.tagWithItems:
279                    IDsSet = self.tagWithItems[tagHash]
280                    IDsSet.difference_update({itemID})
281                    if len(IDsSet) < 1:
282                        del self.tagWithItems[tagHash]
283                    # self.tagWithItems[tagHash] = IDsSet
284
285        self.itemsID.remove_id(itemID)
286
287    def remove_item(self, binTags, binItem):
288        # will return ItemId for deleted item or None object if Item is not exist
289        # Or will raise an exception if we already have more than one binItem (another item that is identical to
290        # the given binItem) on this tag path
291        binTags = set(binTags)
292        if self.get_root_tag() not in binTags:
293            binTags.add(self.get_root_tag())
294        itemID = self.get_itemID_from_item_and_tags(binTags, binItem)
295        if itemID is not None:
296            self.remove_item_by_itemID(itemID)
297        return itemID
298
299    def __OLD__get_itemID_from_item_and_tags(self, binTags, binItem):
300        if self.get_root_tag() not in binTags:
301            binTags.append(self.get_root_tag())
302        potentialIDs = set(self.get_potential_itemIDs_from_item(binItem))
303        itemIDsSet = set(self.get_itemIDs_from_tags(binTags, SMART_TREE_TYPE))
304        resultItemIDsList = potentialIDs & itemIDsSet
305        if len(resultItemIDsList) == 0:
306            return None
307        elif len(resultItemIDsList) == 1:
308            resultItemID = resultItemIDsList.pop()  # we have assume that we'll have only one item in intersection
309                # between potential IDs and Items that have (and have only) given tag list (without another tags in the
310                # path to this items). We need to check it in the adding new item to the given tag path.
311            return resultItemID
312        elif len(resultItemIDsList) > 1:
313            raise ToManyIdenticalItemsOnTheGivenTagPathError()
314
315    def get_itemID_from_item_and_tags(self, binTags, binItem):
316        binTags = set(binTags)
317
318        if self.get_root_tag() not in binTags:
319            binTags.add(self.get_root_tag())
320
321        potentialIDs = self.get_potential_itemIDs_from_item(binItem)
322        setOfBinTagsHashes = set()
323        for tag in binTags:
324            setOfBinTagsHashes.add(tag.__hash__())
325        for itemID in potentialIDs:
326            currentItemTagsSet = self.get_tagsHashes_from_single_item(itemID, isWithoutRootHash=False)
327            if setOfBinTagsHashes == currentItemTagsSet:
328                return itemID
329        return None
330
331    def tag_hash_list_2_tag_list(self, tagHashList):
332        tagList = list()
333        for tagHash in tagHashList:
334            tagList.append(self.tagsSet[tagHash])
335        return tagList
336
337    def get_item_and_tags_from_itemID(self, itemID):
338        commonTagTupleHash = self.itemWithTags[itemID]
339        tagSet = set(self.commonTagSets[commonTagTupleHash]) - {self.get_root_tag().__hash__()}
340        sortedTagHashList = self.sort_tag_hash_list_by_qnt(tagSet)
341        result = (self.itemsSet[itemID], self.tag_hash_list_2_tag_list(sortedTagHashList))
342        return result
343
344    # @profile
345    def get_top_tag_hash_list_by_qnt(self, tagHashSet, local_tags_qnt=None):
346        tagsQnt = self.tagsQnt
347        if local_tags_qnt is not None:
348            tagsQnt = local_tags_qnt
349        tag_hash_set = set(tagHashSet)
350        tag_by_qnt = dict()
351        tag_by_qnt__filler = AddToCompoundDict__Set(tag_by_qnt)
352        biggest_qnt = 0
353        for tag_hash in tag_hash_set:
354            qnt = tagsQnt[tag_hash]
355            if qnt > biggest_qnt:
356                biggest_qnt = qnt
357            # if qnt not in tag_by_qnt:
358            #     tag_by_qnt[qnt] = set()
359            # tag_by_qnt[qnt].add(tag_hash)
360            tag_by_qnt__filler.add(qnt, tag_hash)
361
362        result = None
363        if len(tag_by_qnt) > 0:
364            # biggest_qnt = max(tag_by_qnt)
365            result = tuple(tag_by_qnt[biggest_qnt])
366        else:
367            result = tuple()
368        return result
369
370    def sort_tag_hash_list_by_qnt(self, tagHashSet):
371        # will return sorted tag list - not sorted tag hash list
372        tagHashSet = set(tagHashSet)
373        rawTagList = list()
374        for tagHash in tagHashSet:
375            tagWithWeight = (tagHash, self.tagsQnt[tagHash])
376            rawTagList.append(tagWithWeight)
377        return self.sort_raw_tag_list(rawTagList)
378
379    def sort_tag_list_by_qnt(self, binTags):
380        binTags = set(binTags)
381        rawTagList = list()
382        for tag in binTags:
383            tagHash = tag.__hash__()
384            tagWithWeight = (tag, self.tagsQnt[tagHash])
385            rawTagList.append(tagWithWeight)
386        return self.sort_raw_tag_list(rawTagList)
387
388    def sort_tag_hash_list_by_hash(self, tagHashSet):
389        # will return sorted tag list - not sorted tag hash list
390        tagHashSet = set(tagHashSet)
391        rawTagList = list()
392        for tagHash in tagHashSet:
393            tagWithWeight = (tagHash, tagHash)
394            rawTagList.append(tagWithWeight)
395        return self.sort_raw_tag_list(rawTagList)
396
397    def sort_tag_list_by_hash(self, binTags):
398        binTags = set(binTags)
399        rawTagList = list()
400        for tag in binTags:
401            tagHash = tag.__hash__()
402            tagWithWeight = (tag, tagHash)
403            rawTagList.append(tagWithWeight)
404        return self.sort_raw_tag_list(rawTagList)
405
406    def sort_raw_tag_list(self, rawTagList):
407        # will return sorted tag list
408        rawTagList = sorted(rawTagList, key=lambda tagAndWeight: tagAndWeight[1], reverse=True)
409        tagList = list()
410        for rawTag in rawTagList:
411            tagList.append(rawTag[0])
412        return tagList
413
414    def get_itemIDs_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
415                              isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
416        # TODO: исправить ошибку: SMART_TREE_TYPE: возвращает не только список файлов в текущей директории, но и из
417        # непосредственных подпапок данной папки
418
419        # treeType - type of the graph tree representation: show all tags with replies (pure representation);
420        # show only relevant tags; etc.
421        # return set of itemIDs
422
423        binTags = set(binTags)
424
425        if self.get_root_tag() not in binTags:
426            binTags.add(self.get_root_tag())
427
428        tagHashSet = set()
429        for binTag in binTags:
430            tagHashSet.add(binTag.__hash__())
431
432        # PLAIN_PSEUDO_TREE_TYPE
433        interceptionOfItemsWithTags = set()
434        if (treeType == PLAIN_PSEUDO_TREE_TYPE) or isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
435            binTagsQnt = len(tagHashSet)
436            commonTagGroupHashSet = set()
437            tagSet = set()
438            itemIDSet = set()
439            binTagHashTuple = tuple(self.sort_tag_hash_list_by_hash(tagHashSet))
440            hashOfTheBinTagHashTuple = binTagHashTuple.__hash__()
441            if hashOfTheBinTagHashTuple in self.itemsOnTheCommonTagSets:
442                itemIDSet = self.itemsOnTheCommonTagSets[hashOfTheBinTagHashTuple]
443            for commonTagQnt in self.setOfTagGroupQnt:
444                if commonTagQnt > binTagsQnt:
445                    setOfTheCommonTagGroupHashes = self.tagsQntPerCommonTagSet[commonTagQnt]
446                    commonTagGroupHashSet.update(setOfTheCommonTagGroupHashes)
447            for commonTagGroupHash in commonTagGroupHashSet:
448                commonTagHashTuple = self.commonTagSets[commonTagGroupHash]
449                commonTagHashSet = set(commonTagHashTuple)
450                if tagHashSet.issubset(commonTagHashSet):
451                    itemIDSet.update(self.itemsOnTheCommonTagSets[commonTagGroupHash])
452                # # if len(tagHashSet & commonTagHashSet) == len(tagHashSet):
453                # res_set = tagHashSet.intersection(commonTagHashSet)
454                # if len(res_set) == binTagsQnt:
455                #     itemIDSet = itemIDSet | self.itemsOnTheCommonTagSets[commonTagGroupHash]
456            interceptionOfItemsWithTags = itemIDSet
457
458            # isFirstHash = True
459            # for tag in binTags:
460            #     tagHash = tag.__hash__()
461            #     if tagHash in self.tagWithItems:
462            #         if isFirstHash:
463            #             interceptionOfItemsWithTags = self.tagWithItems[tagHash]
464            #             isFirstHash = False
465            #         else:
466            #             itemsWithTag = self.tagWithItems[tagHash]
467            #             interceptionOfItemsWithTags = interceptionOfItemsWithTags & itemsWithTag
468            #     else:
469            #         # TODO: произвести такую же провеку в get_items_from_tags() и build_smart_tree()
470            #         if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
471            #             result = (set(), set())
472            #             return result
473            #         else:
474            #             return set()
475
476        resultItemIDSet = interceptionOfItemsWithTags
477        setOfAllInternalItemIDsForThisSetOfTags = interceptionOfItemsWithTags
478
479        # SMART_TREE_TYPE or FULL_TREE_TYPE
480        if (treeType == SMART_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
481            resultItemIDSet = set()
482            binTagHashTuple = self.sort_tag_hash_list_by_hash(tagHashSet)
483            binTagHashTuple = tuple(binTagHashTuple)
484            hashOfTheBinTagHashTuple = binTagHashTuple.__hash__()
485            if hashOfTheBinTagHashTuple in self.itemsOnTheCommonTagSets:
486                resultItemIDSet = self.itemsOnTheCommonTagSets[hashOfTheBinTagHashTuple]
487
488            # filteredItemIDsSet = set()
489            # tagQnt = len(binTags)
490            # # for itemID in setOfAllInternalItemIDsForThisSetOfTags:
491            # #     if len(self.itemWithTags[itemID]) == tagQnt:
492            # #         # _TODO: добавить словарь вида [tagQnt:{itemID_1, itemID_2, ..., itemID_3}]
493            # #         # и вычитывать это из него
494            # #         # и/или
495            # #         # _TODO: добавить словарь вида [tagQnt:{hashOfTheTagHashSet_1, hashOfTheTagHashSet_2, ...
496            # #         # , hashOfTheTagHashSet_3}] и вычитывать это из него, а потом уже и из каждого tagHashSet
497            # #         # где hashOfTheTagHashSet - это tagHashSet.__hash__()
498            # #         filteredItemIDsSet.add(itemID)
499            # if tagQnt in self.tagsNumPerItemID:
500            #     filteredItemIDsSet = setOfAllInternalItemIDsForThisSetOfTags & self.tagsNumPerItemID[tagQnt]
501            #
502            # resultItemIDSet = set()
503            # tagHashSet = set()
504            # for binTag in binTags:
505            #     tagHashSet.add(binTag.__hash__())
506            # for itemID in filteredItemIDsSet:
507            #     commonTagTupleHash = self.itemWithTags[itemID]
508            #     tagSet = set(self.commonTagSets[commonTagTupleHash])
509            #     if tagSet == tagHashSet:
510            #         # _TODO: добавить словарь tagHashTuplesIDs вида [tagHashTuple.__hash__():tagHashTuple]
511            #         # где tagHashSet - это {tagHash_1, tagHash_2, ..., tagHash_3}
512            #         # _TODO: добавить словарь itemsOnThePath вида [tagHashTuple.__hash__():{itemID_1, itemID_2, ...
513            #         # , itemID_3}]
514            #         resultItemIDSet.add(itemID)
515        elif treeType == PLAIN_PSEUDO_TREE_TYPE:
516            # already implemented (see bellow). Don't touch this code!
517            pass
518        else:
519            raise UnknownTreeTypeError()
520
521        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
522            result = (set(resultItemIDSet), set(setOfAllInternalItemIDsForThisSetOfTags))
523            return result
524        else:
525            return set(resultItemIDSet)
526
527    def get_items_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
528                            isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
529        # treeType - type of the graph tree representation: show all tags with replies (pure representation);
530        # show only relevant tags; etc.
531        # return set of itemIDs
532        binTags = set(binTags)
533        itemIDsSet = self.get_itemIDs_from_tags(binTags, treeType=treeType,
534                                                isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=
535                                                isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags)
536        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
537            itemSet = set()
538            for itemID in itemIDsSet[0]:
539                itemSet.add(self.itemsSet[itemID])
540            result = (tuple(itemSet), tuple(itemIDsSet[1]))  # result == (usual items set, additional set of all
541                # internal itemIDs)
542            return result
543        else:
544            itemSet = set()
545            for itemID in itemIDsSet:
546                itemSet.add(self.itemsSet[itemID])
547            return tuple(itemSet)
548
549    def get_tagHashes_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
550                                prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
551        # where "itemIDsSet" is externally given "get_itemIDs_from_tags(binTags, treeType=PLAIN_PSEUDO_TREE_TYPE)"
552        # so "itemIDsSet" is a set of the all items inside the "folder" binTags (including items from "subfolders")
553        # treeType - the same as in the "get_items_from_tags()" method
554        # return set of itemIDs
555        # prePreparedSetOfAllInternalItemIDsForThisSetOfTags can be generated by:
556        #   a) get_itemIDs_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE)
557        #   a) get_itemIDs_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags = True)
558        #   c) get_items_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags = True)
559        #   d) get_items_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE) || BUT: it'll return item set - not itemID
560        #       set
561
562        binTags = set(binTags)
563
564        resultTagHashSet = set()
565
566        setOfAllInternalItemIDs = set()
567        if prePreparedSetOfAllInternalItemIDsForThisSetOfTags is None:
568            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(binTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
569        else:
570            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDsForThisSetOfTags
571
572        if (treeType == PLAIN_PSEUDO_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
573            binTagHashes = set()
574            for tag in binTags:
575                binTagHashes.add(tag.__hash__())
576            tagHashSet = set()
577            for itemID in setOfAllInternalItemIDs:
578                if itemID in self.itemWithTags:
579                    commonTagTupleHash = self.itemWithTags[itemID]
580                    tagSet = set(self.commonTagSets[commonTagTupleHash])
581                    tagHashSet.update(tagSet)
582            resultTagHashSet = tagHashSet - binTagHashes
583        elif treeType == SMART_TREE_TYPE:
584            # smartTree = self.build_smart_tree(binTags, prePreparedSetOfAllInternalItemIDs=setOfAllInternalItemIDs)
585            smartTree = self.build_smart_tree(binTags, prePreparedSetOfAllInternalItemIDs=setOfAllInternalItemIDs,
586                                              zeroSliceOnly=True)
587            if 0 in smartTree:
588                resultTagHashSet = smartTree[0]
589            # filteredItemIDsList = list()
590            # tagQnt = len(binTags)
591            # for itemID in listOfAllInternalItemIDs:
592            #     if len(self.itemWithTags[itemID]) == (tagQnt + 1):
593            #         filteredItemIDsList.append(itemID)
594            #
595            # tagHashSet = set()
596            # for itemID in filteredItemIDsList:
597            #     tagHashSet.update(set(self.itemWithTags[itemID]))
598            # givenTagHashes = set()
599            # for tag in binTags:
600            #     givenTagHashes.add(tag.__hash__())
601            # tagHashSet.difference_update(givenTagHashes)
602            # ##resultTagHashList = list(tagHashSet)
603            # # если остановиться тут - то мы увидим не все папки: мы не увидим папки непосредственно в которых есть
604            # # только другие подпапки, но ни одного файла.
605            # #
606            # # значит далее мы должны исключить все файлы, которые имеют только что найденные теги, и начать строить
607            # # древо тегов для оставшихся
608            # #
609            # # а далее - повторить все это в цикле, увеличив при проверке кол-во тегов еще раз на единицу (и используя
610            # # уже оставшийся после отсеивания набор файлов). В итоге кол-во итераций зависит не от количества файлов,
611            # # а от максимальной фактически имеющейся вложенности файлов внутри тегов-каталогов
612        else:
613            raise UnknownTreeTypeError()
614
615        sortedTagHashList = self.sort_tag_hash_list_by_qnt(resultTagHashSet - {self.get_root_tag().__hash__()})
616        return list(sortedTagHashList)
617
618    def get_tags_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
619                           prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
620        result = self.get_tagHashes_from_tags(binTags, treeType=treeType,
621                                              prePreparedSetOfAllInternalItemIDsForThisSetOfTags=
622                                              prePreparedSetOfAllInternalItemIDsForThisSetOfTags)
623        return tuple(self.tag_hash_list_2_tag_list(result))
624
625    def build_smart_tree(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
626        startingBinTags = set(startingBinTags)
627
628        if self.get_root_tag() not in startingBinTags:
629            startingBinTags.add(self.get_root_tag())
630
631        startingTagHashes = set()
632        for tag in startingBinTags:
633            startingTagHashes.add(tag.__hash__())
634
635        if prePreparedSetOfAllInternalItemIDs is None:
636            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(startingBinTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
637        else:
638            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDs
639
640        smartTree = {0: set()}
641        smartTree__filler = AddToCompoundDict__Set(smartTree)
642        local_tags_qnt = dict()
643        local_tags_qnt__filler = AddToCompoundDict(
644            local_tags_qnt,
645            lambda: 0,
646            lambda container, key, value: (True, container[key] + 1)
647        )
648        for itemID in setOfAllInternalItemIDs:
649            commonTagTupleHash = self.itemWithTags[itemID]
650            tagSet = set(self.commonTagSets[commonTagTupleHash])
651            setOfTags = tagSet
652            setOfTags = setOfTags - startingTagHashes
653            for tag_hash in setOfTags:
654                # if tag_hash not in local_tags_qnt:
655                #     local_tags_qnt[tag_hash] = 0
656                # local_tags_qnt[tag_hash] += 1
657                local_tags_qnt__filler.add(tag_hash)
658
659        for itemID in setOfAllInternalItemIDs:
660            commonTagTupleHash = self.itemWithTags[itemID]
661            tagSet = set(self.commonTagSets[commonTagTupleHash])
662            setOfTags = tagSet
663            setOfTags = setOfTags - startingTagHashes
664            listOfTagHashes = None
665            if zeroSliceOnly:
666                listOfTagHashes = self.get_top_tag_hash_list_by_qnt(setOfTags, local_tags_qnt)
667            else:
668                listOfTagHashes = self.sort_tag_hash_list_by_qnt(setOfTags)
669
670            lastTagHash = None
671            lastTagHashQnt = None
672            treeLevel = 0
673            for tagHash in listOfTagHashes:
674                # currentTagHashQnt = self.tagsQnt[tagHash]
675                currentTagHashQnt = local_tags_qnt[tagHash]
676                if (lastTagHash is None) or (currentTagHashQnt == lastTagHashQnt):
677                    pass
678                else:
679                    treeLevel += 1
680                    lastTagHash = None
681                    lastTagHashQnt = None
682                # if treeLevel not in smartTree:
683                #     smartTree[treeLevel] = set()
684                # # tagsSetOnTheLevel = smartTree[treeLevel]
685                # # tagsSetOnTheLevel.add(tagHash)
686                # # smartTree[treeLevel] = tagsSetOnTheLevel
687                # smartTree[treeLevel].add(tagHash)
688                smartTree__filler.add(treeLevel, tagHash)
689                lastTagHash = tagHash
690                lastTagHashQnt = currentTagHashQnt
691        return dict(smartTree)
692
693    def build_smart_tree_2(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
694        startingBinTags = set(startingBinTags)
695
696        if self.get_root_tag() not in startingBinTags:
697            startingBinTags.add(self.get_root_tag())
698
699        startingTagHashes = set()
700        for tag in startingBinTags:
701            startingTagHashes.add(tag.__hash__())
702
703        if prePreparedSetOfAllInternalItemIDs is None:
704            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(startingBinTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
705        else:
706            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDs
707
708        smartTree = {0: set()}
709        smartTree__filler = AddToCompoundDict__Set(smartTree)
710        local_tags_qnt = dict()
711        local_tags_qnt__filler = AddToCompoundDict(
712            local_tags_qnt,
713            lambda: 0,
714            lambda container, key, value: (True, container[key] + 1)
715        )
716        for itemID in setOfAllInternalItemIDs:
717            commonTagTupleHash = self.itemWithTags[itemID]
718            tagSet = set(self.commonTagSets[commonTagTupleHash])
719            setOfTags = tagSet
720            setOfTags = setOfTags - startingTagHashes
721            for tag_hash in setOfTags:
722                # if tag_hash not in local_tags_qnt:
723                #     local_tags_qnt[tag_hash] = 0
724                # local_tags_qnt[tag_hash] += 1
725                local_tags_qnt__filler.add(tag_hash)
726
727        for itemID in setOfAllInternalItemIDs:
728            commonTagTupleHash = self.itemWithTags[itemID]
729            tagSet = set(self.commonTagSets[commonTagTupleHash])
730            setOfTags = tagSet
731            setOfTags = setOfTags - startingTagHashes
732            listOfTagHashes = None
733            if zeroSliceOnly:
734                listOfTagHashes = self.get_top_tag_hash_list_by_qnt(setOfTags, local_tags_qnt)
735            else:
736                listOfTagHashes = self.sort_tag_hash_list_by_qnt(setOfTags)
737
738            lastTagHash = None
739            lastTagHashQnt = None
740            treeLevel = 0
741            for tagHash in listOfTagHashes:
742                # currentTagHashQnt = self.tagsQnt[tagHash]
743                currentTagHashQnt = local_tags_qnt[tagHash]
744                if (lastTagHash is None) or (currentTagHashQnt == lastTagHashQnt):
745                    pass
746                else:
747                    treeLevel += 1
748                    lastTagHash = None
749                    lastTagHashQnt = None
750                # if treeLevel not in smartTree:
751                #     smartTree[treeLevel] = set()
752                # # tagsSetOnTheLevel = smartTree[treeLevel]
753                # # tagsSetOnTheLevel.add(tagHash)
754                # # smartTree[treeLevel] = tagsSetOnTheLevel
755                # smartTree[treeLevel].add(tagHash)
756                smartTree__filler.add(treeLevel, tagHash)
757                lastTagHash = tagHash
758                lastTagHashQnt = currentTagHashQnt
759        return dict(smartTree)
760
761    def get_all_from_tags(self, binTags, treeType=USUAL_TREE_TYPE):
762        binTags = set(binTags)
763        items = self.get_items_from_tags(binTags, treeType=treeType,
764                                         isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
765        if len(items[1]) > 0:
766            tags = self.get_tags_from_tags(binTags, treeType=treeType,
767                                           prePreparedSetOfAllInternalItemIDsForThisSetOfTags=items[1])
768            result = (tuple(tags), set(items[0]))
769            return result
770        else:
771            result = (tuple(), set())
772            return result
773
774    def get_tagsHashes_from_single_item(self, itemID, isWithoutRootHash=True):
775        if itemID in self.itemWithTags:
776            commonTagTupleHash = self.itemWithTags[itemID]
777            tagSet = set(self.commonTagSets[commonTagTupleHash])
778            if isWithoutRootHash:
779                return set(tagSet - {self.get_root_tag().__hash__()})
780            else:
781                return set(tagSet)
782        else:
783            return set()
784
785    def get_potential_itemIDs_from_item(self, binItem):
786        itemHash = binItem.__hash__()
787        if itemHash in self.itemIDsForItem:
788            return set(self.itemIDsForItem[itemHash])
789        else:
790            return set()
791
792    def is_smart_redirection_for_a_tag_path_reduction_needed(self, binTags):
793        binTags = set(binTags)
794
795        if self.get_root_tag() not in binTags:
796            binTags.add(self.get_root_tag())
797
798        tagHashSet = set()
799        for binTag in binTags:
800            tagHashSet.add(binTag.__hash__())
801
802        binTagsQnt = len(tagHashSet)
803        commonTagGroupHashSet = set()
804        # setOfLenOfTheCommonTagHashSetForChecking = set()
805        setOfTheTagsIntersection = None
806        for commonTagQnt in self.setOfTagGroupQnt:
807            if commonTagQnt > binTagsQnt:
808                setOfTheCommonTagGroupHashes = self.tagsQntPerCommonTagSet[commonTagQnt]
809                commonTagGroupHashSet.update(setOfTheCommonTagGroupHashes)
810        for commonTagGroupHash in commonTagGroupHashSet:
811            commonTagHashTuple = self.commonTagSets[commonTagGroupHash]
812            commonTagHashSet = set(commonTagHashTuple)
813            if len(tagHashSet & commonTagHashSet) == len(tagHashSet):
814                if tagHashSet != commonTagHashSet:
815                    if setOfTheTagsIntersection is None:
816                        setOfTheTagsIntersection = commonTagHashSet
817                    else:
818                        setOfTheTagsIntersection = setOfTheTagsIntersection & commonTagHashSet
819        #         if tagHashSet != commonTagHashSet:
820        #             setOfLenOfTheCommonTagHashSetForChecking.add(len(commonTagHashSet))
821        # minimalTagPath = min(setOfLenOfTheCommonTagHashSetForChecking)
822        # pathDiff = minimalTagPath - len(tagHashSet)
823        # if pathDiff > 0:
824        if setOfTheTagsIntersection is None:
825            return list()
826
827        setOfTheTagsForAReduction = setOfTheTagsIntersection - tagHashSet
828
829        sortedTagHashList = self.sort_tag_hash_list_by_qnt(setOfTheTagsForAReduction - {self.get_root_tag().__hash__()})
830        return list(sortedTagHashList)
831
832    def get_tags_for_a_smart_redirection(self, binTags):
833        result = self.is_smart_redirection_for_a_tag_path_reduction_needed(binTags)
834        return tuple(self.tag_hash_list_2_tag_list(result))
# Tree-type selectors accepted by the ``treeType`` parameter of the TagDB query methods.
SMART_TREE_TYPE = 0
SMART_TREE_TYPE_WITH_INTERNAL_MENU = 1
FULL_TREE_TYPE = 2
PLAIN_PSEUDO_TREE_TYPE = 3
# Same value as PLAIN_PSEUDO_TREE_TYPE; used as the default ``treeType`` of the query methods.
USUAL_TREE_TYPE = 3
class ToManyIdenticalItemsOnTheGivenTagPathError(builtins.Exception):
class ToManyIdenticalItemsOnTheGivenTagPathError(Exception):
    """Raised when more than one stored item ID matches the same item on one tag path."""
    pass

Common base class for all non-exit exceptions.

Inherited Members
builtins.Exception
Exception
builtins.BaseException
with_traceback
args
class UnknownTreeTypeError(builtins.Exception):
class UnknownTreeTypeError(Exception):
    """Raised by the query methods when ``treeType`` is not one of the handled tree-type constants."""
    pass

Common base class for all non-exit exceptions.

Inherited Members
builtins.Exception
Exception
builtins.BaseException
with_traceback
args
class AddToCompoundDict__Set(cengal.data_containers.compound_dict_management.manager.versions.v_1.manager.AddToCompoundDict):
class AddToCompoundDict__Set(AddToCompoundDict):
    """``AddToCompoundDict`` preset whose per-key containers are sets."""

    def __init__(self, original_dict):
        """Wrap ``original_dict`` so that added values are put into a set stored under the key."""

        def make_empty_set():
            return set()

        def add_to_set(container, key, value):
            # set.add() returns None, so nothing is handed back to the manager.
            return container[key].add(value)

        super().__init__(original_dict, make_empty_set, add_to_set)
AddToCompoundDict__Set(original_dict)
73    def __init__(self, original_dict):
74        super().__init__(
75            original_dict,
76            lambda: set(),
77            lambda original_dict, key, value: original_dict[key].add(value)
78        )

:param original_dict:
:param default_value: functor, e.g. list(); {1:set(), 2:[set(), set(), list()]}; etc.
:param mediator: functor, e.g. original_dict[index].add(y), original_dict[index] += y, etc. Should return ValueExistence(True, ...) or None/nothing.
:return:

Inherited Members
cengal.data_containers.compound_dict_management.manager.versions.v_1.manager.AddToCompoundDict
original_dict
add
class TagDB:
 81class TagDB:
 82
    def __init__(self):
        """Create an empty tag database: an ID generator plus the (initially empty) lookup indexes."""
        super().__init__()

        self.itemsID = IDGenerator()

        self.itemIDsForItem = {}  # key - item hash; data - set of itemIDs

        self.itemsSet = {}  # key - ItemID; data - binItem
        # TODO: replace the tag list with the hash of a tag list that is stored only once
        self.itemWithTags = {}  # key - ItemID; data - sorted common TagsTuple's hash

        # TODO: remove tagsNumPerItemID from the code. Replace this list of itemIDs with a list of
        # hashOfTheTagHashTuple
        self.tagsNumPerItemID = {}  # key - number of tags in this ItemID group; data - set of itemIDs which have
            # the needed number of tags

        self.tagsSet = {}  # key - tag hash; data - binTag
        self.tagWithItems = {}  # key - Tag hash; data - set of itemIDs
        self.tagsQnt = {}    # key - Tag hash; data - quantity of the items with this tag


        self.commonTagSets = {}  # key - sorted common TagsTuple's hash; data - sorted TagsTuple
        self.itemsOnTheCommonTagSets = {}   # key - sorted common TagsTuple's hash; data - set of itemIDs
        self.tagsQntPerCommonTagSet = {}    # key - number of tags; data - set of TagsTuple hashes
        self.setOfTagGroupQnt = set()   # {tagQntInGroup1, tagQntInGroup2, ..., tagQntInGroupN} where
            # each Group is a key of the self.tagsQntPerCommonTagSet

        # TODO: add a tagHashTuplesIDs dictionary of the form [tagHashTuple.__hash__():tagHashTuple]
        # where tagHashSet is {tagHash_1, tagHash_2, ..., tagHash_3}
        # TODO: add an itemsOnThePath dictionary of the form
        # [tagHashTuple.__hash__():{itemID_1, itemID_2, ..., itemID_3}]

        # TODO: add a dictionary of the form [tagQnt:{itemID_1, itemID_2, ..., itemID_3}] and read this from it
        # and/or
        # TODO: add a dictionary of the form [tagQnt:{hashOfTheTagHashTuple_1, hashOfTheTagHashTuple_2, ...
        # , hashOfTheTagHashTuple_N}] and read this from it, and then from each tagHashSet as well,
        # where hashOfTheTagHashTuple is tagHashTuple.__hash__()
118
119    def get_root_tag(self):
120        return str(_ROOT_TAG)
121
122    def add_tag(self, binTag):
123        tagHash = binTag.__hash__()
124        self.tagsSet[tagHash] = binTag
125        if tagHash not in self.tagsSet:
126            self.tagsQnt[tagHash] = 0
127        if tagHash not in self.tagWithItems:
128            self.tagWithItems[tagHash] = set()
129
130    def remove_tag(self, binTag):
131        # will try to delete given tag. If there is at least one item with this tag, than function will fail
132        # and will return False; otherwise it will delete given tag and will return True.
133        functionResult = False
134        tagHash = binTag.__hash__()
135        if tagHash in self.tagsSet:
136            if tagHash in self.tagsQnt:
137                if self.tagsQnt[tagHash] == 0:
138                    del self.tagsSet[tagHash]
139                    del self.tagsQnt[tagHash]
140                    functionResult = True
141                else:
142                    functionResult = False
143            else:
144                del self.tagsSet[tagHash]
145                functionResult = True
146
147        if functionResult:
148            if tagHash in self.tagWithItems:
149                del self.tagWithItems[tagHash]
150
151        return functionResult
152
153    def add_item(self, binItem, binTags):
154        # will add new item and return it's dynamic ID or None object If this Item already exist on the given tag path
155        # Or will raise an exception if we already have more than one binItem (another item that is identical to
156        # the given binItem)  on this tag path
157
158        binTags = set(binTags)
159
160        if self.get_root_tag() not in binTags:
161            binTags.add(self.get_root_tag())
162
163        # may raise an exception in this place. Nope - from now it will be not
164        if self.get_itemID_from_item_and_tags(binTags, binItem) is not None:
165            return None
166
167        itemID = self.itemsID()
168        self.itemsSet[itemID] = binItem
169
170        itemHash = binItem.__hash__()
171        if itemHash in self.itemIDsForItem:
172            IDsSet = self.itemIDsForItem[itemHash]
173            IDsSet.add(itemID)
174            # self.itemIDsForItem[itemHash] = IDsSet
175        else:
176            self.itemIDsForItem[itemHash] = {itemID}
177
178        tagQnt = len(binTags)
179        if tagQnt in self.tagsNumPerItemID:
180            itemIDsSet = self.tagsNumPerItemID[tagQnt]
181            itemIDsSet.add(itemID)
182            # self.tagsNumPerItemID[tagQnt] = itemIDsSet
183        else:
184            self.tagsNumPerItemID[tagQnt] = {itemID}
185
186        binTagHashes = set()
187
188        for tag in binTags:
189            self.add_tag(tag)
190            tagHash = tag.__hash__()
191            binTagHashes.add(tagHash)
192            setOfItems = self.tagWithItems[tagHash]
193            if itemID not in setOfItems:
194                setOfItems.add(itemID)
195                if tagHash in self.tagsQnt:
196                    self.tagsQnt[tagHash] += 1
197                else:
198                    self.tagsQnt[tagHash] = 1
199            self.tagWithItems[tagHash] = setOfItems
200
201        sortedTagTuple = tuple(self.sort_tag_hash_list_by_hash(binTagHashes))
202        hashOfTheSortedTagTuple = sortedTagTuple.__hash__()
203        self.itemWithTags[itemID] = hashOfTheSortedTagTuple
204
205        self.commonTagSets[hashOfTheSortedTagTuple] = sortedTagTuple
206
207        if hashOfTheSortedTagTuple in self.itemsOnTheCommonTagSets:
208            itemIDsSet = self.itemsOnTheCommonTagSets[hashOfTheSortedTagTuple]
209            itemIDsSet.add(itemID)
210            # self.itemsOnTheCommonTagSets[tagQnt] = itemIDsSet
211        else:
212            self.itemsOnTheCommonTagSets[hashOfTheSortedTagTuple] = {itemID}
213
214        lenOfTheSortedTagTuple = len(sortedTagTuple)
215        if lenOfTheSortedTagTuple in self.tagsQntPerCommonTagSet:
216            itemIDsSet = self.tagsQntPerCommonTagSet[lenOfTheSortedTagTuple]
217            itemIDsSet.add(hashOfTheSortedTagTuple)
218            # self.tagsQntPerCommonTagSet[lenOfTheSortedTagTuple] = itemIDsSet
219        else:
220            self.tagsQntPerCommonTagSet[lenOfTheSortedTagTuple] = {hashOfTheSortedTagTuple}
221            self.setOfTagGroupQnt.add(lenOfTheSortedTagTuple)
222
223        return itemID
224
225    def remove_item_by_itemID(self, itemID):
226        if itemID in self.itemsSet:
227            itemHash = self.itemsSet[itemID].__hash__()
228            del self.itemsSet[itemID]
229
230            if itemHash in self.itemIDsForItem:
231                IDsSet = self.itemIDsForItem[itemHash]
232                IDsSet.difference_update({itemID})
233                # self.itemIDsForItem[itemHash] = IDsSet
234                if len(IDsSet) < 1:
235                    del self.itemIDsForItem[itemHash]
236
237        if itemID in self.itemWithTags:
238            commonTagTupleHash = self.itemWithTags[itemID]
239            tagTuple = self.commonTagSets[commonTagTupleHash]
240            numberOfTags = len(tagTuple)
241
242            if commonTagTupleHash in self.itemsOnTheCommonTagSets:
243                IDsSet = self.itemsOnTheCommonTagSets[commonTagTupleHash]
244                IDsSet.difference_update({itemID})
245                # self.itemsOnTheCommonTagSets[commonTagTupleHash] = IDsSet
246                if len(IDsSet) < 1:
247                    del self.itemsOnTheCommonTagSets[commonTagTupleHash]
248                    del self.commonTagSets[commonTagTupleHash]
249                    if numberOfTags in self.tagsQntPerCommonTagSet:
250                        setOfTagTuplesHashes = self.tagsQntPerCommonTagSet[numberOfTags]
251                        setOfTagTuplesHashes.difference_update({numberOfTags})
252                        # self.tagsQntPerCommonTagSet[numberOfTags] = setOfTagTuplesHashes
253                        if len(setOfTagTuplesHashes) < 1:
254                            del self.tagsQntPerCommonTagSet[numberOfTags]
255                            self.setOfTagGroupQnt.difference_update({numberOfTags})
256
257            del self.itemWithTags[itemID]
258
259            setOfTagHashes = set(tagTuple)
260
261            tagsQnt = len(setOfTagHashes)
262            if tagsQnt in self.tagsNumPerItemID:
263                IDsSet = self.tagsNumPerItemID[tagsQnt]
264                IDsSet.difference_update({itemID})
265                # self.tagsNumPerItemID[tagsQnt] = IDsSet
266                if len(IDsSet) < 1:
267                    del self.tagsNumPerItemID[tagsQnt]
268
269            for tagHash in setOfTagHashes:
270                if tagHash in self.tagsQnt:
271                    tagsQuantity = self.tagsQnt[tagHash]
272                    tagsQuantity -= 1
273                    if tagsQuantity < 1:
274                        tagsQuantity = 0
275                    self.tagsQnt[tagHash] = tagsQuantity
276                    if tagsQuantity < 1:
277                        del self.tagsQnt[tagHash]
278                        del self.tagsSet[tagHash]
279                if tagHash in self.tagWithItems:
280                    IDsSet = self.tagWithItems[tagHash]
281                    IDsSet.difference_update({itemID})
282                    if len(IDsSet) < 1:
283                        del self.tagWithItems[tagHash]
284                    # self.tagWithItems[tagHash] = IDsSet
285
286        self.itemsID.remove_id(itemID)
287
288    def remove_item(self, binTags, binItem):
289        # will return ItemId for deleted item or None object if Item is not exist
290        # Or will raise an exception if we already have more than one binItem (another item that is identical to
291        # the given binItem) on this tag path
292        binTags = set(binTags)
293        if self.get_root_tag() not in binTags:
294            binTags.add(self.get_root_tag())
295        itemID = self.get_itemID_from_item_and_tags(binTags, binItem)
296        if itemID is not None:
297            self.remove_item_by_itemID(itemID)
298        return itemID
299
300    def __OLD__get_itemID_from_item_and_tags(self, binTags, binItem):
301        if self.get_root_tag() not in binTags:
302            binTags.append(self.get_root_tag())
303        potentialIDs = set(self.get_potential_itemIDs_from_item(binItem))
304        itemIDsSet = set(self.get_itemIDs_from_tags(binTags, SMART_TREE_TYPE))
305        resultItemIDsList = potentialIDs & itemIDsSet
306        if len(resultItemIDsList) == 0:
307            return None
308        elif len(resultItemIDsList) == 1:
309            resultItemID = resultItemIDsList.pop()  # we have assume that we'll have only one item in intersection
310                # between potential IDs and Items that have (and have only) given tag list (without another tags in the
311                # path to this items). We need to check it in the adding new item to the given tag path.
312            return resultItemID
313        elif len(resultItemIDsList) > 1:
314            raise ToManyIdenticalItemsOnTheGivenTagPathError()
315
316    def get_itemID_from_item_and_tags(self, binTags, binItem):
317        binTags = set(binTags)
318
319        if self.get_root_tag() not in binTags:
320            binTags.add(self.get_root_tag())
321
322        potentialIDs = self.get_potential_itemIDs_from_item(binItem)
323        setOfBinTagsHashes = set()
324        for tag in binTags:
325            setOfBinTagsHashes.add(tag.__hash__())
326        for itemID in potentialIDs:
327            currentItemTagsSet = self.get_tagsHashes_from_single_item(itemID, isWithoutRootHash=False)
328            if setOfBinTagsHashes == currentItemTagsSet:
329                return itemID
330        return None
331
332    def tag_hash_list_2_tag_list(self, tagHashList):
333        tagList = list()
334        for tagHash in tagHashList:
335            tagList.append(self.tagsSet[tagHash])
336        return tagList
337
338    def get_item_and_tags_from_itemID(self, itemID):
339        commonTagTupleHash = self.itemWithTags[itemID]
340        tagSet = set(self.commonTagSets[commonTagTupleHash]) - {self.get_root_tag().__hash__()}
341        sortedTagHashList = self.sort_tag_hash_list_by_qnt(tagSet)
342        result = (self.itemsSet[itemID], self.tag_hash_list_2_tag_list(sortedTagHashList))
343        return result
344
345    # @profile
346    def get_top_tag_hash_list_by_qnt(self, tagHashSet, local_tags_qnt=None):
347        tagsQnt = self.tagsQnt
348        if local_tags_qnt is not None:
349            tagsQnt = local_tags_qnt
350        tag_hash_set = set(tagHashSet)
351        tag_by_qnt = dict()
352        tag_by_qnt__filler = AddToCompoundDict__Set(tag_by_qnt)
353        biggest_qnt = 0
354        for tag_hash in tag_hash_set:
355            qnt = tagsQnt[tag_hash]
356            if qnt > biggest_qnt:
357                biggest_qnt = qnt
358            # if qnt not in tag_by_qnt:
359            #     tag_by_qnt[qnt] = set()
360            # tag_by_qnt[qnt].add(tag_hash)
361            tag_by_qnt__filler.add(qnt, tag_hash)
362
363        result = None
364        if len(tag_by_qnt) > 0:
365            # biggest_qnt = max(tag_by_qnt)
366            result = tuple(tag_by_qnt[biggest_qnt])
367        else:
368            result = tuple()
369        return result
370
371    def sort_tag_hash_list_by_qnt(self, tagHashSet):
372        # will return sorted tag list - not sorted tag hash list
373        tagHashSet = set(tagHashSet)
374        rawTagList = list()
375        for tagHash in tagHashSet:
376            tagWithWeight = (tagHash, self.tagsQnt[tagHash])
377            rawTagList.append(tagWithWeight)
378        return self.sort_raw_tag_list(rawTagList)
379
380    def sort_tag_list_by_qnt(self, binTags):
381        binTags = set(binTags)
382        rawTagList = list()
383        for tag in binTags:
384            tagHash = tag.__hash__()
385            tagWithWeight = (tag, self.tagsQnt[tagHash])
386            rawTagList.append(tagWithWeight)
387        return self.sort_raw_tag_list(rawTagList)
388
389    def sort_tag_hash_list_by_hash(self, tagHashSet):
390        # will return sorted tag list - not sorted tag hash list
391        tagHashSet = set(tagHashSet)
392        rawTagList = list()
393        for tagHash in tagHashSet:
394            tagWithWeight = (tagHash, tagHash)
395            rawTagList.append(tagWithWeight)
396        return self.sort_raw_tag_list(rawTagList)
397
398    def sort_tag_list_by_hash(self, binTags):
399        binTags = set(binTags)
400        rawTagList = list()
401        for tag in binTags:
402            tagHash = tag.__hash__()
403            tagWithWeight = (tag, tagHash)
404            rawTagList.append(tagWithWeight)
405        return self.sort_raw_tag_list(rawTagList)
406
407    def sort_raw_tag_list(self, rawTagList):
408        # will return sorted tag list
409        rawTagList = sorted(rawTagList, key=lambda tagAndWeight: tagAndWeight[1], reverse=True)
410        tagList = list()
411        for rawTag in rawTagList:
412            tagList.append(rawTag[0])
413        return tagList
414
415    def get_itemIDs_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
416                              isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
417        # TODO: исправить ошибку: SMART_TREE_TYPE: возвращает не только список файлов в текущей директории, но и из
418        # непосредственных подпапок данной папки
419
420        # treeType - type of the graph tree representation: show all tags with replies (pure representation);
421        # show only relevant tags; etc.
422        # return set of itemIDs
423
424        binTags = set(binTags)
425
426        if self.get_root_tag() not in binTags:
427            binTags.add(self.get_root_tag())
428
429        tagHashSet = set()
430        for binTag in binTags:
431            tagHashSet.add(binTag.__hash__())
432
433        # PLAIN_PSEUDO_TREE_TYPE
434        interceptionOfItemsWithTags = set()
435        if (treeType == PLAIN_PSEUDO_TREE_TYPE) or isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
436            binTagsQnt = len(tagHashSet)
437            commonTagGroupHashSet = set()
438            tagSet = set()
439            itemIDSet = set()
440            binTagHashTuple = tuple(self.sort_tag_hash_list_by_hash(tagHashSet))
441            hashOfTheBinTagHashTuple = binTagHashTuple.__hash__()
442            if hashOfTheBinTagHashTuple in self.itemsOnTheCommonTagSets:
443                itemIDSet = self.itemsOnTheCommonTagSets[hashOfTheBinTagHashTuple]
444            for commonTagQnt in self.setOfTagGroupQnt:
445                if commonTagQnt > binTagsQnt:
446                    setOfTheCommonTagGroupHashes = self.tagsQntPerCommonTagSet[commonTagQnt]
447                    commonTagGroupHashSet.update(setOfTheCommonTagGroupHashes)
448            for commonTagGroupHash in commonTagGroupHashSet:
449                commonTagHashTuple = self.commonTagSets[commonTagGroupHash]
450                commonTagHashSet = set(commonTagHashTuple)
451                if tagHashSet.issubset(commonTagHashSet):
452                    itemIDSet.update(self.itemsOnTheCommonTagSets[commonTagGroupHash])
453                # # if len(tagHashSet & commonTagHashSet) == len(tagHashSet):
454                # res_set = tagHashSet.intersection(commonTagHashSet)
455                # if len(res_set) == binTagsQnt:
456                #     itemIDSet = itemIDSet | self.itemsOnTheCommonTagSets[commonTagGroupHash]
457            interceptionOfItemsWithTags = itemIDSet
458
459            # isFirstHash = True
460            # for tag in binTags:
461            #     tagHash = tag.__hash__()
462            #     if tagHash in self.tagWithItems:
463            #         if isFirstHash:
464            #             interceptionOfItemsWithTags = self.tagWithItems[tagHash]
465            #             isFirstHash = False
466            #         else:
467            #             itemsWithTag = self.tagWithItems[tagHash]
468            #             interceptionOfItemsWithTags = interceptionOfItemsWithTags & itemsWithTag
469            #     else:
470            #         # TODO: произвести такую же провеку в get_items_from_tags() и build_smart_tree()
471            #         if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
472            #             result = (set(), set())
473            #             return result
474            #         else:
475            #             return set()
476
477        resultItemIDSet = interceptionOfItemsWithTags
478        setOfAllInternalItemIDsForThisSetOfTags = interceptionOfItemsWithTags
479
480        # SMART_TREE_TYPE or FULL_TREE_TYPE
481        if (treeType == SMART_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
482            resultItemIDSet = set()
483            binTagHashTuple = self.sort_tag_hash_list_by_hash(tagHashSet)
484            binTagHashTuple = tuple(binTagHashTuple)
485            hashOfTheBinTagHashTuple = binTagHashTuple.__hash__()
486            if hashOfTheBinTagHashTuple in self.itemsOnTheCommonTagSets:
487                resultItemIDSet = self.itemsOnTheCommonTagSets[hashOfTheBinTagHashTuple]
488
489            # filteredItemIDsSet = set()
490            # tagQnt = len(binTags)
491            # # for itemID in setOfAllInternalItemIDsForThisSetOfTags:
492            # #     if len(self.itemWithTags[itemID]) == tagQnt:
493            # #         # _TODO: добавить словарь вида [tagQnt:{itemID_1, itemID_2, ..., itemID_3}]
494            # #         # и вычитывать это из него
495            # #         # и/или
496            # #         # _TODO: добавить словарь вида [tagQnt:{hashOfTheTagHashSet_1, hashOfTheTagHashSet_2, ...
497            # #         # , hashOfTheTagHashSet_3}] и вычитывать это из него, а потом уже и из каждого tagHashSet
498            # #         # где hashOfTheTagHashSet - это tagHashSet.__hash__()
499            # #         filteredItemIDsSet.add(itemID)
500            # if tagQnt in self.tagsNumPerItemID:
501            #     filteredItemIDsSet = setOfAllInternalItemIDsForThisSetOfTags & self.tagsNumPerItemID[tagQnt]
502            #
503            # resultItemIDSet = set()
504            # tagHashSet = set()
505            # for binTag in binTags:
506            #     tagHashSet.add(binTag.__hash__())
507            # for itemID in filteredItemIDsSet:
508            #     commonTagTupleHash = self.itemWithTags[itemID]
509            #     tagSet = set(self.commonTagSets[commonTagTupleHash])
510            #     if tagSet == tagHashSet:
511            #         # _TODO: добавить словарь tagHashTuplesIDs вида [tagHashTuple.__hash__():tagHashTuple]
512            #         # где tagHashSet - это {tagHash_1, tagHash_2, ..., tagHash_3}
513            #         # _TODO: добавить словарь itemsOnThePath вида [tagHashTuple.__hash__():{itemID_1, itemID_2, ...
514            #         # , itemID_3}]
515            #         resultItemIDSet.add(itemID)
516        elif treeType == PLAIN_PSEUDO_TREE_TYPE:
517            # already implemented (see bellow). Don't touch this code!
518            pass
519        else:
520            raise UnknownTreeTypeError()
521
522        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
523            result = (set(resultItemIDSet), set(setOfAllInternalItemIDsForThisSetOfTags))
524            return result
525        else:
526            return set(resultItemIDSet)
527
528    def get_items_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
529                            isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
530        # treeType - type of the graph tree representation: show all tags with replies (pure representation);
531        # show only relevant tags; etc.
532        # return set of itemIDs
533        binTags = set(binTags)
534        itemIDsSet = self.get_itemIDs_from_tags(binTags, treeType=treeType,
535                                                isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=
536                                                isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags)
537        if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
538            itemSet = set()
539            for itemID in itemIDsSet[0]:
540                itemSet.add(self.itemsSet[itemID])
541            result = (tuple(itemSet), tuple(itemIDsSet[1]))  # result == (usual items set, additional set of all
542                # internal itemIDs)
543            return result
544        else:
545            itemSet = set()
546            for itemID in itemIDsSet:
547                itemSet.add(self.itemsSet[itemID])
548            return tuple(itemSet)
549
550    def get_tagHashes_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
551                                prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
552        # where "itemIDsSet" is externally given "get_itemIDs_from_tags(binTags, treeType=PLAIN_PSEUDO_TREE_TYPE)"
553        # so "itemIDsSet" is a set of the all items inside the "folder" binTags (including items from "subfolders")
554        # treeType - the same as in the "get_items_from_tags()" method
555        # return set of itemIDs
556        # prePreparedSetOfAllInternalItemIDsForThisSetOfTags can be generated by:
557        #   a) get_itemIDs_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE)
558        #   a) get_itemIDs_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags = True)
559        #   c) get_items_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags = True)
560        #   d) get_items_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE) || BUT: it'll return item set - not itemID
561        #       set
562
563        binTags = set(binTags)
564
565        resultTagHashSet = set()
566
567        setOfAllInternalItemIDs = set()
568        if prePreparedSetOfAllInternalItemIDsForThisSetOfTags is None:
569            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(binTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
570        else:
571            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDsForThisSetOfTags
572
573        if (treeType == PLAIN_PSEUDO_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
574            binTagHashes = set()
575            for tag in binTags:
576                binTagHashes.add(tag.__hash__())
577            tagHashSet = set()
578            for itemID in setOfAllInternalItemIDs:
579                if itemID in self.itemWithTags:
580                    commonTagTupleHash = self.itemWithTags[itemID]
581                    tagSet = set(self.commonTagSets[commonTagTupleHash])
582                    tagHashSet.update(tagSet)
583            resultTagHashSet = tagHashSet - binTagHashes
584        elif treeType == SMART_TREE_TYPE:
585            # smartTree = self.build_smart_tree(binTags, prePreparedSetOfAllInternalItemIDs=setOfAllInternalItemIDs)
586            smartTree = self.build_smart_tree(binTags, prePreparedSetOfAllInternalItemIDs=setOfAllInternalItemIDs,
587                                              zeroSliceOnly=True)
588            if 0 in smartTree:
589                resultTagHashSet = smartTree[0]
590            # filteredItemIDsList = list()
591            # tagQnt = len(binTags)
592            # for itemID in listOfAllInternalItemIDs:
593            #     if len(self.itemWithTags[itemID]) == (tagQnt + 1):
594            #         filteredItemIDsList.append(itemID)
595            #
596            # tagHashSet = set()
597            # for itemID in filteredItemIDsList:
598            #     tagHashSet.update(set(self.itemWithTags[itemID]))
599            # givenTagHashes = set()
600            # for tag in binTags:
601            #     givenTagHashes.add(tag.__hash__())
602            # tagHashSet.difference_update(givenTagHashes)
603            # ##resultTagHashList = list(tagHashSet)
604            # # если остановиться тут - то мы увидим не все папки: мы не увидим папки непосредственно в которых есть
605            # # только другие подпапки, но ни одного файла.
606            # #
607            # # значит далее мы должны исключить все файлы, которые имеют только что найденные теги, и начать строить
608            # # древо тегов для оставшихся
609            # #
610            # # а далее - повторить все это в цикле, увеличив при проверке кол-во тегов еще раз на единицу (и используя
611            # # уже оставшийся после отсеивания набор файлов). В итоге кол-во итераций зависит не от количества файлов,
612            # # а от максимальной фактически имеющейся вложенности файлов внутри тегов-каталогов
613        else:
614            raise UnknownTreeTypeError()
615
616        sortedTagHashList = self.sort_tag_hash_list_by_qnt(resultTagHashSet - {self.get_root_tag().__hash__()})
617        return list(sortedTagHashList)
618
619    def get_tags_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
620                           prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
621        result = self.get_tagHashes_from_tags(binTags, treeType=treeType,
622                                              prePreparedSetOfAllInternalItemIDsForThisSetOfTags=
623                                              prePreparedSetOfAllInternalItemIDsForThisSetOfTags)
624        return tuple(self.tag_hash_list_2_tag_list(result))
625
626    def build_smart_tree(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
627        startingBinTags = set(startingBinTags)
628
629        if self.get_root_tag() not in startingBinTags:
630            startingBinTags.add(self.get_root_tag())
631
632        startingTagHashes = set()
633        for tag in startingBinTags:
634            startingTagHashes.add(tag.__hash__())
635
636        if prePreparedSetOfAllInternalItemIDs is None:
637            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(startingBinTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
638        else:
639            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDs
640
641        smartTree = {0: set()}
642        smartTree__filler = AddToCompoundDict__Set(smartTree)
643        local_tags_qnt = dict()
644        local_tags_qnt__filler = AddToCompoundDict(
645            local_tags_qnt,
646            lambda: 0,
647            lambda container, key, value: (True, container[key] + 1)
648        )
649        for itemID in setOfAllInternalItemIDs:
650            commonTagTupleHash = self.itemWithTags[itemID]
651            tagSet = set(self.commonTagSets[commonTagTupleHash])
652            setOfTags = tagSet
653            setOfTags = setOfTags - startingTagHashes
654            for tag_hash in setOfTags:
655                # if tag_hash not in local_tags_qnt:
656                #     local_tags_qnt[tag_hash] = 0
657                # local_tags_qnt[tag_hash] += 1
658                local_tags_qnt__filler.add(tag_hash)
659
660        for itemID in setOfAllInternalItemIDs:
661            commonTagTupleHash = self.itemWithTags[itemID]
662            tagSet = set(self.commonTagSets[commonTagTupleHash])
663            setOfTags = tagSet
664            setOfTags = setOfTags - startingTagHashes
665            listOfTagHashes = None
666            if zeroSliceOnly:
667                listOfTagHashes = self.get_top_tag_hash_list_by_qnt(setOfTags, local_tags_qnt)
668            else:
669                listOfTagHashes = self.sort_tag_hash_list_by_qnt(setOfTags)
670
671            lastTagHash = None
672            lastTagHashQnt = None
673            treeLevel = 0
674            for tagHash in listOfTagHashes:
675                # currentTagHashQnt = self.tagsQnt[tagHash]
676                currentTagHashQnt = local_tags_qnt[tagHash]
677                if (lastTagHash is None) or (currentTagHashQnt == lastTagHashQnt):
678                    pass
679                else:
680                    treeLevel += 1
681                    lastTagHash = None
682                    lastTagHashQnt = None
683                # if treeLevel not in smartTree:
684                #     smartTree[treeLevel] = set()
685                # # tagsSetOnTheLevel = smartTree[treeLevel]
686                # # tagsSetOnTheLevel.add(tagHash)
687                # # smartTree[treeLevel] = tagsSetOnTheLevel
688                # smartTree[treeLevel].add(tagHash)
689                smartTree__filler.add(treeLevel, tagHash)
690                lastTagHash = tagHash
691                lastTagHashQnt = currentTagHashQnt
692        return dict(smartTree)
693
694    def build_smart_tree_2(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
695        startingBinTags = set(startingBinTags)
696
697        if self.get_root_tag() not in startingBinTags:
698            startingBinTags.add(self.get_root_tag())
699
700        startingTagHashes = set()
701        for tag in startingBinTags:
702            startingTagHashes.add(tag.__hash__())
703
704        if prePreparedSetOfAllInternalItemIDs is None:
705            setOfAllInternalItemIDs = self.get_itemIDs_from_tags(startingBinTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
706        else:
707            setOfAllInternalItemIDs = prePreparedSetOfAllInternalItemIDs
708
709        smartTree = {0: set()}
710        smartTree__filler = AddToCompoundDict__Set(smartTree)
711        local_tags_qnt = dict()
712        local_tags_qnt__filler = AddToCompoundDict(
713            local_tags_qnt,
714            lambda: 0,
715            lambda container, key, value: (True, container[key] + 1)
716        )
717        for itemID in setOfAllInternalItemIDs:
718            commonTagTupleHash = self.itemWithTags[itemID]
719            tagSet = set(self.commonTagSets[commonTagTupleHash])
720            setOfTags = tagSet
721            setOfTags = setOfTags - startingTagHashes
722            for tag_hash in setOfTags:
723                # if tag_hash not in local_tags_qnt:
724                #     local_tags_qnt[tag_hash] = 0
725                # local_tags_qnt[tag_hash] += 1
726                local_tags_qnt__filler.add(tag_hash)
727
728        for itemID in setOfAllInternalItemIDs:
729            commonTagTupleHash = self.itemWithTags[itemID]
730            tagSet = set(self.commonTagSets[commonTagTupleHash])
731            setOfTags = tagSet
732            setOfTags = setOfTags - startingTagHashes
733            listOfTagHashes = None
734            if zeroSliceOnly:
735                listOfTagHashes = self.get_top_tag_hash_list_by_qnt(setOfTags, local_tags_qnt)
736            else:
737                listOfTagHashes = self.sort_tag_hash_list_by_qnt(setOfTags)
738
739            lastTagHash = None
740            lastTagHashQnt = None
741            treeLevel = 0
742            for tagHash in listOfTagHashes:
743                # currentTagHashQnt = self.tagsQnt[tagHash]
744                currentTagHashQnt = local_tags_qnt[tagHash]
745                if (lastTagHash is None) or (currentTagHashQnt == lastTagHashQnt):
746                    pass
747                else:
748                    treeLevel += 1
749                    lastTagHash = None
750                    lastTagHashQnt = None
751                # if treeLevel not in smartTree:
752                #     smartTree[treeLevel] = set()
753                # # tagsSetOnTheLevel = smartTree[treeLevel]
754                # # tagsSetOnTheLevel.add(tagHash)
755                # # smartTree[treeLevel] = tagsSetOnTheLevel
756                # smartTree[treeLevel].add(tagHash)
757                smartTree__filler.add(treeLevel, tagHash)
758                lastTagHash = tagHash
759                lastTagHashQnt = currentTagHashQnt
760        return dict(smartTree)
761
762    def get_all_from_tags(self, binTags, treeType=USUAL_TREE_TYPE):
763        binTags = set(binTags)
764        items = self.get_items_from_tags(binTags, treeType=treeType,
765                                         isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
766        if len(items[1]) > 0:
767            tags = self.get_tags_from_tags(binTags, treeType=treeType,
768                                           prePreparedSetOfAllInternalItemIDsForThisSetOfTags=items[1])
769            result = (tuple(tags), set(items[0]))
770            return result
771        else:
772            result = (tuple(), set())
773            return result
774
775    def get_tagsHashes_from_single_item(self, itemID, isWithoutRootHash=True):
776        if itemID in self.itemWithTags:
777            commonTagTupleHash = self.itemWithTags[itemID]
778            tagSet = set(self.commonTagSets[commonTagTupleHash])
779            if isWithoutRootHash:
780                return set(tagSet - {self.get_root_tag().__hash__()})
781            else:
782                return set(tagSet)
783        else:
784            return set()
785
786    def get_potential_itemIDs_from_item(self, binItem):
787        itemHash = binItem.__hash__()
788        if itemHash in self.itemIDsForItem:
789            return set(self.itemIDsForItem[itemHash])
790        else:
791            return set()
792
793    def is_smart_redirection_for_a_tag_path_reduction_needed(self, binTags):
794        binTags = set(binTags)
795
796        if self.get_root_tag() not in binTags:
797            binTags.add(self.get_root_tag())
798
799        tagHashSet = set()
800        for binTag in binTags:
801            tagHashSet.add(binTag.__hash__())
802
803        binTagsQnt = len(tagHashSet)
804        commonTagGroupHashSet = set()
805        # setOfLenOfTheCommonTagHashSetForChecking = set()
806        setOfTheTagsIntersection = None
807        for commonTagQnt in self.setOfTagGroupQnt:
808            if commonTagQnt > binTagsQnt:
809                setOfTheCommonTagGroupHashes = self.tagsQntPerCommonTagSet[commonTagQnt]
810                commonTagGroupHashSet.update(setOfTheCommonTagGroupHashes)
811        for commonTagGroupHash in commonTagGroupHashSet:
812            commonTagHashTuple = self.commonTagSets[commonTagGroupHash]
813            commonTagHashSet = set(commonTagHashTuple)
814            if len(tagHashSet & commonTagHashSet) == len(tagHashSet):
815                if tagHashSet != commonTagHashSet:
816                    if setOfTheTagsIntersection is None:
817                        setOfTheTagsIntersection = commonTagHashSet
818                    else:
819                        setOfTheTagsIntersection = setOfTheTagsIntersection & commonTagHashSet
820        #         if tagHashSet != commonTagHashSet:
821        #             setOfLenOfTheCommonTagHashSetForChecking.add(len(commonTagHashSet))
822        # minimalTagPath = min(setOfLenOfTheCommonTagHashSetForChecking)
823        # pathDiff = minimalTagPath - len(tagHashSet)
824        # if pathDiff > 0:
825        if setOfTheTagsIntersection is None:
826            return list()
827
828        setOfTheTagsForAReduction = setOfTheTagsIntersection - tagHashSet
829
830        sortedTagHashList = self.sort_tag_hash_list_by_qnt(setOfTheTagsForAReduction - {self.get_root_tag().__hash__()})
831        return list(sortedTagHashList)
832
833    def get_tags_for_a_smart_redirection(self, binTags):
834        result = self.is_smart_redirection_for_a_tag_path_reduction_needed(binTags)
835        return tuple(self.tag_hash_list_2_tag_list(result))
itemsID
itemIDsForItem
itemsSet
itemWithTags
tagsNumPerItemID
tagsSet
tagWithItems
tagsQnt
commonTagSets
itemsOnTheCommonTagSets
tagsQntPerCommonTagSet
setOfTagGroupQnt
def get_root_tag(self):
    def get_root_tag(self):
        """Return the root tag: the module-level ``_ROOT_TAG`` converted with ``str``."""
        return str(_ROOT_TAG)
def add_tag(self, binTag):
122    def add_tag(self, binTag):
123        tagHash = binTag.__hash__()
124        self.tagsSet[tagHash] = binTag
125        if tagHash not in self.tagsSet:
126            self.tagsQnt[tagHash] = 0
127        if tagHash not in self.tagWithItems:
128            self.tagWithItems[tagHash] = set()
def remove_tag(self, binTag):
130    def remove_tag(self, binTag):
131        # will try to delete given tag. If there is at least one item with this tag, than function will fail
132        # and will return False; otherwise it will delete given tag and will return True.
133        functionResult = False
134        tagHash = binTag.__hash__()
135        if tagHash in self.tagsSet:
136            if tagHash in self.tagsQnt:
137                if self.tagsQnt[tagHash] == 0:
138                    del self.tagsSet[tagHash]
139                    del self.tagsQnt[tagHash]
140                    functionResult = True
141                else:
142                    functionResult = False
143            else:
144                del self.tagsSet[tagHash]
145                functionResult = True
146
147        if functionResult:
148            if tagHash in self.tagWithItems:
149                del self.tagWithItems[tagHash]
150
151        return functionResult
def add_item(self, binItem, binTags):
153    def add_item(self, binItem, binTags):
154        # will add new item and return it's dynamic ID or None object If this Item already exist on the given tag path
155        # Or will raise an exception if we already have more than one binItem (another item that is identical to
156        # the given binItem)  on this tag path
157
158        binTags = set(binTags)
159
160        if self.get_root_tag() not in binTags:
161            binTags.add(self.get_root_tag())
162
163        # may raise an exception in this place. Nope - from now it will be not
164        if self.get_itemID_from_item_and_tags(binTags, binItem) is not None:
165            return None
166
167        itemID = self.itemsID()
168        self.itemsSet[itemID] = binItem
169
170        itemHash = binItem.__hash__()
171        if itemHash in self.itemIDsForItem:
172            IDsSet = self.itemIDsForItem[itemHash]
173            IDsSet.add(itemID)
174            # self.itemIDsForItem[itemHash] = IDsSet
175        else:
176            self.itemIDsForItem[itemHash] = {itemID}
177
178        tagQnt = len(binTags)
179        if tagQnt in self.tagsNumPerItemID:
180            itemIDsSet = self.tagsNumPerItemID[tagQnt]
181            itemIDsSet.add(itemID)
182            # self.tagsNumPerItemID[tagQnt] = itemIDsSet
183        else:
184            self.tagsNumPerItemID[tagQnt] = {itemID}
185
186        binTagHashes = set()
187
188        for tag in binTags:
189            self.add_tag(tag)
190            tagHash = tag.__hash__()
191            binTagHashes.add(tagHash)
192            setOfItems = self.tagWithItems[tagHash]
193            if itemID not in setOfItems:
194                setOfItems.add(itemID)
195                if tagHash in self.tagsQnt:
196                    self.tagsQnt[tagHash] += 1
197                else:
198                    self.tagsQnt[tagHash] = 1
199            self.tagWithItems[tagHash] = setOfItems
200
201        sortedTagTuple = tuple(self.sort_tag_hash_list_by_hash(binTagHashes))
202        hashOfTheSortedTagTuple = sortedTagTuple.__hash__()
203        self.itemWithTags[itemID] = hashOfTheSortedTagTuple
204
205        self.commonTagSets[hashOfTheSortedTagTuple] = sortedTagTuple
206
207        if hashOfTheSortedTagTuple in self.itemsOnTheCommonTagSets:
208            itemIDsSet = self.itemsOnTheCommonTagSets[hashOfTheSortedTagTuple]
209            itemIDsSet.add(itemID)
210            # self.itemsOnTheCommonTagSets[tagQnt] = itemIDsSet
211        else:
212            self.itemsOnTheCommonTagSets[hashOfTheSortedTagTuple] = {itemID}
213
214        lenOfTheSortedTagTuple = len(sortedTagTuple)
215        if lenOfTheSortedTagTuple in self.tagsQntPerCommonTagSet:
216            itemIDsSet = self.tagsQntPerCommonTagSet[lenOfTheSortedTagTuple]
217            itemIDsSet.add(hashOfTheSortedTagTuple)
218            # self.tagsQntPerCommonTagSet[lenOfTheSortedTagTuple] = itemIDsSet
219        else:
220            self.tagsQntPerCommonTagSet[lenOfTheSortedTagTuple] = {hashOfTheSortedTagTuple}
221            self.setOfTagGroupQnt.add(lenOfTheSortedTagTuple)
222
223        return itemID
def remove_item_by_itemID(self, itemID):
225    def remove_item_by_itemID(self, itemID):
226        if itemID in self.itemsSet:
227            itemHash = self.itemsSet[itemID].__hash__()
228            del self.itemsSet[itemID]
229
230            if itemHash in self.itemIDsForItem:
231                IDsSet = self.itemIDsForItem[itemHash]
232                IDsSet.difference_update({itemID})
233                # self.itemIDsForItem[itemHash] = IDsSet
234                if len(IDsSet) < 1:
235                    del self.itemIDsForItem[itemHash]
236
237        if itemID in self.itemWithTags:
238            commonTagTupleHash = self.itemWithTags[itemID]
239            tagTuple = self.commonTagSets[commonTagTupleHash]
240            numberOfTags = len(tagTuple)
241
242            if commonTagTupleHash in self.itemsOnTheCommonTagSets:
243                IDsSet = self.itemsOnTheCommonTagSets[commonTagTupleHash]
244                IDsSet.difference_update({itemID})
245                # self.itemsOnTheCommonTagSets[commonTagTupleHash] = IDsSet
246                if len(IDsSet) < 1:
247                    del self.itemsOnTheCommonTagSets[commonTagTupleHash]
248                    del self.commonTagSets[commonTagTupleHash]
249                    if numberOfTags in self.tagsQntPerCommonTagSet:
250                        setOfTagTuplesHashes = self.tagsQntPerCommonTagSet[numberOfTags]
251                        setOfTagTuplesHashes.difference_update({numberOfTags})
252                        # self.tagsQntPerCommonTagSet[numberOfTags] = setOfTagTuplesHashes
253                        if len(setOfTagTuplesHashes) < 1:
254                            del self.tagsQntPerCommonTagSet[numberOfTags]
255                            self.setOfTagGroupQnt.difference_update({numberOfTags})
256
257            del self.itemWithTags[itemID]
258
259            setOfTagHashes = set(tagTuple)
260
261            tagsQnt = len(setOfTagHashes)
262            if tagsQnt in self.tagsNumPerItemID:
263                IDsSet = self.tagsNumPerItemID[tagsQnt]
264                IDsSet.difference_update({itemID})
265                # self.tagsNumPerItemID[tagsQnt] = IDsSet
266                if len(IDsSet) < 1:
267                    del self.tagsNumPerItemID[tagsQnt]
268
269            for tagHash in setOfTagHashes:
270                if tagHash in self.tagsQnt:
271                    tagsQuantity = self.tagsQnt[tagHash]
272                    tagsQuantity -= 1
273                    if tagsQuantity < 1:
274                        tagsQuantity = 0
275                    self.tagsQnt[tagHash] = tagsQuantity
276                    if tagsQuantity < 1:
277                        del self.tagsQnt[tagHash]
278                        del self.tagsSet[tagHash]
279                if tagHash in self.tagWithItems:
280                    IDsSet = self.tagWithItems[tagHash]
281                    IDsSet.difference_update({itemID})
282                    if len(IDsSet) < 1:
283                        del self.tagWithItems[tagHash]
284                    # self.tagWithItems[tagHash] = IDsSet
285
286        self.itemsID.remove_id(itemID)
def remove_item(self, binTags, binItem):
288    def remove_item(self, binTags, binItem):
289        # will return ItemId for deleted item or None object if Item is not exist
290        # Or will raise an exception if we already have more than one binItem (another item that is identical to
291        # the given binItem) on this tag path
292        binTags = set(binTags)
293        if self.get_root_tag() not in binTags:
294            binTags.add(self.get_root_tag())
295        itemID = self.get_itemID_from_item_and_tags(binTags, binItem)
296        if itemID is not None:
297            self.remove_item_by_itemID(itemID)
298        return itemID
def get_itemID_from_item_and_tags(self, binTags, binItem):
316    def get_itemID_from_item_and_tags(self, binTags, binItem):
317        binTags = set(binTags)
318
319        if self.get_root_tag() not in binTags:
320            binTags.add(self.get_root_tag())
321
322        potentialIDs = self.get_potential_itemIDs_from_item(binItem)
323        setOfBinTagsHashes = set()
324        for tag in binTags:
325            setOfBinTagsHashes.add(tag.__hash__())
326        for itemID in potentialIDs:
327            currentItemTagsSet = self.get_tagsHashes_from_single_item(itemID, isWithoutRootHash=False)
328            if setOfBinTagsHashes == currentItemTagsSet:
329                return itemID
330        return None
def tag_hash_list_2_tag_list(self, tagHashList):
332    def tag_hash_list_2_tag_list(self, tagHashList):
333        tagList = list()
334        for tagHash in tagHashList:
335            tagList.append(self.tagsSet[tagHash])
336        return tagList
def get_item_and_tags_from_itemID(self, itemID):
338    def get_item_and_tags_from_itemID(self, itemID):
339        commonTagTupleHash = self.itemWithTags[itemID]
340        tagSet = set(self.commonTagSets[commonTagTupleHash]) - {self.get_root_tag().__hash__()}
341        sortedTagHashList = self.sort_tag_hash_list_by_qnt(tagSet)
342        result = (self.itemsSet[itemID], self.tag_hash_list_2_tag_list(sortedTagHashList))
343        return result
def get_top_tag_hash_list_by_qnt(self, tagHashSet, local_tags_qnt=None):
346    def get_top_tag_hash_list_by_qnt(self, tagHashSet, local_tags_qnt=None):
347        tagsQnt = self.tagsQnt
348        if local_tags_qnt is not None:
349            tagsQnt = local_tags_qnt
350        tag_hash_set = set(tagHashSet)
351        tag_by_qnt = dict()
352        tag_by_qnt__filler = AddToCompoundDict__Set(tag_by_qnt)
353        biggest_qnt = 0
354        for tag_hash in tag_hash_set:
355            qnt = tagsQnt[tag_hash]
356            if qnt > biggest_qnt:
357                biggest_qnt = qnt
358            # if qnt not in tag_by_qnt:
359            #     tag_by_qnt[qnt] = set()
360            # tag_by_qnt[qnt].add(tag_hash)
361            tag_by_qnt__filler.add(qnt, tag_hash)
362
363        result = None
364        if len(tag_by_qnt) > 0:
365            # biggest_qnt = max(tag_by_qnt)
366            result = tuple(tag_by_qnt[biggest_qnt])
367        else:
368            result = tuple()
369        return result
def sort_tag_hash_list_by_qnt(self, tagHashSet):
371    def sort_tag_hash_list_by_qnt(self, tagHashSet):
372        # will return sorted tag list - not sorted tag hash list
373        tagHashSet = set(tagHashSet)
374        rawTagList = list()
375        for tagHash in tagHashSet:
376            tagWithWeight = (tagHash, self.tagsQnt[tagHash])
377            rawTagList.append(tagWithWeight)
378        return self.sort_raw_tag_list(rawTagList)
def sort_tag_list_by_qnt(self, binTags):
380    def sort_tag_list_by_qnt(self, binTags):
381        binTags = set(binTags)
382        rawTagList = list()
383        for tag in binTags:
384            tagHash = tag.__hash__()
385            tagWithWeight = (tag, self.tagsQnt[tagHash])
386            rawTagList.append(tagWithWeight)
387        return self.sort_raw_tag_list(rawTagList)
def sort_tag_hash_list_by_hash(self, tagHashSet):
389    def sort_tag_hash_list_by_hash(self, tagHashSet):
390        # will return sorted tag list - not sorted tag hash list
391        tagHashSet = set(tagHashSet)
392        rawTagList = list()
393        for tagHash in tagHashSet:
394            tagWithWeight = (tagHash, tagHash)
395            rawTagList.append(tagWithWeight)
396        return self.sort_raw_tag_list(rawTagList)
def sort_tag_list_by_hash(self, binTags):
398    def sort_tag_list_by_hash(self, binTags):
399        binTags = set(binTags)
400        rawTagList = list()
401        for tag in binTags:
402            tagHash = tag.__hash__()
403            tagWithWeight = (tag, tagHash)
404            rawTagList.append(tagWithWeight)
405        return self.sort_raw_tag_list(rawTagList)
def sort_raw_tag_list(self, rawTagList):
407    def sort_raw_tag_list(self, rawTagList):
408        # will return sorted tag list
409        rawTagList = sorted(rawTagList, key=lambda tagAndWeight: tagAndWeight[1], reverse=True)
410        tagList = list()
411        for rawTag in rawTagList:
412            tagList.append(rawTag[0])
413        return tagList
def get_itemIDs_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                          isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
    """Return the IDs of the items filed under the tag path ``binTags``.

    The root tag is always treated as part of the path, whether or not the
    caller included it.

    Args:
        binTags: iterable of tag objects that form the path.
        treeType: selects what counts as "under the path".
            SMART_TREE_TYPE and FULL_TREE_TYPE return only the items stored in
            ``self.itemsOnTheCommonTagSets`` for exactly this tag set.
            PLAIN_PSEUDO_TREE_TYPE returns those items plus the items of every
            larger tag set that contains the path.
        isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags: when true, the
            PLAIN_PSEUDO_TREE_TYPE result is computed as well and returned as
            a second set.

    Returns:
        A new set of item IDs, or, when the flag is set, a tuple
        ``(result_ids, all_internal_ids)`` of two new sets.

    Raises:
        UnknownTreeTypeError: ``treeType`` is none of the three types above.
    """
    binTags = set(binTags)
    binTags.add(self.get_root_tag())
    tagHashSet = {binTag.__hash__() for binTag in binTags}

    # Tag sets are keyed by the hash of the tuple of their hashes in canonical
    # (sort_tag_hash_list_by_hash) order.
    exactGroupHash = tuple(self.sort_tag_hash_list_by_hash(tagHashSet)).__hash__()

    allInternalItemIDs = set()
    if (treeType == PLAIN_PSEUDO_TREE_TYPE) or isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
        if exactGroupHash in self.itemsOnTheCommonTagSets:
            # Work on a COPY. The previous code bound the stored set itself and
            # then called update() on it, so every lookup permanently added the
            # items of longer paths to the entry of this exact path.
            # NOTE(review): that aliasing looks like the cause of the old TODO
            # about SMART_TREE_TYPE also returning items from sub-folders;
            # confirm and close the TODO.
            allInternalItemIDs = set(self.itemsOnTheCommonTagSets[exactGroupHash])
        binTagsQnt = len(tagHashSet)
        for commonTagQnt in self.setOfTagGroupQnt:
            # Only a strictly larger tag set can contain the path as a proper part.
            if commonTagQnt > binTagsQnt:
                for commonTagGroupHash in self.tagsQntPerCommonTagSet[commonTagQnt]:
                    if tagHashSet.issubset(self.commonTagSets[commonTagGroupHash]):
                        allInternalItemIDs.update(self.itemsOnTheCommonTagSets[commonTagGroupHash])

    if (treeType == SMART_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
        if exactGroupHash in self.itemsOnTheCommonTagSets:
            resultItemIDSet = self.itemsOnTheCommonTagSets[exactGroupHash]
        else:
            resultItemIDSet = set()
    elif treeType == PLAIN_PSEUDO_TREE_TYPE:
        resultItemIDSet = allInternalItemIDs
    else:
        raise UnknownTreeTypeError()

    # Always hand out fresh sets so callers cannot modify the stored index.
    if isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags:
        return (set(resultItemIDSet), set(allInternalItemIDs))
    return set(resultItemIDSet)
def get_items_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                        isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=False):
    """Return the items (not their IDs) filed under the tag path ``binTags``.

    The lookup itself is done by ``self.get_itemIDs_from_tags`` with the same
    ``treeType`` and flag; every resulting ID is then resolved through
    ``self.itemsSet``. Items are de-duplicated through a set, so they must be
    hashable and the order of the returned tuple is not significant.

    Returns:
        A tuple of items, or, when
        ``isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags`` is true, a pair
        ``(tuple_of_items, tuple_of_all_internal_item_ids)``. The second
        element contains item IDs, not items.
    """
    wantInternalIDs = isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags
    lookup = self.get_itemIDs_from_tags(set(binTags), treeType=treeType,
                                        isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=wantInternalIDs)
    if not wantInternalIDs:
        return tuple({self.itemsSet[itemID] for itemID in lookup})

    resolvedItems = {self.itemsSet[itemID] for itemID in lookup[0]}
    return (tuple(resolvedItems), tuple(lookup[1]))
def get_tagHashes_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                            prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
    """Return the hashes of the tags reachable below the tag path ``binTags``.

    Args:
        binTags: iterable of tag objects that form the path.
        treeType: same meaning as in ``get_items_from_tags``.
        prePreparedSetOfAllInternalItemIDsForThisSetOfTags: optional IDs of
            all items inside the path, including nested ones. When ``None``
            they are fetched with
            ``get_itemIDs_from_tags(binTags, treeType=PLAIN_PSEUDO_TREE_TYPE)``.
            A caller can obtain them from:
              a) get_itemIDs_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE)
              b) get_itemIDs_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
              c) get_items_from_tags(..., isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
            get_items_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE) is NOT
            suitable: it returns items, not item IDs.

    Returns:
        A list of tag hashes without the root tag's hash, ordered by
        ``self.sort_tag_hash_list_by_qnt``.

    Raises:
        UnknownTreeTypeError: ``treeType`` is not one of SMART_TREE_TYPE,
            FULL_TREE_TYPE or PLAIN_PSEUDO_TREE_TYPE.
    """
    binTags = set(binTags)

    internalItemIDs = prePreparedSetOfAllInternalItemIDsForThisSetOfTags
    if internalItemIDs is None:
        internalItemIDs = self.get_itemIDs_from_tags(binTags, treeType=PLAIN_PSEUDO_TREE_TYPE)

    childTagHashes = set()
    if (treeType == PLAIN_PSEUDO_TREE_TYPE) or (treeType == FULL_TREE_TYPE):
        # Every tag carried by any item inside the path, minus the path itself.
        givenHashes = {tag.__hash__() for tag in binTags}
        seenHashes = set()
        for itemID in internalItemIDs:
            if itemID in self.itemWithTags:
                seenHashes.update(self.commonTagSets[self.itemWithTags[itemID]])
        childTagHashes = seenHashes - givenHashes
    elif treeType == SMART_TREE_TYPE:
        # Only the first level of the smart tree is wanted. Simply collecting
        # the tags of items that have exactly one extra tag would miss
        # sub-folders that contain only other sub-folders and no items of
        # their own, hence the tree builder.
        smartTree = self.build_smart_tree(binTags, prePreparedSetOfAllInternalItemIDs=internalItemIDs,
                                          zeroSliceOnly=True)
        if 0 in smartTree:
            childTagHashes = smartTree[0]
    else:
        raise UnknownTreeTypeError()

    return list(self.sort_tag_hash_list_by_qnt(childTagHashes - {self.get_root_tag().__hash__()}))
def get_tags_from_tags(self, binTags, treeType=USUAL_TREE_TYPE,
                       prePreparedSetOfAllInternalItemIDsForThisSetOfTags=None):
    """Return the tag objects reachable below the tag path ``binTags``.

    Same lookup as ``get_tagHashes_from_tags`` (all arguments are forwarded
    unchanged); the resulting hashes are converted with
    ``self.tag_hash_list_2_tag_list`` and returned as a tuple.
    """
    preparedIDs = prePreparedSetOfAllInternalItemIDsForThisSetOfTags
    childTagHashes = self.get_tagHashes_from_tags(
        binTags,
        treeType=treeType,
        prePreparedSetOfAllInternalItemIDsForThisSetOfTags=preparedIDs,
    )
    return tuple(self.tag_hash_list_2_tag_list(childTagHashes))
def build_smart_tree(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
    """Group the tags found below a tag path into numbered tree levels.

    Args:
        startingBinTags: iterable of tag objects forming the path; the root
            tag is added when absent.
        prePreparedSetOfAllInternalItemIDs: IDs of all items inside the path.
            When ``None`` they are fetched with
            ``get_itemIDs_from_tags(..., treeType=PLAIN_PSEUDO_TREE_TYPE)``.
            Every ID must be a key of ``self.itemWithTags``.
        zeroSliceOnly: when true, the per-item tag order comes from
            ``self.get_top_tag_hash_list_by_qnt(hashes, local_counts)``;
            otherwise from ``self.sort_tag_hash_list_by_qnt(hashes)``.
            NOTE(review): ``get_top_tag_hash_list_by_qnt`` is defined
            elsewhere; presumably it keeps only the locally most frequent
            hashes. Confirm.

    Returns:
        A new dict ``{level: set_of_tag_hashes}``; key ``0`` is always present.
    """
    rootedTags = set(startingBinTags)
    rootedTags.add(self.get_root_tag())
    pathHashes = {tag.__hash__() for tag in rootedTags}

    if prePreparedSetOfAllInternalItemIDs is None:
        internalItemIDs = self.get_itemIDs_from_tags(rootedTags, treeType=PLAIN_PSEUDO_TREE_TYPE)
    else:
        internalItemIDs = prePreparedSetOfAllInternalItemIDs

    def extraHashesOf(itemID):
        # Tag hashes of the item that are not already part of the path.
        return set(self.commonTagSets[self.itemWithTags[itemID]]) - pathHashes

    smartTree = {0: set()}
    # NOTE(review): both fillers are assumed to modify the dict they wrap in
    # place: add(level, hash) puts the hash into that level's set, and
    # add(hash) bumps that hash's counter from a default of 0. Confirm against
    # AddToCompoundDict / AddToCompoundDict__Set.
    levelFiller = AddToCompoundDict__Set(smartTree)
    localTagsQnt = dict()
    qntFiller = AddToCompoundDict(
        localTagsQnt,
        lambda: 0,
        lambda container, key, value: (True, container[key] + 1)
    )

    # Pass 1: count how many of the internal items carry each extra tag.
    for itemID in internalItemIDs:
        for tagHash in extraHashesOf(itemID):
            qntFiller.add(tagHash)

    # Pass 2: walk each item's extra tags in order; the level grows every
    # time the local count differs from that of the previous tag.
    for itemID in internalItemIDs:
        extraHashes = extraHashesOf(itemID)
        if zeroSliceOnly:
            orderedHashes = self.get_top_tag_hash_list_by_qnt(extraHashes, localTagsQnt)
        else:
            orderedHashes = self.sort_tag_hash_list_by_qnt(extraHashes)

        treeLevel = 0
        lastTagHash = None
        lastTagHashQnt = None
        for tagHash in orderedHashes:
            currentTagHashQnt = localTagsQnt[tagHash]
            if (lastTagHash is not None) and not (currentTagHashQnt == lastTagHashQnt):
                treeLevel += 1
            levelFiller.add(treeLevel, tagHash)
            lastTagHash = tagHash
            lastTagHashQnt = currentTagHashQnt

    return dict(smartTree)
def build_smart_tree_2(self, startingBinTags, prePreparedSetOfAllInternalItemIDs=None, zeroSliceOnly=False):
    """Group the tags found below a tag path into numbered tree levels.

    This method used to be a line-for-line copy of ``build_smart_tree``. It
    now delegates to it, so the two entry points cannot drift apart. The
    arguments and the returned ``{level: set_of_tag_hashes}`` dict are exactly
    those of ``build_smart_tree``.

    Args:
        startingBinTags: iterable of tag objects forming the path.
        prePreparedSetOfAllInternalItemIDs: optional IDs of all items inside
            the path, forwarded unchanged.
        zeroSliceOnly: forwarded unchanged.

    Returns:
        The dict returned by ``self.build_smart_tree``.
    """
    return self.build_smart_tree(
        startingBinTags,
        prePreparedSetOfAllInternalItemIDs=prePreparedSetOfAllInternalItemIDs,
        zeroSliceOnly=zeroSliceOnly,
    )
def get_all_from_tags(self, binTags, treeType=USUAL_TREE_TYPE):
    """Return both the sub-tags and the items found under the tag path.

    The items are fetched first, together with the IDs of all items inside
    the path; those IDs are then reused for the tag lookup so they are not
    computed twice.

    Returns:
        ``(tuple_of_tags, set_of_items)``. When there is no item at all inside
        the path the result is ``(tuple(), set())``.
    """
    binTags = set(binTags)
    found = self.get_items_from_tags(binTags, treeType=treeType,
                                     isAlsoNeedSetOfAllInternalItemIDsForThisSetOfTags=True)
    if len(found[1]) == 0:
        return (tuple(), set())

    subTags = self.get_tags_from_tags(binTags, treeType=treeType,
                                      prePreparedSetOfAllInternalItemIDsForThisSetOfTags=found[1])
    return (tuple(subTags), set(found[0]))
def get_tagsHashes_from_single_item(self, itemID, isWithoutRootHash=True):
    """Return the set of tag hashes attached to the item ``itemID``.

    The item's tag-group key is read from ``self.itemWithTags`` and resolved
    through ``self.commonTagSets``. With ``isWithoutRootHash`` (the default)
    the root tag's hash is left out.

    Returns a new set; it is empty when ``itemID`` is unknown.
    """
    if itemID not in self.itemWithTags:
        return set()

    itemTagHashes = set(self.commonTagSets[self.itemWithTags[itemID]])
    if isWithoutRootHash:
        itemTagHashes.discard(self.get_root_tag().__hash__())
    return itemTagHashes
def get_potential_itemIDs_from_item(self, binItem):
    """Return the item IDs registered for the hash of ``binItem``.

    The lookup key is ``binItem.__hash__()`` in ``self.itemIDsForItem``.
    The IDs are only candidates: nothing here compares the stored items with
    ``binItem`` itself.

    Returns a new set; it is empty when the hash is unknown.
    """
    knownIDs = self.itemIDsForItem
    itemHash = binItem.__hash__()
    return set(knownIDs[itemHash]) if itemHash in knownIDs else set()
def is_smart_redirection_for_a_tag_path_reduction_needed(self, binTags):
    """Find the tags that every longer tag set containing this path shares.

    All stored tag sets that strictly contain the path (root tag included)
    are intersected. The tags in that intersection which are not already part
    of the path are returned.

    Returns:
        A list of tag hashes ordered by ``self.sort_tag_hash_list_by_qnt``.
        Despite the ``is_`` prefix this is a list, not a bool; it is empty
        when no stored tag set strictly contains the path or when the
        containing sets have nothing extra in common.
    """
    pathTags = set(binTags)
    pathTags.add(self.get_root_tag())
    pathHashes = {tag.__hash__() for tag in pathTags}
    pathLen = len(pathHashes)

    # Only tag sets recorded with more tags than the path can strictly contain it.
    candidateGroupHashes = set()
    for groupSize in self.setOfTagGroupQnt:
        if groupSize > pathLen:
            candidateGroupHashes.update(self.tagsQntPerCommonTagSet[groupSize])

    sharedHashes = None
    for groupHash in candidateGroupHashes:
        groupHashes = set(self.commonTagSets[groupHash])
        if pathHashes < groupHashes:  # proper subset: contained and not equal
            sharedHashes = groupHashes if sharedHashes is None else sharedHashes & groupHashes

    if sharedHashes is None:
        return list()

    reductionHashes = sharedHashes - pathHashes
    return list(self.sort_tag_hash_list_by_qnt(reductionHashes - {self.get_root_tag().__hash__()}))
def get_tags_for_a_smart_redirection(self, binTags):
    """Return, as tag objects, the tags the path ``binTags`` can be extended by.

    The hashes come from
    ``self.is_smart_redirection_for_a_tag_path_reduction_needed(binTags)`` and
    are converted with ``self.tag_hash_list_2_tag_list``.

    Returns a tuple of tags in the order produced by that conversion.
    """
    reductionHashes = self.is_smart_redirection_for_a_tag_path_reduction_needed(binTags)
    reductionTags = self.tag_hash_list_2_tag_list(reductionHashes)
    return tuple(reductionTags)