source: trunk/LeMillCatalogTool.py @ 3127

Revision 3127, 41.4 KB checked in by jukka, 9 years ago (diff)

Cleaned a bit.

Line 
1#
2# Plone CatalogTool simplified -- does not use ExtensibleIndexableObjectWrapper, workflows or permissions. Basically just ZCatalog posing as Plone Tool, allowing but ignoring plonish commands.
3#
4
5from Products.CMFPlone import ToolNames
6from AccessControl import ClassSecurityInfo
7from Globals import InitializeClass
8from Globals import DTMLFile
9
10from Products.ZCatalog.ZCatalog import ZCatalog
11from Products.ZCTextIndex.ParseTree import ParseError
12from Products.CMFPlone.CatalogTool import CatalogTool as PloneCatalogTool
13from Products.CMFPlone.PloneBatch import Batch
14from Products.PythonScripts.standard import urlencode
15
16from zope.interface import implements
17
18from Products.CMFCore.utils import SimpleRecord, getToolByName
19from Products.ZCTextIndex.HTMLSplitter import HTMLWordSplitter
20from Products.ZCTextIndex.Lexicon import CaseNormalizer
21from Products.ZCTextIndex.Lexicon import Splitter
22from Products.ZCTextIndex.Lexicon import StopWordRemover
23
24from Products.CMFPlone.UnicodeSplitter import Splitter as UnicodeSplitter
25from Products.CMFPlone.UnicodeSplitter import CaseNormalizer as UnicodeCaseNormalizer
26from Products.ZCTextIndex.ZCTextIndex import PLexicon
27from Products.ZCatalog.Lazy import Lazy, LazyMap
28from random import randint
29from config import CONTENT_TYPES, MATERIAL_TYPES, ACTIVITY_TYPES, TOOLS_TYPES, LANGUAGES, LANGUAGES_DICT, TYPE_NAMES, SUBJECT_AREAS_DICT, TARGET_GROUPS_DICT, SUBJECT_AREAS_INVERSE_DICT, TARGET_GROUPS_INVERSE_DICT, COMMUNITY_TYPES, SEARCHABLE_TYPES, DEFAULT_ICONS, TARGET_GROUP
30import time
31from math import log
32from messagefactory_ import i18nme as _
33
34class CatalogTool(PloneCatalogTool):
35
36    meta_type = 'LeMill Catalog Tool'
37    security = ClassSecurityInfo()
38    toolicon = 'skins/lemill/tool.gif'
39
40    __implements__ = PloneCatalogTool.__implements__
41
42    # Originally CatalogTool had some simplified some expensive stuff from Plone's Catalog tool and added some
43    # methods allowing us more control over indexing / unindexing
44   
45    # For LeMill 3.0,
46
47    security.declarePublic( 'enumerateIndexes' ) # Subclass can call
48    def enumerateIndexes( self ):
49        #   Return a list of ( index_name, type, extra ) tuples for the initial
50        #   index set.
51        #   Creator is deprecated and may go away, use listCreators!
52        #   meta_type is deprecated and may go away, use portal_type!
53        plaintext_extra = SimpleRecord( lexicon_id='plaintext_lexicon'
54                                      , index_type='Okapi BM25 Rank'
55                                      )
56        htmltext_extra = SimpleRecord( lexicon_id='htmltext_lexicon'
57                                     , index_type='Okapi BM25 Rank'
58                                     )
59        # Adding plone_lexicon as it should be, no nee dto change the name for our own >>> safer that way
60        plone_extra = SimpleRecord( lexicon_id='plone_lexicon'
61                                  , index_type='Okapi BM25 Rank'
62                                  )
63
64        return ( ('Title', 'ZCTextIndex', plone_extra)
65               , ('Subject', 'KeywordIndex', None)
66               , ('Description', 'ZCTextIndex', plone_extra)
67               , ('Creator', 'FieldIndex', None)
68               , ('listCreators', 'KeywordIndex', None)
69               , ('SearchableText', 'ZCTextIndex', plone_extra)
70               , ('Date', 'DateIndex', None)
71               , ('Type', 'FieldIndex', None)
72               , ('created', 'DateIndex', None)
73               , ('effective', 'DateIndex', None)
74               , ('expires', 'DateIndex', None)
75               , ('modified', 'DateIndex', None)
76               , ('allowedRolesAndUsers', 'KeywordIndex', None)
77               , ('review_state', 'FieldIndex', None)
78               , ('in_reply_to', 'FieldIndex', None)
79               , ('meta_type', 'FieldIndex', None)
80               , ('getId', 'FieldIndex', None)
81               , ('path', 'PathIndex', None)
82               , ('portal_type', 'FieldIndex', None)
83               )
84
85    security.declarePublic('enumerateLexicons')
86    def enumerateLexicons(self):
87        # Creating our own lexicon in a different way, as the automatic creation seems to be needing something to act as a StopWordRemover and fails miserably without it.
88        lexicon = PLexicon('plone_lexicon', '', UnicodeSplitter(), UnicodeCaseNormalizer())
89        self._setObject('plone_lexicon', lexicon)
90        return (
91                 ( 'plaintext_lexicon'
92                 , Splitter()
93                 , CaseNormalizer()
94                 , StopWordRemover()
95                 )
96               , ( 'htmltext_lexicon'
97                 , HTMLWordSplitter()
98                 , CaseNormalizer()
99                 , StopWordRemover()
100                 )
101               )
102    #XXX END
103
104    def catalog_object(self, object, uid, idxs=[],
105                       update_metadata=1, pghandler=None):
106        if object and hasattr(object, 'portal_type') and object.portal_type!='CollectionsFolder':
107            ZCatalog.catalog_object(self, object, uid, idxs, update_metadata, pghandler=pghandler)
108
109    def searchResults(self, REQUEST=None, **kw):
110        """Calls ZCatalog.searchResults """
111        return ZCatalog.searchResults(self, REQUEST, **kw)
112
113    __call__ = searchResults
114
115
116    def titleSearch(self, title='', sort_limit=0):
117        """ search titles containing given string """
118        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
119        if sort_limit:
120            query['sort_limit']=sort_limit
121        query['Title']='%s*' % title
122        results=self.searchResults(query)
123        return results
124
125    def fulltextSearch(self, SearchableText='', sort_limit=0):
126        """ search fulltext for a string """
127        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
128        if sort_limit:
129            query['sort_limit']=sort_limit
130        query['SearchableText']='%s*' % SearchableText
131        results=self.searchResults(query)
132        return results
133
134    ###### 'Adapters' start here
135    # these are methods that are very specifically used by page templates to efficiently return just the results needed there 
136
137    def buildResultsFromBatch(self, batch):
138        """ This method takes a Batch of results (usually 30 or less, LazyMap) and returns a list of
139            *minimal metadata* about them. This metadata is in form of dictionary.
140           
141            The idea is that instantiating real metadata objects from batch results is still a costly process
142            and with this we can show search/browse results without instantiating metadata.
143           
144            This method will also do some preprocessing for metadata, f.ex finds proper names for authors and builds links to them.
145           
146            This method is crafted for resource_list_macros.pt and if you aren't going to display results as they are displayed there, you probably
147            shouldn't use this.
148            """
149        d_list=[]
150        lutool=getToolByName(self, 'lemill_usertool')
151        ltool=getToolByName(self, 'lemill_tool')
152        url_base=getToolByName(self, 'portal_url')()
153        created=False
154        edited=False
155        if hasattr(self, 'REQUEST') and hasattr(self.REQUEST, 'form'):
156            form=self.REQUEST.form
157            if form:
158                created= 'created' in form
159                edited='edited' in form
160        if hasattr(batch._sequence, '_seq'):
161            seq=list(batch._sequence._seq[batch.start-1:batch.end])
162            keys=['rid','getHasCoverImage','getNicename','portal_type','getLocation_country','getTags', 'Language','listCreators']
163            if created:
164                keys.append('created')
165            elif edited:
166                keys.append('getLatestEdit')
167            md=self.fastMetadata(seq, keys)
168            for item in md:
169                if item[3] not in TYPE_NAMES:
170                    continue
171                d={'url':self.getpath(item[0]),
172                    'coverimage_url':item[1],
173                    'title':item[2],
174                    'readable_type':item[3],
175                    'country':item[4],
176                    'tags':item[5],
177                    'language':item[6],
178                    'authors':item[7]}
179                if created:
180                    d['created']=item[8] # These are in weird DateIndex format, difficult to map to actual dates
181                elif edited:
182                    d['edited']=item[8]
183                d_list.append(d)
184            if created or edited:
185                now=time.gmtime() # mirrors DateIndex's conversion script to provide a compatible 'now'
186                now= ( ( ( ( now[0] * 12 + now[1] ) * 31 + now[2] ) * 24 + now[3] ) * 60 + now[4] )
187                useDateIndex=True                         
188        else: # batch has already been converted to catalog Brains object
189            for item in batch:               
190                d={'url':item.getURL(),
191                    'coverimage_url':item.getHasCoverImage,
192                    'title':item.getNicename,
193                    'readable_type':item.portal_type,
194                    'country':item.getLocation_country,
195                    'tags':item.getTags,
196                    'language':item.Language,
197                    'authors':item.listCreators}                 
198                if created:
199                    d['created']=item.created
200                elif edited:
201                    d['edited']=item.getLatestEdit
202                d_list.append(d)
203            now=time.time()
204            useDateIndex=False
205        tag_base='/'.join((url_base,'search?index_type=tags&q='))
206        lang_base='/'.join((url_base,'search?language='))
207        country_base='/'.join((url_base,'community/browse?country='))
208       
209        # Manipulate raw index data to something more useful
210        for item in d_list:
211            if callable(item['title']):
212                item['title']=item['title']()
213            url=item['url']
214            if item['coverimage_url']:
215                item['coverimage_url']='/'.join((url,'coverImage'))
216            else:
217                item['coverimage_url']='/'.join((url_base, DEFAULT_ICONS[item['readable_type']]))
218            ll=item['language']
219            if ll:
220                item['language']=LANGUAGES_DICT[ll]
221                item['language_link']=''.join((lang_base,ll))
222            item['tags']=[(tag, ''.join((tag_base, tag))) for tag in item['tags']]
223            item['authors']=[self.fastLink(author) for author in item['authors']]
224            if item['country']:         
225                item['country_link']=''.join((country_base,item['country']))
226            item['readable_type']=TYPE_NAMES.get(item['readable_type'], None)[0]
227            if 'created' in item:
228                item['timedif']=ltool.getTimeDifference(item['created'], now=now, useDateIndex=useDateIndex)
229            elif 'edited' in item:
230                item['timedif']=ltool.getTimeDifference(item['edited'], now=now, useDateIndex=useDateIndex)
231        return d_list
232
233    def createBrowsingOptions(self, REQUEST, results=None, **kw):
234        """ This is used by browse_macros.pt and other places that need to display filters
235            based on the search results available and the section where the results are displayed.
236       
237            This method analyzes both request and results to decide what filter fields it should display and if it should
238            preselect something or count occurences for each value. The logic for this may look hairy,
239             but it can be understood by approaching this case-by-case:
240            'If we are browsing in Content section, and there are >1000 results, what filters we should show'           
241        """
242        src=dict(REQUEST.form.items())
243        # section determines what kinds of options there are:
244        path=REQUEST['ACTUAL_URL'].split('/')
245        force_language=True
246        if 'portfolio' in kw:
247            do_languages=False
248            do_subject_areas=True
249            do_target_groups=False
250            do_types=True
251            type_selection=SEARCHABLE_TYPES
252            do_edited=True
253            do_tags=False
254            count_indexes=('portal_type','getSubject_area')
255        elif 'content' in path:
256            do_languages=True
257            do_subject_areas=True
258            do_target_groups=True
259            do_types=True
260            type_selection=CONTENT_TYPES
261            do_edited=True
262            do_tags=True
263            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
264        elif 'methods' in path or 'tools' in path:
265            do_languages=True
266            do_subject_areas=False
267            do_target_groups=False
268            do_types=False
269            do_edited=True
270            do_tags=True
271            count_indexes=('Language',)
272        elif 'community' in path:
273            do_languages=True
274            force_language=False
275            do_subject_areas=True
276            do_target_groups=False
277            do_types=True
278            type_selection=COMMUNITY_TYPES
279            do_edited=True
280            do_tags=True
281            count_indexes=('Language','portal_type','getSubject_area')
282        else:
283            do_languages=True
284            force_language=False
285            do_subject_areas=True
286            do_target_groups=True
287            do_types=True
288            type_selection=SEARCHABLE_TYPES
289            do_edited=True
290            do_tags=False
291            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
292       
293        d={'do_languages':do_languages, 'do_subject_areas':do_subject_areas,'do_target_groups':do_target_groups,'do_types':do_types,'do_edited':do_edited, 'do_tags':do_tags, 'q':'', 'state':'', 'author':'', 'created':''}
294
295
296        # now see if the results should be counted and provide a dictionary of counted values if necessary
297        if results and len(results)<1000:
298            count_results= dict(zip(count_indexes, self.fastCount(results, count_indexes)))
299            count=len(results)
300        else:
301            count=0
302
303        # Languages: two lists, one for probable options and other for improbable
304        if do_languages:
305            all_languages=list(LANGUAGES)[1:] # Remove 'language neutral' from options
306            all_languages[0]=('','any language')
307            all_language_codes=[l[0] for l in all_languages]
308            lang_dict = getToolByName(self, 'lemill_tool').language_dict
309            common_language_codes=getToolByName(self,'lemill_usertool').getLanguages()
310            selected_language= src.get('language','')
311            was_empty=not selected_language
312            if was_empty:
313                if force_language:
314                    selected_language=common_language_codes[0]
315                else:
316                    selected_language='all'
317            common_languages=[]
318            rare_languages=[('all','All',int(selected_language=='all'),0)]
319            if count and was_empty and False:
320                langs=count_results['Language']
321                for langcode in common_language_codes:
322                    if langcode and langcode in langs:
323                        common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
324                        all_language_codes.remove(langcode)           
325                for langcode in all_language_codes:
326                    if langcode and langcode in langs:
327                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
328            else:
329                for langcode in common_language_codes:
330                    common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
331                    all_language_codes.remove(langcode)           
332                for langcode in all_language_codes:
333                    if langcode:
334                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
335            d['common_languages']=common_languages
336            d['rare_languages']=rare_languages
337            d['lang_disabled']=False # portal_type==Piece can set this to true
338            if selected_language!='all':
339                d['lang_filter']='language=%s&' % selected_language
340            else:
341                d['lang_filter']=''
342        if do_subject_areas:
343            selected=src.get('subject_area','')
344            if selected:
345                all_count='?'
346            else:
347                all_count=count
348            subject_areas=[('','All',int(not selected), all_count)]
349            if count:
350                counts=count_results['getSubject_area']
351                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
352                    if sa_full in counts:
353                        subject_areas.append((sa_key, sa_full, int(selected==sa_key), counts[sa_full]))
354            else:
355                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
356                    subject_areas.append((sa_key, sa_full, int(selected==sa_key), 0))
357
358            d['subject_area']=subject_areas
359            d['subjs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
360        if do_target_groups:
361            selected=src.get('target_group','')
362            if selected:
363                all_count='?'
364            else:
365                all_count=count
366            target_groups=[('','All',int(not selected),all_count)]
367            if count:
368                counts=count_results['getTarget_group']
369                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
370                    if tg_full in counts:
371                        target_groups.append((tg_key, tg_full, int(selected==tg_key), counts[tg_full]))
372            else:
373                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
374                    target_groups.append((tg_key, tg_full, int(selected==tg_key), 0))
375            d['target_group']=target_groups
376            d['targs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
377        if do_types:
378            selected=src.get('type','')
379            if selected:
380                all_count='?'
381            else:
382                all_count=count
383            types=[('','All',int(not selected),all_count)]
384            if count:
385                counts=count_results['portal_type']
386                for type_key in type_selection:
387                    if type_key in counts:
388                        types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), counts[type_key]))
389            else:
390                for type_key in type_selection:
391                    types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), 0))
392            d['type']=types
393            if selected:
394                if selected in ['Piece','Activity','Tool']:
395                    d['targs_disabled']=True
396                    d['subjs_disabled']=True
397                if selected=='Piece':
398                    d['lang_disabled']=True
399
400        if do_edited:
401            try:
402                selected=int(src.get('edited',0))
403            except ValueError:
404                selected=0
405            d['edited']=[('',_('Any time'),int(not selected),0), (365,_('Last year'),int(selected==365),0), (30,_('Last month'),int(selected==30),0), (7,_('Last week'),int(selected==7),0), (1,_('Yesterday'),int(selected==1),0)]
406        # Previous search term
407        if 'q' in src:
408            d['q']=src['q']
409        if 'state' in src:
410            d['state']=src['state']
411        if 'author' in src:
412            d['author']=src['author']           
413        return d
414
415
416    def decideBrowsingSubType(self):
417        """ Browse page heading needs to know what kind of browsing is going on """
418        form=self.REQUEST.form       
419        if 'state' in form:
420            if form['state']=='public': return 'published'
421            if form['state']=='draft': return 'drafts'
422        path=self.REQUEST['ACTUAL_URL'].split('/')
423        for key in ['content','methods','tools','community']:
424            if key in path: return key
425        return ''           
426
427    def decideSearchSubType(self):
428        """ Search page heading needs to know what kind of search is going on """
429        form=self.REQUEST.form       
430        if 'created' in form:
431            if 'type' in form and form['type']=='MemberFolder':
432                return 'new_members'
433            else:
434                return 'new_resources'
435        if 'edited' in form and 'q' not in form:
436            return 'recent_edits'
437        return 'search'
438
439    def browsingSearch(self, REQUEST=None, **kw):       
440        """ This is a general purpose catalog search that can convert readable keywords from request into actual search terms.
441        recognized keywords: language, subject_area, target_group, type, tags, state, author, created, edited, group, country.
442       
443        These same keywords are used all over in interface to build selection boxes, select page headers etc.
444        The actual search indexes like 'getTags' etc. are used only here and should not be used elsewhere.               
445        """
446        src=REQUEST.form
447        if '-C' in src:
448            del src['-C']
449        if not (src or kw):
450            path=REQUEST['ACTUAL_URL'].split('/')
451            if 'browse' in path or 'search' in path or 'cloud' in path:
452                return []
453        keywords={'sort_on':'getScore','sort_order':'reverse','getState':('draft', 'public')}
454        keywords.update(src)
455        keywords.update(kw)
456
457        find_empty_values=keywords.get('empties','')
458
459        if 'language' in keywords:
460            if keywords['language']=='all':
461                keywords['Language']=''
462            else:           
463                keywords['Language']=keywords['language']
464            del keywords['language']
465        if 'subject_area' in keywords:
466            if keywords['subject_area'] in SUBJECT_AREAS_DICT:
467                keywords['getSubject_area']=SUBJECT_AREAS_DICT[keywords['subject_area']]
468            del keywords['subject_area']
469        if 'target_group' in keywords:
470            if keywords['target_group'] in TARGET_GROUPS_DICT:
471                keywords['getTarget_group']=TARGET_GROUPS_DICT[keywords['target_group']]
472            del keywords['target_group']
473        if 'tags' in keywords:
474            keywords['getTags']=keywords['tags']
475            del keywords['tags']
476        if 'author' in keywords and keywords['author']:
477            keywords['listCreators']=keywords['author']
478            del keywords['author']
479        if 'group' in keywords and keywords['group']:
480            keywords['getRawGroupEditing']=keywords['group']
481            del keywords['group']
482        if 'state' in keywords:
483            if keywords['state']=='draft':
484                lutool=getToolByName(self,'lemill_usertool')               
485                if 'listCreators' in keywords and keywords['listCreators'] == lutool.getAuthenticatedId(): # only allow authenticated author to view privates
486                    keywords['getState']=('draft', 'private')
487                else:
488                    keywords['getState']='draft'
489            elif keywords['state']=='private': # don't allow manually mungling browse terms
490                return []
491            else:
492                keywords['getState']=keywords['state']           
493            del keywords['state']
494        if 'country' in keywords:
495            keywords['getLocation_country']=keywords['country']
496        if 'type' in keywords:
497            if keywords['type']=='lr':
498                keywords['portal_type']=list(CONTENT_TYPES)+['Activity','Tool']
499            else:
500                keywords['portal_type']=keywords['type']
501            del keywords['type']
502        else: # portal_type is determined by location
503            path=REQUEST['ACTUAL_URL'].split('/')
504            if 'portfolio' in kw:
505                keywords['portal_type']=list(CONTENT_TYPES)+['Activity','Tool','Collection']
506                del keywords['portfolio']
507            elif 'content' in path:
508                if find_empty_values=='Language': # when browsing content without language set, ignore pieces
509                    keywords['portal_type']=list(MATERIAL_TYPES)+['LeMillReference']
510                else:
511                    keywords['portal_type']=list(CONTENT_TYPES)
512            elif 'methods' in path:
513                keywords['portal_type']='Activity'
514            elif 'tools' in path:
515                keywords['portal_type']='Tool'
516            elif 'community' in path:
517                keywords['portal_type']=list(COMMUNITY_TYPES)
518            else:
519                keywords['portal_type']=list(SEARCHABLE_TYPES)
520        if 'q' in keywords:
521            keywords['SearchableText']=keywords['q']
522            del keywords['q']
523        if 'created' in keywords:
524            keywords['created']={'query': self.ZopeTime()-int(keywords['created']), 'range':'min'}
525            keywords['sort_on']='created'
526        if 'edited' in keywords:
527            keywords['getLatestEdit']={'query': self.ZopeTime()-int(keywords['edited']), 'range':'min'}
528            keywords['sort_on']='getLatestEdit'
529            del keywords['edited']
530
531        if not 'Language' in keywords:
532            found=False
533            if 'RSS' in keywords:
534                found=True
535            elif 'empties' in keywords:
536                found=True
537            elif 'base' in keywords and keywords['base']=='language':
538                found=True               
539            elif 'portal_type' in keywords and ('MemberFolder' in keywords['portal_type'] or 'GroupBlog' in keywords['portal_type']):
540                found=True
541            else:
542                for compensating in ['SearchableText','listCreators','getTarget_group','getTags','getSubject_area','created','getLatestEdit', 'getLocation_country', 'getRawGroupEditing']:
543                    if compensating in keywords:
544                        found=True
545                        break
546            if not found:
547                keywords['Language']=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
548        if 'Language' in keywords:
549            if not keywords['Language']: # Delete empty 'Language'
550                del keywords['Language']
551            elif 'portal_type' in keywords: # If browsing for MemberFolders, ignore Language, use getLanguage_skills instead
552                if 'MemberFolder' in keywords['portal_type'] or 'GroupBlog' in keywords['portal_type']:
553                    keywords['getLanguage_skills']=keywords['Language']
554                    del keywords['Language']
555
556        for k in ['base','portfolio','RSS','empties']:
557            if k in keywords:
558                del keywords[k]
559        try:
560            results = self.searchResults(keywords)               
561        except ParseError:
562            results = []
563        if find_empty_values:
564            results = self.fastFindEmptyValues(results, find_empty_values)
565        return results
566
567    #### Fast catalog handling ###########################################
568
569    def wakeLazy(self, lazy):
570        new=[]
571        for l in lazy._seq:
572            if isinstance(l, Lazy):
573                new.extend(self.wakeLazy(l))
574            else:
575                new.append(l)
576        return new
577
578    def fastPick(self, lazy_results, top):
579        """ Takes Lazy results and picks one random metadata obj from top """
580        tries=4
581        choice=randint(0,top)
582        while tries: # since we don't know how long the list is we need to use trial and error
583            # to find if the random index has corresponding item
584            i=0
585            for item in lazy_results._seq:
586                if choice==i:
587                    return lazy_results._func(item)
588                i+=1
589            choice/=2
590            tries-=1
591        return lazy_results._func(item)
592
593    def fastLink(self, user):
594        """ Finds an user from catalog and returns a tuple containing nicename and link
595        because this gets asked so often and it is relatively expensive to build, we cache them in non-permanent dict here in catalog tool
596        """
597        if not hasattr(self, 'author_cache'):
598            self.author_cache={}
599        if not user in self.author_cache:
600            res=self.searchResults(Creator=user, portal_type='MemberFolder')
601            for l in res._seq:
602                authortuple= (self.getEntry('getNicename', l), self.getpath(l))
603                self.author_cache[user]=authortuple
604                return authortuple
605        else:
606            return self.author_cache[user]
607        return (user,'')       
608
609    def getEntry(self, index, key, empty=[]):
610        val=self._catalog.getIndex(index).getEntryForObject(key, empty)
611        if callable(val):
612            val=val()
613        return val       
614           
615    def fastLinks(self, results, limit):
616        """ Takes Lazy results and returns a list of tuples (title, url, portal_type) """
617        def safeData(index, key):
618            v=index.getEntryForObject(key, [])
619            if callable(v):
620                return v()
621            return v
622
623        def wakeUp(lazy, c):
624            new=[]
625            for l in lazy._seq:
626                if isinstance(l, Lazy):
627                    c,newer=wakeUp(l,c)
628                    new.extend(newer)
629                else:
630                    new.append((safeData(title_index, l), self.getpath(l), safeData(type_index, l)))
631                    c+=1
632                if c==limit:
633                    break
634            return c,new
635        title_index=self._catalog.getIndex('getNicename')
636        type_index=self._catalog.getIndex('portal_type')
637        c,new=wakeUp(results,0)       
638        return new
639
640    def fastMetadata(self, results, indexes, cut=0):
641        """ Takes Lazy results and returns a list of tuple for values from indexes.
642        Indexes is a string or a tuple of strings.
643        This is an order of magnitude faster than getting CatalogBrains for each object"""
644        c=0
645        def safeData(index, key):
646            v=index.getEntryForObject(key, [])
647            if callable(v):
648                return v()
649            return v
650
651        def wakeUp(lazy,c):
652            new=[]
653            if isinstance(lazy, list):
654                seq=lazy
655            else:
656                seq=lazy._seq
657            for l in seq:
658                if isinstance(l, Lazy):
659                    new.extend(wakeUp(l,c))
660                else:
661                    values=[]   
662                    for index in index_sources:
663                        if index:
664                            values.append(safeData(index, l))
665                        else:
666                            values.append(l)
667                    new.append(tuple(values))
668                    c+=1
669                    if c==cut:
670                        break
671            return new
672        def wakeUpFaster(lazy, c):
673            new=[]
674            if isinstance(lazy, list):
675                seq=lazy
676            else:
677                seq=lazy._seq
678            for l in seq:
679                if isinstance(l, Lazy):
680                    new.extend(wakeUpFaster(l,c))
681                else:
682                    new.append(index.getEntryForObject(l, []))
683                c+=1
684                if c==cut:
685                    break
686            return new
687        if isinstance(indexes, tuple) or isinstance(indexes, list):
688            index_sources=[ind!='rid' and self._catalog.getIndex(ind) for ind in indexes]
689            new=wakeUp(results,c)
690        else:
691            index=self._catalog.getIndex(indexes)
692            new=wakeUpFaster(results,c)
693        return new
694       
695    def fastCount(self, results, indexes):
696        """ Takes Lazy results and returns a dictionary or tuple of dictionaries for values and their counts from indexes.
697        Index is a string or tuple of strings.
698        This is an order of magnitude faster than getting CatalogBrains for each object"""
699
700        def safeData(index, key):
701            v=index.getEntryForObject(key, [])
702            if callable(v):
703                return v()
704            return v
705       
706        def wakeUpAndCount(lazy):
707            for l in lazy._seq:
708                if isinstance(l, Lazy):
709                    wakeUpAndCount(l)
710                else:
711                    values=[]   
712                    for dic, index in indexes:
713                        val=safeData(index, l)
714                        if val:
715                            if isinstance(val, list):
716                                for v in val:                       
717                                    dic[v[:50]]=dic.get(v[:50],0)+1
718                            elif val:
719                                val=str(val)[:50]
720                                dic[val]=dic.get(val,0)+1
721        def wakeUpFasterAndCount(lazy):
722            for l in lazy._seq:
723                if isinstance(l, Lazy):
724                    wakeUpFasterAndCount(l)
725                else:
726                    val=safeData(index, l)
727                    if isinstance(val, list):
728                        for v in val:                       
729                            dic[v[:50]]=dic.get(v[:50],0)+1
730                    elif val:
731                        val=str(val)[:50]
732                        dic[val]=dic.get(val,0)+1
733        index=None
734        if isinstance(indexes, tuple):
735            if not results:
736                return [{} for ind in indexes]
737            indexes=[({}, self._catalog.getIndex(ind)) for ind in indexes]
738            new=wakeUpAndCount(results)
739            return tuple([dic for dic,index in indexes])
740        else:
741            if not results:
742                return {}
743            index=self._catalog.getIndex(indexes)
744            dic={}
745            new=wakeUpFasterAndCount(results)
746            return dic
747
748
749    def fastFindEmptyValues(self, results, index_to_look):
750        """ Takes Lazy results and return those results where the given index returns empty"""
751        new_results=[]
752        def safeData(index, key):
753            v=index.getEntryForObject(key, [])
754            if callable(v):
755                return v()
756            return v
757       
758        def wakeUpAndFilter(lazy):
759            for l in lazy._seq:
760                if isinstance(l, Lazy):
761                    wakeUpAndFilter(l)
762                else:
763                    val=safeData(index, l)
764                    if not val:
765                        new_results.append(l)
766        if not results:
767            return []
768        index=self._catalog.getIndex(index_to_look)
769        wakeUpAndFilter(results)
770        assert isinstance(results,LazyMap)
771        return LazyMap(results._func, new_results, len(new_results))
772
773
774    def getSomeMetadataForRID(self, rid, md_fields):
775        record = self._catalog.data[rid]
776        schema = self._catalog.schema
777        if isinstance(md_fields, (tuple, list)):
778            result = {}
779            for md in md_fields:
780                result[md]= record[schema[md]]
781            return result
782        else:
783            return record[schema[md_fields]]
784
785
786    ##############################      Clouds      ######################## 
787
788    def buildCloudData(self, results, request=None, size=8, portfolio=False):
789        """ Build tag cloud result tuples (name, tagsize, obj_url, tag_value, nicename) for given form from result set """
790        def adjustTag(val, steps=8):
791            # helper method to adjust hit count of this tag to relative size (1,...,8)
792            try:
793                val=int((steps*log(val-mincount,2))/log(maxcount-mincount,2))
794            except (OverflowError, ZeroDivisionError):
795                val=0
796            if not val:
797                val=1
798            return val
799        lemill_tool = getToolByName(self, 'lemill_tool')
800        if not results:
801            return []
802        if portfolio:
803            cloud_index='getTags'
804            mf=self.getMemberFolder()           
805            link_root='%s/portfolio?tags=' % mf.absolute_url()
806        else:       
807            src=dict(request.form.items())
808            if 'base' not in src:
809                return []
810            portal_url=getToolByName(self, 'portal_url')()
811            path=request['ACTUAL_URL'].split('/')
812            if 'content' in path:
813                link_base='/'.join((portal_url,'content','browse'))
814            elif 'methods' in path:
815                link_base='/'.join((portal_url,'methods','browse'))
816            elif 'tools' in path:
817                link_base='/'.join((portal_url,'tools','browse'))
818            elif 'community' in path:
819                link_base='/'.join((portal_url,'community','browse'))
820            else:
821                link_base='/'.join((portal_url,'browse'))       
822            lang_part=''
823            base=src['base']
824            language=src.get('language','')
825            type_restriction=src.get('type','')
826            if language:
827                lang_part='language=%s&' % language
828            if type_restriction:
829                lang_part+='type=%s&' % type_restriction
830            title_cloud=False
831            if base=='language':
832                link_root='%s?%slanguage=' % (link_base, lang_part)
833                cloud_index='Language'
834                if 'type' in src:
835                    if 'GroupBlog' in src['type'] or 'MemberFolder' in src['type']:
836                        cloud_index='getLanguage_skills'     
837            elif base=='tags':
838                link_root='%s?%stags=' % (link_base, lang_part)       
839                cloud_index='getTags'
840            elif base=='subject_area':
841                link_root='%s?%ssubject_area=' % (link_base, lang_part)       
842                cloud_index='getSubject_area'
843            elif base=='target_group':
844                link_root='%s?%starget_group=' % (link_base, lang_part)       
845                cloud_index='getTarget_group'
846            elif base=='country':
847                link_root='%s?%scountry=' % (link_base, lang_part)
848                cloud_index='getLocation_country'
849            elif base=='title':
850                resultlist=self.fastMetadata(results, ('sortable_title','getScore','getNicename', 'path'), cut=100)
851                if not resultlist:
852                    return []
853                maxcount=resultlist[0][1]
854                mincount=resultlist[-1][1]             
855                resultlist.sort()
856                resultlist=[(x[2], adjustTag(x[1], steps=6), x[3]) for x in resultlist if x[2]]
857                return resultlist
858        hits=self.fastCount(results, cloud_index)
859        if not hits:
860            return []
861        resultlist=zip(hits.values(),hits.keys())
862        resultlist.sort(reverse=True)
863        maxcount=resultlist[0][0] # first!
864        resultlist = resultlist[:100]
865        mincount=resultlist[-1][0] # last!
866        # At this point resultlist= [(number_of_hits, tag_text),...]
867        # It should end up as: [(displayed_text, number_of_hits, link_url)...]
868        if cloud_index=='Language' or cloud_index=='getLanguage_skills':
869            resultlist=[(lemill_tool.getPrettyLanguage(x[1]), adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
870        elif cloud_index=='getSubject_area':
871            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
872        elif cloud_index=='getTarget_group':
873            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
874        else:
875            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
876        resultlist.sort()
877        return resultlist
878
879
880    ########### Front page top lists ############
881   
882    def getTopFive(self, results=None, key_index=None, link_body='', clean=False, cache_key=''):
883        """ Returns top five results for key_index (getTags, getSubject_area...) for certain language """
884        if clean or not hasattr(self, 'top5results'):
885            self.top5results={'content':{}, 'methods':{}, 'tools':{}, 'community':{}}
886        data=self.top5results
887        cache_key=cache_key or key_index
888        path=self.REQUEST['ACTUAL_URL'].split('/')
889        for section in ['content','methods','tools','community',None]:
890            if section in path:
891                break
892        if not section:
893            return []
894        ilanguage=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
895        if cache_key in data[section]:
896            top5lists=data[section][cache_key]
897        else:
898            top5lists={}
899            data[section][cache_key]=top5lists
900        if ilanguage in top5lists:
901            return top5lists[ilanguage]
902        else:
903            tops=self.fastCount(results, key_index)
904            resultlist=zip(tops.values(),tops.keys())
905            resultlist.sort(reverse=True)
906            resultlist=resultlist[:5]
907            if key_index=='getSubject_area':
908                resultlist=[(x[1], ''.join((link_body, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
909            elif key_index=='getTarget_group':
910                resultlist=[(x[1], ''.join((link_body, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
911            else:
912                resultlist=[(x[1], ''.join((link_body, x[1]))) for x in resultlist]
913            top5lists[ilanguage]=resultlist
914            return resultlist
915
916    ############################# batch ##########################
917
918    def batch(self, results=None, request=None):
919        """ Use Plone's batch """
920        if request and 'b_start' in request.form:
921            try:
922                b_start=int(request.form['b_start'])
923            except ValueError:
924                b_start=0
925        else:
926            b_start=0 
927        b= Batch(results, 30, b_start, orphan=1)   
928        return b 
929
930    def batchBaseUrl(self):
931        """ Keep all other parametres as they are, but add or change 'b_start' """
932        form=self.REQUEST.form
933        if 'b_start' in form:
934            del form['b_start']
935        last_url='?'.join((self.REQUEST.ACTUAL_URL, urlencode(form)))
936        new= last_url+'&b_start='
937        return new
938       
939CatalogTool.__doc__ = PloneCatalogTool.__doc__
940
941InitializeClass(CatalogTool)
Note: See TracBrowser for help on using the repository browser.