source: trunk/LeMillCatalogTool.py @ 3126

Revision 3126, 44.6 KB checked in by jukka, 9 years ago (diff)

Fixed #2016. It was a bit more tricky than supposed, as catalog cannot be searched for zero values.

Line 
1#
2# Plone CatalogTool simplified -- does not use ExtensibleIndexableObjectWrapper, workflows or permissions. Basically just ZCatalog posing as Plone Tool, allowing but ignoring plonish commands.
3#
4
5from Products.CMFPlone import ToolNames
6from AccessControl import ClassSecurityInfo
7from Globals import InitializeClass
8from Globals import DTMLFile
9
10from Products.ZCatalog.ZCatalog import ZCatalog
11from Products.ZCTextIndex.ParseTree import ParseError
12from Products.CMFPlone.CatalogTool import CatalogTool as PloneCatalogTool
13from Products.CMFPlone.PloneBatch import Batch
14from Products.PythonScripts.standard import urlencode
15
16from zope.interface import implements
17
18from Products.CMFCore.utils import SimpleRecord, getToolByName
19from Products.ZCTextIndex.HTMLSplitter import HTMLWordSplitter
20from Products.ZCTextIndex.Lexicon import CaseNormalizer
21from Products.ZCTextIndex.Lexicon import Splitter
22from Products.ZCTextIndex.Lexicon import StopWordRemover
23
24from Products.CMFPlone.UnicodeSplitter import Splitter as UnicodeSplitter
25from Products.CMFPlone.UnicodeSplitter import CaseNormalizer as UnicodeCaseNormalizer
26from Products.ZCTextIndex.ZCTextIndex import PLexicon
27from Products.ZCatalog.Lazy import Lazy, LazyMap
28from random import randint
29from config import CONTENT_TYPES, MATERIAL_TYPES, ACTIVITY_TYPES, TOOLS_TYPES, LANGUAGES, LANGUAGES_DICT, TYPE_NAMES, SUBJECT_AREAS_DICT, TARGET_GROUPS_DICT, SUBJECT_AREAS_INVERSE_DICT, TARGET_GROUPS_INVERSE_DICT, COMMUNITY_TYPES, SEARCHABLE_TYPES, DEFAULT_ICONS, TARGET_GROUP
30import time
31from math import log
32from messagefactory_ import i18nme as _
33
34class CatalogTool(PloneCatalogTool):
35
36    meta_type = 'LeMill Catalog Tool'
37    security = ClassSecurityInfo()
38    toolicon = 'skins/lemill/tool.gif'
39
40    __implements__ = PloneCatalogTool.__implements__
41
42    # Originally CatalogTool had some simplified some expensive stuff from Plone's Catalog tool and added some
43    # methods allowing us more control over indexing / unindexing
44   
45    # For LeMill 3.0,
46
47    security.declarePublic( 'enumerateIndexes' ) # Subclass can call
48    def enumerateIndexes( self ):
49        #   Return a list of ( index_name, type, extra ) tuples for the initial
50        #   index set.
51        #   Creator is deprecated and may go away, use listCreators!
52        #   meta_type is deprecated and may go away, use portal_type!
53        plaintext_extra = SimpleRecord( lexicon_id='plaintext_lexicon'
54                                      , index_type='Okapi BM25 Rank'
55                                      )
56        htmltext_extra = SimpleRecord( lexicon_id='htmltext_lexicon'
57                                     , index_type='Okapi BM25 Rank'
58                                     )
59        # Adding plone_lexicon as it should be, no nee dto change the name for our own >>> safer that way
60        plone_extra = SimpleRecord( lexicon_id='plone_lexicon'
61                                  , index_type='Okapi BM25 Rank'
62                                  )
63
64        return ( ('Title', 'ZCTextIndex', plone_extra)
65               , ('Subject', 'KeywordIndex', None)
66               , ('Description', 'ZCTextIndex', plone_extra)
67               , ('Creator', 'FieldIndex', None)
68               , ('listCreators', 'KeywordIndex', None)
69               , ('SearchableText', 'ZCTextIndex', plone_extra)
70               , ('Date', 'DateIndex', None)
71               , ('Type', 'FieldIndex', None)
72               , ('created', 'DateIndex', None)
73               , ('effective', 'DateIndex', None)
74               , ('expires', 'DateIndex', None)
75               , ('modified', 'DateIndex', None)
76               , ('allowedRolesAndUsers', 'KeywordIndex', None)
77               , ('review_state', 'FieldIndex', None)
78               , ('in_reply_to', 'FieldIndex', None)
79               , ('meta_type', 'FieldIndex', None)
80               , ('getId', 'FieldIndex', None)
81               , ('path', 'PathIndex', None)
82               , ('portal_type', 'FieldIndex', None)
83               )
84
85    security.declarePublic('enumerateLexicons')
86    def enumerateLexicons(self):
87        # Creating our own lexicon in a different way, as the automatic creation seems to be needing something to act as a StopWordRemover and fails miserably without it.
88        lexicon = PLexicon('plone_lexicon', '', UnicodeSplitter(), UnicodeCaseNormalizer())
89        self._setObject('plone_lexicon', lexicon)
90        return (
91                 ( 'plaintext_lexicon'
92                 , Splitter()
93                 , CaseNormalizer()
94                 , StopWordRemover()
95                 )
96               , ( 'htmltext_lexicon'
97                 , HTMLWordSplitter()
98                 , CaseNormalizer()
99                 , StopWordRemover()
100                 )
101               )
102    #XXX END
103
104    def catalog_object(self, object, uid, idxs=[],
105                       update_metadata=1, pghandler=None):
106        if object and hasattr(object, 'portal_type') and object.portal_type!='CollectionsFolder':
107            ZCatalog.catalog_object(self, object, uid, idxs, update_metadata, pghandler=pghandler)
108
109    def searchResults(self, REQUEST=None, **kw):
110        """Calls ZCatalog.searchResults """
111        return ZCatalog.searchResults(self, REQUEST, **kw)
112
113    __call__ = searchResults
114
115
116    def titleSearch(self, title='', sort_limit=0):
117        """ search titles containing given string """
118        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
119        if sort_limit:
120            query['sort_limit']=sort_limit
121        query['Title']='%s*' % title
122        results=self.searchResults(query)
123        return results
124
125    def fulltextSearch(self, SearchableText='', sort_limit=0):
126        """ search fulltext for a string """
127        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
128        if sort_limit:
129            query['sort_limit']=sort_limit
130        query['SearchableText']='%s*' % SearchableText
131        results=self.searchResults(query)
132        return results
133
134    ###### 'Adapters' start here
135    # these are methods that are very specifically used by page templates to efficiently return just the results needed there 
136
137    def buildResultsFromBatch(self, batch):
138        """ This method takes a Batch of results (usually 30 or less, LazyMap) and returns a list of
139            *minimal metadata* about them. This metadata is in form of dictionary.
140           
141            The idea is that instantiating real metadata objects from batch results is still a costly process
142            and with this we can show search/browse results without instantiating metadata.
143           
144            This method will also do some preprocessing for metadata, f.ex finds proper names for authors and builds links to them.
145           
146            This method is crafted for resource_list_macros.pt and if you aren't going to display results as they are displayed there, you probably
147            shouldn't use this.
148            """
149        d_list=[]
150        lutool=getToolByName(self, 'lemill_usertool')
151        ltool=getToolByName(self, 'lemill_tool')
152        url_base=getToolByName(self, 'portal_url')()
153        created=False
154        edited=False
155        if hasattr(self, 'REQUEST') and hasattr(self.REQUEST, 'form'):
156            form=self.REQUEST.form
157            if form:
158                created= 'created' in form
159                edited='edited' in form
160        if hasattr(batch._sequence, '_seq'):
161            seq=list(batch._sequence._seq[batch.start-1:batch.end])
162            keys=['rid','getHasCoverImage','getNicename','portal_type','getLocation_country','getTags', 'Language','listCreators']
163            if created:
164                keys.append('created')
165            elif edited:
166                keys.append('getLatestEdit')
167            md=self.fastMetadata(seq, keys)
168            for item in md:
169                if item[3] not in TYPE_NAMES:
170                    continue
171                d={'url':self.getpath(item[0]),
172                    'coverimage_url':item[1],
173                    'title':item[2],
174                    'readable_type':item[3],
175                    'country':item[4],
176                    'tags':item[5],
177                    'language':item[6],
178                    'authors':item[7]}
179                if created:
180                    d['created']=item[8] # These are in weird DateIndex format, difficult to map to actual dates
181                elif edited:
182                    d['edited']=item[8]
183                d_list.append(d)
184            if created or edited:
185                now=time.gmtime() # mirrors DateIndex's conversion script to provide a compatible 'now'
186                now= ( ( ( ( now[0] * 12 + now[1] ) * 31 + now[2] ) * 24 + now[3] ) * 60 + now[4] )
187                useDateIndex=True                         
188        else: # batch has already been converted to catalog Brains object
189            for item in batch:               
190                d={'url':item.getURL(),
191                    'coverimage_url':item.getHasCoverImage,
192                    'title':item.getNicename,
193                    'readable_type':item.portal_type,
194                    'country':item.getLocation_country,
195                    'tags':item.getTags,
196                    'language':item.Language,
197                    'authors':item.listCreators}                 
198                if created:
199                    d['created']=item.created
200                elif edited:
201                    d['edited']=item.getLatestEdit
202                d_list.append(d)
203            now=time.time()
204            useDateIndex=False
205        tag_base='/'.join((url_base,'search?index_type=tags&q='))
206        lang_base='/'.join((url_base,'search?language='))
207        country_base='/'.join((url_base,'community/browse?country='))
208       
209        # Manipulate raw index data to something more useful
210        for item in d_list:
211            if callable(item['title']):
212                item['title']=item['title']()
213            url=item['url']
214            if item['coverimage_url']:
215                item['coverimage_url']='/'.join((url,'coverImage'))
216            else:
217                item['coverimage_url']='/'.join((url_base, DEFAULT_ICONS[item['readable_type']]))
218            ll=item['language']
219            if ll:
220                item['language']=LANGUAGES_DICT[ll]
221                item['language_link']=''.join((lang_base,ll))
222            item['tags']=[(tag, ''.join((tag_base, tag))) for tag in item['tags']]
223            item['authors']=[self.fastLink(author) for author in item['authors']]
224            if item['country']:         
225                item['country_link']=''.join((country_base,item['country']))
226            item['readable_type']=TYPE_NAMES.get(item['readable_type'], None)[0]
227            if 'created' in item:
228                item['timedif']=ltool.getTimeDifference(item['created'], now=now, useDateIndex=useDateIndex)
229            elif 'edited' in item:
230                item['timedif']=ltool.getTimeDifference(item['edited'], now=now, useDateIndex=useDateIndex)
231        return d_list
232
233    def createBrowsingOptions(self, REQUEST, results=None, **kw):
234        """ This is used by browse_macros.pt and other places that need to display filters
235            based on the search results available and the section where the results are displayed.
236       
237            This method analyzes both request and results to decide what filter fields it should display and if it should
238            preselect something or count occurences for each value. The logic for this may look hairy,
239             but it can be understood by approaching this case-by-case:
240            'If we are browsing in Content section, and there are >1000 results, what filters we should show'           
241        """
242        src=dict(REQUEST.form.items())
243        # section determines what kinds of options there are:
244        path=REQUEST['ACTUAL_URL'].split('/')
245        force_language=True
246        if 'portfolio' in kw:
247            do_languages=False
248            do_subject_areas=True
249            do_target_groups=False
250            do_types=True
251            type_selection=SEARCHABLE_TYPES
252            do_edited=True
253            do_tags=False
254            count_indexes=('portal_type','getSubject_area')
255        elif 'content' in path:
256            do_languages=True
257            do_subject_areas=True
258            do_target_groups=True
259            do_types=True
260            type_selection=CONTENT_TYPES
261            do_edited=True
262            do_tags=True
263            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
264        elif 'methods' in path or 'tools' in path:
265            do_languages=True
266            do_subject_areas=False
267            do_target_groups=False
268            do_types=False
269            do_edited=True
270            do_tags=True
271            count_indexes=('Language',)
272        elif 'community' in path:
273            do_languages=True
274            force_language=False
275            do_subject_areas=True
276            do_target_groups=False
277            do_types=True
278            type_selection=COMMUNITY_TYPES
279            do_edited=True
280            do_tags=True
281            count_indexes=('Language','portal_type','getSubject_area')
282        else:
283            do_languages=True
284            force_language=False
285            do_subject_areas=True
286            do_target_groups=True
287            do_types=True
288            type_selection=SEARCHABLE_TYPES
289            do_edited=True
290            do_tags=False
291            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
292       
293        d={'do_languages':do_languages, 'do_subject_areas':do_subject_areas,'do_target_groups':do_target_groups,'do_types':do_types,'do_edited':do_edited, 'do_tags':do_tags, 'q':'', 'state':'', 'author':'', 'created':''}
294
295
296        # now see if the results should be counted and provide a dictionary of counted values if necessary
297        if results and len(results)<1000:
298            count_results= dict(zip(count_indexes, self.fastCount(results, count_indexes)))
299            count=len(results)
300        else:
301            count=0
302
303        # Languages: two lists, one for probable options and other for improbable
304        if do_languages:
305            all_languages=list(LANGUAGES)[1:] # Remove 'language neutral' from options
306            all_languages[0]=('','any language')
307            all_language_codes=[l[0] for l in all_languages]
308            lang_dict = getToolByName(self, 'lemill_tool').language_dict
309            common_language_codes=getToolByName(self,'lemill_usertool').getLanguages()
310            selected_language= src.get('language','')
311            was_empty=not selected_language
312            if was_empty:
313                if force_language:
314                    selected_language=common_language_codes[0]
315                else:
316                    selected_language='all'
317            common_languages=[]
318            rare_languages=[('all','All',int(selected_language=='all'),0)]
319            if count and was_empty and False:
320                langs=count_results['Language']
321                for langcode in common_language_codes:
322                    if langcode and langcode in langs:
323                        common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
324                        all_language_codes.remove(langcode)           
325                for langcode in all_language_codes:
326                    if langcode and langcode in langs:
327                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
328            else:
329                for langcode in common_language_codes:
330                    common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
331                    all_language_codes.remove(langcode)           
332                for langcode in all_language_codes:
333                    if langcode:
334                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
335            d['common_languages']=common_languages
336            d['rare_languages']=rare_languages
337            d['lang_disabled']=False # portal_type==Piece can set this to true
338            if selected_language!='all':
339                d['lang_filter']='language=%s&' % selected_language
340            else:
341                d['lang_filter']=''
342        if do_subject_areas:
343            selected=src.get('subject_area','')
344            if selected:
345                all_count='?'
346            else:
347                all_count=count
348            subject_areas=[('','All',int(not selected), all_count)]
349            if count:
350                counts=count_results['getSubject_area']
351                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
352                    if sa_full in counts:
353                        subject_areas.append((sa_key, sa_full, int(selected==sa_key), counts[sa_full]))
354            else:
355                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
356                    subject_areas.append((sa_key, sa_full, int(selected==sa_key), 0))
357
358            d['subject_area']=subject_areas
359            d['subjs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
360        if do_target_groups:
361            selected=src.get('target_group','')
362            if selected:
363                all_count='?'
364            else:
365                all_count=count
366            target_groups=[('','All',int(not selected),all_count)]
367            if count:
368                counts=count_results['getTarget_group']
369                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
370                    if tg_full in counts:
371                        target_groups.append((tg_key, tg_full, int(selected==tg_key), counts[tg_full]))
372            else:
373                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
374                    target_groups.append((tg_key, tg_full, int(selected==tg_key), 0))
375            d['target_group']=target_groups
376            d['targs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
377        if do_types:
378            selected=src.get('type','')
379            if selected:
380                all_count='?'
381            else:
382                all_count=count
383            types=[('','All',int(not selected),all_count)]
384            if count:
385                counts=count_results['portal_type']
386                for type_key in type_selection:
387                    if type_key in counts:
388                        types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), counts[type_key]))
389            else:
390                for type_key in type_selection:
391                    types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), 0))
392            d['type']=types
393            if selected:
394                if selected in ['Piece','Activity','Tool']:
395                    d['targs_disabled']=True
396                    d['subjs_disabled']=True
397                if selected=='Piece':
398                    d['lang_disabled']=True
399
400        if do_edited:
401            try:
402                selected=int(src.get('edited',0))
403            except ValueError:
404                selected=0
405            d['edited']=[('',_('Any time'),int(not selected),0), (365,_('Last year'),int(selected==365),0), (30,_('Last month'),int(selected==30),0), (7,_('Last week'),int(selected==7),0), (1,_('Yesterday'),int(selected==1),0)]
406        # Previous search term
407        if 'q' in src:
408            d['q']=src['q']
409        if 'state' in src:
410            d['state']=src['state']
411        if 'author' in src:
412            d['author']=src['author']           
413        return d
414
415
416    def decideBrowsingSubType(self):
417        """ Browse page heading needs to know what kind of browsing is going on """
418        form=self.REQUEST.form       
419        if 'state' in form:
420            if form['state']=='public': return 'published'
421            if form['state']=='draft': return 'drafts'
422        path=self.REQUEST['ACTUAL_URL'].split('/')
423        for key in ['content','methods','tools','community']:
424            if key in path: return key
425        return ''           
426
427    def decideSearchSubType(self):
428        """ Search page heading needs to know what kind of search is going on """
429        form=self.REQUEST.form       
430        if 'created' in form:
431            if 'type' in form and form['type']=='MemberFolder':
432                return 'new_members'
433            else:
434                return 'new_resources'
435        if 'edited' in form and 'q' not in form:
436            return 'recent_edits'
437        return 'search'
438
439    def getCloudType(self):
440        """ """
441        form=self.REQUEST.form
442        base=form.get('base','')
443        if base in ['tags','subject_area','target_group','language']:
444            return base
445
446    def browsingSearch(self, REQUEST=None, **kw):       
447        """ This is a general purpose catalog search that can convert readable keywords from request into actual search terms.
448        recognized keywords: language, subject_area, target_group, type, tags, state, author, created, edited, group, country.
449       
450        These same keywords are used all over in interface to build selection boxes, select page headers etc.
451        The actual search indexes like 'getTags' etc. are used only here and should not be used elsewhere.               
452        """
453        src=REQUEST.form
454        if '-C' in src:
455            del src['-C']
456        if not (src or kw):
457            path=REQUEST['ACTUAL_URL'].split('/')
458            if 'browse' in path or 'search' in path or 'cloud' in path:
459                return []
460        keywords={'sort_on':'getScore','sort_order':'reverse','getState':('draft', 'public')}
461        keywords.update(src)
462        keywords.update(kw)
463
464        find_empty_values=keywords.get('empties','')
465
466        if 'language' in keywords:
467            if keywords['language']=='all':
468                keywords['Language']=''
469            else:           
470                keywords['Language']=keywords['language']
471            del keywords['language']
472        if 'subject_area' in keywords:
473            if keywords['subject_area'] in SUBJECT_AREAS_DICT:
474                keywords['getSubject_area']=SUBJECT_AREAS_DICT[keywords['subject_area']]
475            del keywords['subject_area']
476        if 'target_group' in keywords:
477            if keywords['target_group'] in TARGET_GROUPS_DICT:
478                keywords['getTarget_group']=TARGET_GROUPS_DICT[keywords['target_group']]
479            del keywords['target_group']
480        if 'tags' in keywords:
481            keywords['getTags']=keywords['tags']
482            del keywords['tags']
483        if 'author' in keywords and keywords['author']:
484            keywords['listCreators']=keywords['author']
485            del keywords['author']
486        if 'group' in keywords and keywords['group']:
487            keywords['getRawGroupEditing']=keywords['group']
488            del keywords['group']
489        if 'state' in keywords:
490            if keywords['state']=='draft':
491                lutool=getToolByName(self,'lemill_usertool')               
492                if 'listCreators' in keywords and keywords['listCreators'] == lutool.getAuthenticatedId(): # only allow authenticated author to view privates
493                    keywords['getState']=('draft', 'private')
494                else:
495                    keywords['getState']='draft'
496            elif keywords['state']=='private': # don't allow manually mungling browse terms
497                return []
498            else:
499                keywords['getState']=keywords['state']           
500            del keywords['state']
501        if 'country' in keywords:
502            keywords['getLocation_country']=keywords['country']
503        if 'type' in keywords:
504            if keywords['type']=='lr':
505                keywords['portal_type']=list(CONTENT_TYPES)+['Activity','Tool']
506            else:
507                keywords['portal_type']=keywords['type']
508            del keywords['type']
509        else: # portal_type is determined by location
510            path=REQUEST['ACTUAL_URL'].split('/')
511            if 'portfolio' in kw:
512                keywords['portal_type']=list(CONTENT_TYPES)+['Activity','Tool','Collection']
513                del keywords['portfolio']
514            elif 'content' in path:
515                if find_empty_values=='Language': # when browsing content without language set, ignore pieces
516                    keywords['portal_type']=list(MATERIAL_TYPES)+['LeMillReference']
517                else:
518                    keywords['portal_type']=list(CONTENT_TYPES)
519            elif 'methods' in path:
520                keywords['portal_type']='Activity'
521            elif 'tools' in path:
522                keywords['portal_type']='Tool'
523            elif 'community' in path:
524                keywords['portal_type']=list(COMMUNITY_TYPES)
525            else:
526                keywords['portal_type']=list(SEARCHABLE_TYPES)
527        if 'q' in keywords:
528            keywords['SearchableText']=keywords['q']
529            del keywords['q']
530        if 'created' in keywords:
531            keywords['created']={'query': self.ZopeTime()-int(keywords['created']), 'range':'min'}
532            keywords['sort_on']='created'
533        if 'edited' in keywords:
534            keywords['getLatestEdit']={'query': self.ZopeTime()-int(keywords['edited']), 'range':'min'}
535            keywords['sort_on']='getLatestEdit'
536            del keywords['edited']
537
538        if not 'Language' in keywords:
539            found=False
540            if 'RSS' in keywords:
541                found=True
542            elif 'empties' in keywords:
543                found=True
544            elif 'base' in keywords and keywords['base']=='language':
545                found=True               
546            elif 'portal_type' in keywords and ('MemberFolder' in keywords['portal_type'] or 'GroupBlog' in keywords['portal_type']):
547                found=True
548            else:
549                for compensating in ['SearchableText','listCreators','getTarget_group','getTags','getSubject_area','created','getLatestEdit', 'getLocation_country', 'getRawGroupEditing']:
550                    if compensating in keywords:
551                        found=True
552                        break
553            if not found:
554                keywords['Language']=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
555        if 'Language' in keywords:
556            if not keywords['Language']: # Delete empty 'Language'
557                del keywords['Language']
558            elif 'portal_type' in keywords: # If browsing for MemberFolders, ignore Language, use getLanguage_skills instead
559                if 'MemberFolder' in keywords['portal_type'] or 'GroupBlog' in keywords['portal_type']:
560                    keywords['getLanguage_skills']=keywords['Language']
561                    del keywords['Language']
562
563        for k in ['base','portfolio','RSS','empties']:
564            if k in keywords:
565                del keywords[k]
566        try:
567            results = self.searchResults(keywords)               
568        except ParseError:
569            results = []
570        if find_empty_values:
571            results = self.fastFindEmptyValues(results, find_empty_values)
572        return results
573
574    #### Fast catalog handling ###########################################
575
576    def wakeLazy(self, lazy):
577        new=[]
578        for l in lazy._seq:
579            if isinstance(l, Lazy):
580                new.extend(self.wakeLazy(l))
581            else:
582                new.append(l)
583        return new
584
585    def fastPick(self, lazy_results, top):
586        """ Takes Lazy results and picks one random metadata obj from top """
587        tries=4
588        choice=randint(0,top)
589        while tries: # since we don't know how long the list is we need to use trial and error
590            # to find if the random index has corresponding item
591            i=0
592            for item in lazy_results._seq:
593                if choice==i:
594                    return lazy_results._func(item)
595                i+=1
596            choice/=2
597            tries-=1
598        return lazy_results._func(item)
599
600    def fastLink(self, user):
601        """ Finds an user from catalog and returns a tuple containing nicename and link
602        because this gets asked so often and it is relatively expensive to build, we cache them in non-permanent dict here in catalog tool
603        """
604        if not hasattr(self, 'author_cache'):
605            self.author_cache={}
606        if not user in self.author_cache:
607            res=self.searchResults(Creator=user, portal_type='MemberFolder')
608            for l in res._seq:
609                authortuple= (self.getEntry('getNicename', l), self.getpath(l))
610                self.author_cache[user]=authortuple
611                return authortuple
612        else:
613            return self.author_cache[user]
614        return (user,'')       
615
616    def getEntry(self, index, key, empty=[]):
617        val=self._catalog.getIndex(index).getEntryForObject(key, empty)
618        if callable(val):
619            val=val()
620        return val       
621           
622    def fastLinks(self, results, limit):
623        """ Takes Lazy results and returns a list of tuples (title, url, portal_type) """
624        def safeData(index, key):
625            v=index.getEntryForObject(key, [])
626            if callable(v):
627                return v()
628            return v
629
630        def wakeUp(lazy, c):
631            new=[]
632            for l in lazy._seq:
633                if isinstance(l, Lazy):
634                    c,newer=wakeUp(l,c)
635                    new.extend(newer)
636                else:
637                    new.append((safeData(title_index, l), self.getpath(l), safeData(type_index, l)))
638                    c+=1
639                if c==limit:
640                    break
641            return c,new
642        title_index=self._catalog.getIndex('getNicename')
643        type_index=self._catalog.getIndex('portal_type')
644        c,new=wakeUp(results,0)       
645        return new
646
647    def fastMetadata(self, results, indexes, cut=0):
648        """ Takes Lazy results and returns a list of tuple for values from indexes.
649        Indexes is a string or a tuple of strings.
650        This is an order of magnitude faster than getting CatalogBrains for each object"""
651        c=0
652        def safeData(index, key):
653            v=index.getEntryForObject(key, [])
654            if callable(v):
655                return v()
656            return v
657
658        def wakeUp(lazy,c):
659            new=[]
660            if isinstance(lazy, list):
661                seq=lazy
662            else:
663                seq=lazy._seq
664            for l in seq:
665                if isinstance(l, Lazy):
666                    new.extend(wakeUp(l,c))
667                else:
668                    values=[]   
669                    for index in index_sources:
670                        if index:
671                            values.append(safeData(index, l))
672                        else:
673                            values.append(l)
674                    new.append(tuple(values))
675                    c+=1
676                    if c==cut:
677                        break
678            return new
679        def wakeUpFaster(lazy, c):
680            new=[]
681            if isinstance(lazy, list):
682                seq=lazy
683            else:
684                seq=lazy._seq
685            for l in seq:
686                if isinstance(l, Lazy):
687                    new.extend(wakeUpFaster(l,c))
688                else:
689                    new.append(index.getEntryForObject(l, []))
690                c+=1
691                if c==cut:
692                    break
693            return new
694        if isinstance(indexes, tuple) or isinstance(indexes, list):
695            index_sources=[ind!='rid' and self._catalog.getIndex(ind) for ind in indexes]
696            new=wakeUp(results,c)
697        else:
698            index=self._catalog.getIndex(indexes)
699            new=wakeUpFaster(results,c)
700        return new
701       
702    def fastCount(self, results, indexes):
703        """ Takes Lazy results and returns a dictionary or tuple of dictionaries for values and their counts from indexes.
704        Index is a string or tuple of strings.
705        This is an order of magnitude faster than getting CatalogBrains for each object"""
706
707        def safeData(index, key):
708            v=index.getEntryForObject(key, [])
709            if callable(v):
710                return v()
711            return v
712       
713        def wakeUpAndCount(lazy):
714            for l in lazy._seq:
715                if isinstance(l, Lazy):
716                    wakeUpAndCount(l)
717                else:
718                    values=[]   
719                    for dic, index in indexes:
720                        val=safeData(index, l)
721                        if val:
722                            if isinstance(val, list):
723                                for v in val:                       
724                                    dic[v[:50]]=dic.get(v[:50],0)+1
725                            elif val:
726                                val=str(val)[:50]
727                                dic[val]=dic.get(val,0)+1
728        def wakeUpFasterAndCount(lazy):
729            for l in lazy._seq:
730                if isinstance(l, Lazy):
731                    wakeUpFasterAndCount(l)
732                else:
733                    val=safeData(index, l)
734                    if isinstance(val, list):
735                        for v in val:                       
736                            dic[v[:50]]=dic.get(v[:50],0)+1
737                    elif val:
738                        val=str(val)[:50]
739                        dic[val]=dic.get(val,0)+1
740        index=None
741        if isinstance(indexes, tuple):
742            if not results:
743                return [{} for ind in indexes]
744            indexes=[({}, self._catalog.getIndex(ind)) for ind in indexes]
745            new=wakeUpAndCount(results)
746            return tuple([dic for dic,index in indexes])
747        else:
748            if not results:
749                return {}
750            index=self._catalog.getIndex(indexes)
751            dic={}
752            new=wakeUpFasterAndCount(results)
753            return dic
754
755
756    def fastFindEmptyValues(self, results, index_to_look):
757        """ Takes Lazy results and return those results where the given index returns empty"""
758
759        t=time.time()
760        new_results=[]
761
762        def safeData(index, key):
763            v=index.getEntryForObject(key, [])
764            if callable(v):
765                return v()
766            return v
767       
768        def wakeUpAndFilter(lazy):
769            for l in lazy._seq:
770                if isinstance(l, Lazy):
771                    wakeUpAndFilter(l)
772                else:
773                    val=safeData(index, l)
774                    if not val:
775                        new_results.append(l)
776        if not results:
777            return []
778        index=self._catalog.getIndex(index_to_look)
779        wakeUpAndFilter(results)
780        print time.time()-t
781        assert isinstance(results,LazyMap)
782        return LazyMap(results._func, new_results, len(new_results))
783
784
785    def getSomeMetadataForRID(self, rid, md_fields):
786        record = self._catalog.data[rid]
787        schema = self._catalog.schema
788        if isinstance(md_fields, (tuple, list)):
789            result = {}
790            for md in md_fields:
791                result[md]= record[schema[md]]
792            return result
793        else:
794            return record[schema[md_fields]]
795
796
797    ##############################      Clouds      ######################## 
798
799    def buildCloudData(self, results, request=None, size=8, portfolio=False):
800        """ Build tag cloud result tuples (name, tagsize, obj_url, tag_value, nicename) for given form from result set """
801        def adjustTag(val, steps=8):
802            # helper method to adjust hit count of this tag to relative size (1,...,8)
803            try:
804                val=int((steps*log(val-mincount,2))/log(maxcount-mincount,2))
805            except (OverflowError, ZeroDivisionError):
806                val=0
807            if not val:
808                val=1
809            return val
810        lemill_tool = getToolByName(self, 'lemill_tool')
811        if not results:
812            return []
813        if portfolio:
814            cloud_index='getTags'
815            mf=self.getMemberFolder()           
816            link_root='%s/portfolio?tags=' % mf.absolute_url()
817        else:       
818            src=dict(request.form.items())
819            if 'base' not in src:
820                return []
821            portal_url=getToolByName(self, 'portal_url')()
822            path=request['ACTUAL_URL'].split('/')
823            if 'content' in path:
824                link_base='/'.join((portal_url,'content','browse'))
825            elif 'methods' in path:
826                link_base='/'.join((portal_url,'methods','browse'))
827            elif 'tools' in path:
828                link_base='/'.join((portal_url,'tools','browse'))
829            elif 'community' in path:
830                link_base='/'.join((portal_url,'community','browse'))
831            else:
832                link_base='/'.join((portal_url,'browse'))       
833            lang_part=''
834            base=src['base']
835            language=src.get('language','')
836            type_restriction=src.get('type','')
837            if language:
838                lang_part='language=%s&' % language
839            if type_restriction:
840                lang_part+='type=%s&' % type_restriction
841            title_cloud=False
842            if base=='language':
843                link_root='%s?%slanguage=' % (link_base, lang_part)
844                cloud_index='Language'
845                if 'type' in src:
846                    if 'GroupBlog' in src['type'] or 'MemberFolder' in src['type']:
847                        cloud_index='getLanguage_skills'     
848            elif base=='tags':
849                link_root='%s?%stags=' % (link_base, lang_part)       
850                cloud_index='getTags'
851            elif base=='subject_area':
852                link_root='%s?%ssubject_area=' % (link_base, lang_part)       
853                cloud_index='getSubject_area'
854            elif base=='target_group':
855                link_root='%s?%starget_group=' % (link_base, lang_part)       
856                cloud_index='getTarget_group'
857            elif base=='country':
858                link_root='%s?%scountry=' % (link_base, lang_part)
859                cloud_index='getLocation_country'
860            elif base=='title':
861                resultlist=self.fastMetadata(results, ('sortable_title','getScore','getNicename', 'path'), cut=100)
862                if not resultlist:
863                    return []
864                maxcount=resultlist[0][1]
865                mincount=resultlist[-1][1]             
866                resultlist.sort()
867                resultlist=[(x[2], adjustTag(x[1], steps=6), x[3]) for x in resultlist if x[2]]
868                return resultlist
869        hits=self.fastCount(results, cloud_index)
870        if not hits:
871            return []
872        resultlist=zip(hits.values(),hits.keys())
873        resultlist.sort(reverse=True)
874        maxcount=resultlist[0][0] # first!
875        resultlist = resultlist[:100]
876        mincount=resultlist[-1][0] # last!
877        # At this point resultlist= [(number_of_hits, tag_text),...]
878        # It should end up as: [(displayed_text, number_of_hits, link_url)...]
879        if cloud_index=='Language' or cloud_index=='getLanguage_skills':
880            resultlist=[(lemill_tool.getPrettyLanguage(x[1]), adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
881        elif cloud_index=='getSubject_area':
882            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
883        elif cloud_index=='getTarget_group':
884            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
885        else:
886            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
887        resultlist.sort()
888        return resultlist
889
890
891    ########### Front page top lists ############
892   
893    def getTopFive(self, results=None, key_index=None, link_body='', clean=False, cache_key=''):
894        """ Returns top five results for key_index (getTags, getSubject_area...) for certain language """
895        if clean or not hasattr(self, 'top5results'):
896            self.top5results={'content':{}, 'methods':{}, 'tools':{}, 'community':{}}
897        data=self.top5results
898        cache_key=cache_key or key_index
899        path=self.REQUEST['ACTUAL_URL'].split('/')
900        for section in ['content','methods','tools','community',None]:
901            if section in path:
902                break
903        if not section:
904            return []
905        ilanguage=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
906        if cache_key in data[section]:
907            top5lists=data[section][cache_key]
908        else:
909            top5lists={}
910            data[section][cache_key]=top5lists
911        if ilanguage in top5lists:
912            return top5lists[ilanguage]
913        else:
914            tops=self.fastCount(results, key_index)
915            resultlist=zip(tops.values(),tops.keys())
916            resultlist.sort(reverse=True)
917            resultlist=resultlist[:5]
918            if key_index=='getSubject_area':
919                resultlist=[(x[1], ''.join((link_body, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
920            elif key_index=='getTarget_group':
921                resultlist=[(x[1], ''.join((link_body, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
922            else:
923                resultlist=[(x[1], ''.join((link_body, x[1]))) for x in resultlist]
924            top5lists[ilanguage]=resultlist
925            return resultlist
926
927    ############################# batch ##########################
928
929    def batch(self, results=None, request=None):
930        """ Use Plone's batch """
931        if request and 'b_start' in request.form:
932            try:
933                b_start=int(request.form['b_start'])
934            except ValueError:
935                b_start=0
936        else:
937            b_start=0 
938        b= Batch(results, 30, b_start, orphan=1)   
939        return b 
940
941    def batchBaseUrl(self):
942        """ Keep all other parametres as they are, but add or change 'b_start' """
943        form=self.REQUEST.form
944        if 'b_start' in form:
945            del form['b_start']
946        last_url='?'.join((self.REQUEST.ACTUAL_URL, urlencode(form)))
947        new= last_url+'&b_start='
948        return new
949       
950
951#
952#    def getTagCloud(self, search_results, index_type):
953#        """ Build a cloud based on how many occurences of this item are in results """
954#        if not search_results:
955#            return []
956#        lemill_tool = getToolByName(self, 'lemill_tool')
957#        pc = getToolByName(self, 'portal_catalog')
958#        from math import log
959#        maxcount=0
960#
961#        hits={}
962#        hits=pc.fastCount(search_results, index_type)
963#        resultlist=zip(hits.values(),hits.keys())
964#        if not resultlist:
965#            return []
966#        resultlist.sort()
967#        resultlist.reverse()
968#        maxcount=resultlist[0][0] # first!
969#        # if the first cut score for tag is x, we want to cut off all of the tags with score x.
970#        if len(resultlist)>100:
971#            #cutpoint = [x[0] for x in resultlist].index(resultlist[100]) can't figure this now, fix later
972#            cutpoint = 100
973#            resultlist = resultlist[:cutpoint]
974#        mincount=resultlist[-1][0]
975#        resultlist=[(x[1], x[0], '',x[1],x[1]) for x in resultlist]
976#
977#        # adjust to 1-8. We don't have to worry about score 0, they're already removed.
978#        if maxcount>1:
979#            resultlist=map(adjust, resultlist)
980#        # prettify language names
981#        if index_type=='Language' or index_type=='getLanguage_skills':
982#            resultlist=[(x[0],x[1],x[2],x[3],lemill_tool.getPrettyLanguage(x[4])) for x in resultlist]           
983#        if index_type=='getTarget_group':
984#            def compfunc(t2,t1):
985#                if t2[0] in TARGET_GROUP and t1[0] in TARGET_GROUP:
986#                    return  TARGET_GROUP.index(t2[0]) - TARGET_GROUP.index(t1[0])
987#                else:
988#                    return -1
989#            resultlist.sort(cmp=compfunc)
990#        else:   
991#            resultlist.sort()
992#        return resultlist
993#       
994#
995#    def getTitleCloud(self, search_results, browse_type):
996#        """ Build a cloud based on popularity score for that resource """
997#        pc=getToolByName(self,'portal_catalog')
998#        # uniquetuplelist contains result metadata reordered: (sort_title, count, url, indexvalue, title)
999#        if not search_results:
1000#            return []
1001#
1002#        def isDefaultTitle(x):
1003#            """ some heuristic to recognize default titles """
1004#            return re.match(r'.*\.(...)$', x) or re.match(r'.*\.(....)$', x)
1005#                   
1006#        popularity = pc.fastMetadata(search_results, ('getScore','rid','getNicename','sortable_title'))
1007#        popularity.sort(reverse=True)
1008#        popularity=popularity[:100]
1009#        titlecloud=[(sortable_title, getScore, self.REQUEST.physicalPathToURL(pc.getpath(rid)), sortable_title, getNicename or sortable_title) for (getScore, rid, getNicename, sortable_title) in popularity if sortable_title]
1010#       
1011#        if not titlecloud:
1012#            return []
1013#        titlecloud.sort()
1014#        maxscore=max([x[1] for x in titlecloud])
1015#        if maxscore>1:
1016#            titlecloud=map(adjust, titlecloud)
1017#        return titlecloud
1018
1019
1020
1021
1022
1023CatalogTool.__doc__ = PloneCatalogTool.__doc__
1024
1025InitializeClass(CatalogTool)
Note: See TracBrowser for help on using the repository browser.