source: trunk/LeMillCatalogTool.py @ 3055

Revision 3055, 38.6 KB checked in by jukka, 9 years ago (diff)

Recent views now sort by edit/created date and display the time to last change/creation.

Line 
1#
2# Plone CatalogTool simplified -- does not use ExtensibleIndexableObjectWrapper, workflows or permissions. Basically just ZCatalog posing as Plone Tool, allowing but ignoring plonish commands.
3#
4
5from Products.CMFPlone import ToolNames
6from AccessControl import ClassSecurityInfo
7from Globals import InitializeClass
8from Globals import DTMLFile
9
10from Products.ZCatalog.ZCatalog import ZCatalog
11from Products.CMFPlone.CatalogTool import CatalogTool as PloneCatalogTool
12from Products.CMFPlone.PloneBatch import Batch
13from Products.PythonScripts.standard import urlencode
14
15from zope.interface import implements
16
17from Products.CMFCore.utils import SimpleRecord, getToolByName
18from Products.ZCTextIndex.HTMLSplitter import HTMLWordSplitter
19from Products.ZCTextIndex.Lexicon import CaseNormalizer
20from Products.ZCTextIndex.Lexicon import Splitter
21from Products.ZCTextIndex.Lexicon import StopWordRemover
22
23from Products.CMFPlone.UnicodeSplitter import Splitter as UnicodeSplitter
24from Products.CMFPlone.UnicodeSplitter import CaseNormalizer as UnicodeCaseNormalizer
25from Products.ZCTextIndex.ZCTextIndex import PLexicon
26from Products.ZCatalog.Lazy import Lazy
27from random import randint
28from config import CONTENT_TYPES, MATERIAL_TYPES, ACTIVITY_TYPES, TOOLS_TYPES, LANGUAGES, LANGUAGES_DICT, TYPE_NAMES, SUBJECT_AREAS_DICT, TARGET_GROUPS_DICT, SUBJECT_AREAS_INVERSE_DICT, TARGET_GROUPS_INVERSE_DICT, COMMUNITY_TYPES, SEARCHABLE_TYPES, DEFAULT_ICONS, TARGET_GROUP
29import time
30from math import log
31
32class CatalogTool(PloneCatalogTool):
33
34    meta_type = 'LeMill Catalog Tool'
35    security = ClassSecurityInfo()
36    toolicon = 'skins/lemill/tool.gif'
37
38    __implements__ = PloneCatalogTool.__implements__
39
40   
41    #XXX START
42    # This is a copy from CMFCore/CatalogTool.py file, made several modifications so that it would do as we want
43    security.declarePublic( 'enumerateIndexes' ) # Subclass can call
44    def enumerateIndexes( self ):
45        #   Return a list of ( index_name, type, extra ) tuples for the initial
46        #   index set.
47        #   Creator is deprecated and may go away, use listCreators!
48        #   meta_type is deprecated and may go away, use portal_type!
49        plaintext_extra = SimpleRecord( lexicon_id='plaintext_lexicon'
50                                      , index_type='Okapi BM25 Rank'
51                                      )
52        htmltext_extra = SimpleRecord( lexicon_id='htmltext_lexicon'
53                                     , index_type='Okapi BM25 Rank'
54                                     )
55        # Adding plone_lexicon as it should be, no nee dto change the name for our own >>> safer that way
56        plone_extra = SimpleRecord( lexicon_id='plone_lexicon'
57                                  , index_type='Okapi BM25 Rank'
58                                  )
59
60        return ( ('Title', 'ZCTextIndex', plone_extra)
61               , ('Subject', 'KeywordIndex', None)
62               , ('Description', 'ZCTextIndex', plone_extra)
63               , ('Creator', 'FieldIndex', None)
64               , ('listCreators', 'KeywordIndex', None)
65               , ('SearchableText', 'ZCTextIndex', plone_extra)
66               , ('Date', 'DateIndex', None)
67               , ('Type', 'FieldIndex', None)
68               , ('created', 'DateIndex', None)
69               , ('effective', 'DateIndex', None)
70               , ('expires', 'DateIndex', None)
71               , ('modified', 'DateIndex', None)
72               , ('allowedRolesAndUsers', 'KeywordIndex', None)
73               , ('review_state', 'FieldIndex', None)
74               , ('in_reply_to', 'FieldIndex', None)
75               , ('meta_type', 'FieldIndex', None)
76               , ('getId', 'FieldIndex', None)
77               , ('path', 'PathIndex', None)
78               , ('portal_type', 'FieldIndex', None)
79               )
80
81    security.declarePublic('enumerateLexicons')
82    def enumerateLexicons(self):
83        # Creating our own lexicon in a different way, as the automatic creation seems to be needing something to act as a StopWordRemover and fails miserably without it.
84        lexicon = PLexicon('plone_lexicon', '', UnicodeSplitter(), UnicodeCaseNormalizer())
85        self._setObject('plone_lexicon', lexicon)
86        return (
87                 ( 'plaintext_lexicon'
88                 , Splitter()
89                 , CaseNormalizer()
90                 , StopWordRemover()
91                 )
92               , ( 'htmltext_lexicon'
93                 , HTMLWordSplitter()
94                 , CaseNormalizer()
95                 , StopWordRemover()
96                 )
97               )
98    #XXX END
99
100    def catalog_object(self, object, uid, idxs=[],
101                       update_metadata=1, pghandler=None):
102        if object and hasattr(object, 'portal_type') and object.portal_type!='CollectionsFolder':
103            ZCatalog.catalog_object(self, object, uid, idxs, update_metadata, pghandler=pghandler)
104
105    def searchResults(self, REQUEST=None, **kw):
106        """Calls ZCatalog.searchResults """
107        return ZCatalog.searchResults(self, REQUEST, **kw)
108
109    __call__ = searchResults
110
111
112    def titleSearch(self, title='', sort_limit=0):
113        """ search titles containing given string """
114        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
115        if sort_limit:
116            query['sort_limit']=sort_limit
117        query['Title']='%s*' % title
118        results=self.searchResults(query)
119        return results
120
121    def fulltextSearch(self, SearchableText='', sort_limit=0):
122        """ search fulltext for a string """
123        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
124        if sort_limit:
125            query['sort_limit']=sort_limit
126        query['SearchableText']='%s*' % SearchableText
127        results=self.searchResults(query)
128        return results
129
130    def buildResultsFromBatch(self, batch):
131        t=time.time()
132        d_list=[]
133        lutool=getToolByName(self, 'lemill_usertool')
134        ltool=getToolByName(self, 'lemill_tool')
135        url_base=getToolByName(self, 'portal_url')()
136        created=False
137        edited=False
138        if hasattr(self, 'REQUEST') and hasattr(self.REQUEST, 'form'):
139            form=self.REQUEST.form
140            if form:
141                created= 'created' in form
142                edited='edited' in form
143        if hasattr(batch._sequence, '_seq'):
144            print 'using fast metadata to build results'
145            seq=list(batch._sequence._seq[batch.start-1:batch.end])
146            keys=['rid','getHasCoverImage','getNicename','portal_type','getLocation_country','getTags', 'Language','listCreators']
147            if created:
148                keys.append('created')
149            elif edited:
150                keys.append('getLatestEdit')
151            md=self.fastMetadata(seq, keys)
152            for item in md:
153                if item[3] not in TYPE_NAMES:
154                    print 'rejected item:', item
155                    raise hell
156                    continue
157                d={'url':self.getpath(item[0]),
158                    'coverimage_url':item[1],
159                    'title':item[2],
160                    'readable_type':item[3],
161                    'country':item[4],
162                    'tags':item[5],
163                    'language':item[6],
164                    'authors':item[7]}
165                if created:
166                    d['created']=item[8] # These are in weird DateIndex format, difficult to map to actual dates
167                elif edited:
168                    d['edited']=item[8]
169                d_list.append(d)
170            if created or edited:
171                now=time.gmtime() # mirrors DateIndex's conversion script to provide a compatible 'now'
172                now= ( ( ( ( now[0] * 12 + now[1] ) * 31 + now[2] ) * 24 + now[3] ) * 60 + now[4] )
173                useDateIndex=True                         
174        else: # batch has already been converted to catalog Brains object
175            print 'using Brains objects to build results'
176            for item in batch:               
177                d={'url':item.getURL(),
178                    'coverimage_url':item.getHasCoverImage,
179                    'title':item.getNicename,
180                    'readable_type':item.portal_type,
181                    'country':item.getLocation_country,
182                    'tags':item.getTags,
183                    'language':item.Language,
184                    'authors':item.listCreators}                 
185                if created:
186                    d['created']=item.created
187                elif edited:
188                    d['edited']=item.getLatestEdit
189                d_list.append(d)
190            now=time.time()
191            useDateIndex=False
192        tag_base='/'.join((url_base,'search?index_type=tags&q='))
193        lang_base='/'.join((url_base,'search?language='))
194        country_base='/'.join((url_base,'community/browse?country='))
195       
196        # Manipulate raw index data to something more useful
197        for item in d_list:
198            if callable(item['title']):
199                item['title']=item['title']()
200            url=item['url']
201            if item['coverimage_url']:
202                item['coverimage_url']='/'.join((url,'coverImage'))
203            else:
204                item['coverimage_url']='/'.join((url_base, DEFAULT_ICONS[item['readable_type']]))
205            ll=item['language']
206            if ll:
207                item['language']=LANGUAGES_DICT[ll]
208                item['language_link']=''.join((lang_base,ll))
209            item['tags']=[(tag, ''.join((tag_base, tag))) for tag in item['tags']]
210            item['authors']=[self.fastLink(author) for author in item['authors']]
211            if item['country']:         
212                item['country_link']=''.join((country_base,item['country']))
213            item['readable_type']=TYPE_NAMES.get(item['readable_type'], None)[0]
214            if 'created' in item:
215                item['timedif']=ltool.getTimeDifference(item['created'], now=now, useDateIndex=useDateIndex)
216            elif 'edited' in item:
217                item['timedif']=ltool.getTimeDifference(item['edited'], now=now, useDateIndex=useDateIndex)
218        print 'result batch creation took: ',time.time()-t
219        return d_list
220
221
222    def createBrowsingOptions(self, REQUEST, results=None):
223        """takes search results as input and returns a dictionary that tells what values there are available for each selection box and how many results there are of each value."""
224        t=time.time()
225        src=dict(REQUEST.form.items())
226        print src
227        # section determines what kinds of options there are:
228        path=REQUEST['ACTUAL_URL'].split('/')
229        force_language=True
230
231        if 'content' in path:
232            do_languages=True
233            do_subject_areas=True
234            do_target_groups=True
235            do_types=True
236            type_selection=CONTENT_TYPES
237            do_edited=True
238            do_tags=True
239            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
240        elif 'methods' in path or 'tools' in path:
241            do_languages=True
242            do_subject_areas=False
243            do_target_groups=False
244            do_types=False
245            do_edited=True
246            do_tags=True
247            count_indexes=('Language',)
248        elif 'community' in path:
249            do_languages=True
250            do_subject_areas=True
251            do_target_groups=False
252            do_types=True
253            type_selection=COMMUNITY_TYPES
254            do_edited=True
255            do_tags=True
256            count_indexes=('Language','portal_type','getSubject_area')
257        else:
258            do_languages=True
259            force_language=False
260            do_subject_areas=True
261            do_target_groups=True
262            do_types=True
263            type_selection=SEARCHABLE_TYPES
264            do_edited=True
265            do_tags=False
266            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
267       
268        d={'do_languages':do_languages, 'do_subject_areas':do_subject_areas,'do_target_groups':do_target_groups,'do_types':do_types,'do_edited':do_edited, 'do_tags':do_tags, 'q':'', 'state':'', 'author':'', 'created':''}
269
270
271        # now see if the results should be counted and provide a dictionary of counted values if necessary
272        if results and len(results)<1000:
273            count_results= dict(zip(count_indexes, self.fastCount(results, count_indexes)))
274            count=len(results)
275        else:
276            count=0
277
278        # Languages: two lists, one for probable options and other for improbable
279        if do_languages:
280            all_languages=list(LANGUAGES)[1:] # Remove 'language neutral' from options
281            all_languages[0]=('','any language')
282            all_language_codes=[l[0] for l in all_languages]
283            lang_dict = getToolByName(self, 'lemill_tool').language_dict
284            common_language_codes=getToolByName(self,'lemill_usertool').getLanguages()
285            selected_language= src.get('language','')
286            was_empty=not selected_language
287            if was_empty:
288                if force_language:
289                    selected_language=common_language_codes[0]
290                else:
291                    selected_language='all'
292            common_languages=[]
293            rare_languages=[('all','All',int(selected_language=='all'),0)]
294            if count and was_empty and False:
295                langs=count_results['Language']
296                for langcode in common_language_codes:
297                    if langcode and langcode in langs:
298                        common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
299                        all_language_codes.remove(langcode)           
300                for langcode in all_language_codes:
301                    if langcode and langcode in langs:
302                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
303            else:
304                for langcode in common_language_codes:
305                    common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
306                    all_language_codes.remove(langcode)           
307                for langcode in all_language_codes:
308                    if langcode:
309                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
310            d['common_languages']=common_languages
311            d['rare_languages']=rare_languages
312            d['lang_disabled']=False # portal_type==Piece can set this to true
313            if selected_language!='all':
314                d['lang_filter']='language=%s&' % selected_language
315            else:
316                d['lang_filter']=''
317        if do_subject_areas:
318            selected=src.get('subject_area','')
319            if selected:
320                all_count='?'
321            else:
322                all_count=count
323            subject_areas=[('','All',int(not selected), all_count)]
324            if count:
325                counts=count_results['getSubject_area']
326                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
327                    if sa_full in counts:
328                        subject_areas.append((sa_key, sa_full, int(selected==sa_key), counts[sa_full]))
329            else:
330                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
331                    subject_areas.append((sa_key, sa_full, int(selected==sa_key), 0))
332
333            d['subject_area']=subject_areas
334            d['subjs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
335        if do_target_groups:
336            selected=src.get('target_group','')
337            if selected:
338                all_count='?'
339            else:
340                all_count=count
341            target_groups=[('','All',int(not selected),all_count)]
342            if count:
343                counts=count_results['getTarget_group']
344                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
345                    if tg_full in counts:
346                        target_groups.append((tg_key, tg_full, int(selected==tg_key), counts[tg_full]))
347            else:
348                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
349                    target_groups.append((tg_key, tg_full, int(selected==tg_key), 0))
350            d['target_group']=target_groups
351            d['targs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
352        if do_types:
353            selected=src.get('type','')
354            if selected:
355                all_count='?'
356            else:
357                all_count=count
358            types=[('','All',int(not selected),all_count)]
359            if count:
360                counts=count_results['portal_type']
361                for type_key in type_selection:
362                    if type_key in counts:
363                        types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), counts[type_key]))
364            else:
365                for type_key in type_selection:
366                    types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), 0))
367            d['type']=types
368            if selected:
369                if selected in ['Piece','Activity','Tool']:
370                    d['targs_disabled']=True
371                    d['subjs_disabled']=True
372                if selected=='Piece':
373                    d['lang_disabled']=True
374
375        if do_edited:
376            try:
377                selected=int(src.get('edited',0))
378            except ValueError:
379                selected=0
380            d['edited']=[('','Any time',int(not selected),0), (365,'Last year',int(selected==365),0), (30,'Last month',int(selected==30),0), (7,'Last week',int(selected==7),0), (1,'Yesterday',int(selected==1),0)]
381        # Previous search term
382        if 'q' in src:
383            d['q']=src['q']
384        if 'state' in src:
385            d['state']=src['state']
386        if 'author' in src:
387            d['author']=src['author']           
388               
389        print 'creating browsing options took', time.time()-t
390        return d
391
392
393    def decideBrowsingSubType(self):
394        """ Browse page header needs to know what kind of browsing is going on """
395        form=self.REQUEST.form       
396        if 'state' in form:
397            if form['state']=='public': return 'published'
398            if form['state']=='draft': return 'drafts'
399        path=self.REQUEST['ACTUAL_URL'].split('/')
400        for key in ['content','methods','tools','community']:
401            if key in path: return key
402        return ''           
403
404    def decideSearchSubType(self):
405        """ Search page header needs to know what kind of search is going on """
406        form=self.REQUEST.form       
407        if 'created' in form:
408            if 'type' in form and form['type']=='MemberFolder':
409                return 'new_members'
410            else:
411                return 'new_resources'
412        if 'edited' in form and 'q' not in form:
413            return 'recent_edits'
414        return 'search'
415
416
417    def browsingSearch(self, REQUEST=None, **kw):       
418        """ this search tries to use arguments from browsing form """
419        print 'browsing search called'
420        t=time.time()
421        src=REQUEST.form
422        if '-C' in src:
423            del src['-C']
424        if not (src or kw):
425            path=REQUEST['ACTUAL_URL'].split('/')
426            if 'browse' in path or 'search' in path or 'cloud' in path:
427                print 'empty search, return []'
428                return []
429        keywords={'sort_on':'getScore','sort_order':'reverse','getState':('draft', 'public')}
430        keywords.update(src)
431        keywords.update(kw)
432        if 'language' in keywords:
433            if keywords['language']=='all':
434                keywords['Language']=''
435            else:           
436                keywords['Language']=keywords['language']
437            del keywords['language']
438        if 'subject_area' in keywords:
439            if keywords['subject_area'] in SUBJECT_AREAS_DICT:
440                keywords['getSubject_area']=SUBJECT_AREAS_DICT[keywords['subject_area']]
441            del keywords['subject_area']
442        if 'target_group' in keywords:
443            if keywords['target_group'] in TARGET_GROUPS_DICT:
444                keywords['getTarget_group']=TARGET_GROUPS_DICT[keywords['target_group']]
445            del keywords['target_group']
446        if 'tags' in keywords:
447            keywords['getTags']=keywords['tags']
448            del keywords['tags']
449        if 'author' in keywords:
450            keywords['listCreators']=keywords['author']
451            del keywords['author']
452        if 'state' in keywords:
453            keywords['getState']=keywords['state']
454            del keywords['state']
455        if 'type' in keywords:
456            if keywords['type']=='lr':
457                keywords['portal_type']=list(CONTENT_TYPES)+['Activity','Tool']
458            else:
459                keywords['portal_type']=keywords['type']
460            del keywords['type']
461        else: # portal_type is determined by location
462            path=REQUEST['ACTUAL_URL'].split('/')
463            if 'content' in path:
464                keywords['portal_type']=CONTENT_TYPES
465            elif 'methods' in path:
466                keywords['portal_type']='Activity'
467            elif 'tools' in path:
468                keywords['portal_type']='Tool'
469            elif 'community' in path:
470                keywords['portal_type']=COMMUNITY_TYPES
471            else:
472                keywords['portal_type']=SEARCHABLE_TYPES
473        if 'q' in keywords:
474            keywords['SearchableText']=keywords['q']
475            del keywords['q']
476        if 'created' in keywords:
477            keywords['created']={'query': self.ZopeTime()-int(keywords['created']), 'range':'min'}
478            keywords['sort_on']='created'
479        if 'edited' in keywords:
480            keywords['getLatestEdit']={'query': self.ZopeTime()-int(keywords['edited']), 'range':'min'}
481            keywords['sort_on']='getLatestEdit'
482            del keywords['edited']
483        if not 'Language' in keywords:
484            found=False
485            if 'base' in keywords:
486                if keywords['base']=='language':
487                    found=True
488            for compensating in ['SearchableText','listCreators','getTarget_group','getTags','getSubject_area','created','getLatestEdit']:
489                if compensating in keywords:
490                    found=True
491                    break
492            if not found:
493                keywords['Language']=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
494        if 'Language' in keywords:
495            if not keywords['Language']:
496                del keywords['Language']
497        print 'keywords:', keywords
498        try:
499            results = self.searchResults(keywords)               
500        except ParseError:
501            results = []
502        print 'browsing search:', time.time()-t
503        return results
504
505    #### Fast catalog handling ###########################################
506
507    def wakeLazy(self, lazy):
508        new=[]
509        for l in lazy._seq:
510            if isinstance(l, Lazy):
511                new.extend(self.wakeLazy(l))
512            else:
513                new.append(l)
514        return new
515
516    def fastPick(self, lazy_results, top):
517        """ Takes Lazy results and picks one random metadata obj from top """
518        tries=4
519        choice=randint(0,top)
520        while tries: # since we don't know how long the list is we need to use trial and error
521            # to find if the random index has corresponding item
522            i=0
523            for item in lazy_results._seq:
524                if choice==i:
525                    return lazy_results._func(item)
526                i+=1
527            choice/=2
528            tries-=1
529        return lazy_results._func(item)
530
531    def fastLink(self, user):
532        """ Finds an user from catalog and returns a tuple containing nicename and link
533        because this gets asked so often and it is relatively expensive to build, we cache them in non-permanent dict here in catalog tool
534        """
535        if not hasattr(self, 'author_cache'):
536            self.author_cache={}
537        if not user in self.author_cache:
538            res=self.searchResults(Creator=user, portal_type='MemberFolder')
539            for l in res._seq:
540                authortuple= (self.getEntry('getNicename', l), self.getpath(l))
541                self.author_cache[user]=authortuple
542                return authortuple
543        else:
544            return self.author_cache[user]
545        return None       
546
547    def getEntry(self, index, key, empty=[]):
548        val=self._catalog.getIndex(index).getEntryForObject(key, empty)
549        if callable(val):
550            val=val()
551        return val       
552           
553    def fastLinks(self, results, limit):
554        """ Takes Lazy results and returns a list of tuples (title, url, portal_type) """
555        def safeData(index, key):
556            v=index.getEntryForObject(key, [])
557            if callable(v):
558                return v()
559            return v
560
561        def wakeUp(lazy, c):
562            new=[]
563            for l in lazy._seq:
564                if isinstance(l, Lazy):
565                    c,newer=wakeUp(l,c)
566                    new.extend(newer)
567                else:
568                    new.append((safeData(title_index, l), self.getpath(l), safeData(type_index, l)))
569                    c+=1
570                if c==limit:
571                    break
572            return c,new
573        title_index=self._catalog.getIndex('getNicename')
574        type_index=self._catalog.getIndex('portal_type')
575        c,new=wakeUp(results,0)       
576        return new
577
578    def fastMetadata(self, results, indexes, cut=0):
579        """ Takes Lazy results and returns a list of tuple for values from indexes.
580        Indexes is a string or a tuple of strings.
581        This is an order of magnitude faster than getting CatalogBrains for each object"""
582        c=0
583        def safeData(index, key):
584            v=index.getEntryForObject(key, [])
585            if callable(v):
586                return v()
587            return v
588
589        def wakeUp(lazy,c):
590            new=[]
591            if isinstance(lazy, list):
592                seq=lazy
593            else:
594                seq=lazy._seq
595            for l in seq:
596                if isinstance(l, Lazy):
597                    new.extend(wakeUp(l,c))
598                else:
599                    values=[]   
600                    for index in index_sources:
601                        if index:
602                            values.append(safeData(index, l))
603                        else:
604                            values.append(l)
605                    new.append(tuple(values))
606                    c+=1
607                    if c==cut:
608                        break
609            return new
610        def wakeUpFaster(lazy, c):
611            new=[]
612            if isinstance(lazy, list):
613                seq=lazy
614            else:
615                seq=lazy._seq
616            for l in seq:
617                if isinstance(l, Lazy):
618                    new.extend(wakeUpFaster(l,c))
619                else:
620                    new.append(index.getEntryForObject(l, []))
621                c+=1
622                if c==cut:
623                    break
624            return new
625        if isinstance(indexes, tuple) or isinstance(indexes, list):
626            index_sources=[ind!='rid' and self._catalog.getIndex(ind) for ind in indexes]
627            new=wakeUp(results,c)
628        else:
629            index=self._catalog.getIndex(indexes)
630            new=wakeUpFaster(results,c)
631        return new
632       
633    def fastCount(self, results, indexes):
634        """ Takes Lazy results and returns a dictionary or tuple of dictionaries for values and their counts from indexes.
635        Index is a string or tuple of strings.
636        This is an order of magnitude faster than getting CatalogBrains for each object"""
637
638        def safeData(index, key):
639            v=index.getEntryForObject(key, [])
640            if callable(v):
641                return v()
642            return v
643       
644        def wakeUpAndCount(lazy):
645            for l in lazy._seq:
646                if isinstance(l, Lazy):
647                    wakeUpAndCount(l)
648                else:
649                    values=[]   
650                    for dic, index in indexes:
651                        val=safeData(index, l)
652                        if val:
653                            if isinstance(val, list):
654                                for v in val:                       
655                                    dic[v[:50]]=dic.get(v[:50],0)+1
656                            elif val:
657                                val=str(val)[:50]
658                                dic[val]=dic.get(val,0)+1
659        def wakeUpFasterAndCount(lazy):
660            for l in lazy._seq:
661                if isinstance(l, Lazy):
662                    wakeUpFasterAndCount(l)
663                else:
664                    val=safeData(index, l)
665                    if isinstance(val, list):
666                        for v in val:                       
667                            dic[v[:50]]=dic.get(v[:50],0)+1
668                    elif val:
669                        val=str(val)[:50]
670                        dic[val]=dic.get(val,0)+1
671        index=None
672        if isinstance(indexes, tuple):
673            if not results:
674                return [{} for ind in indexes]
675            indexes=[({}, self._catalog.getIndex(ind)) for ind in indexes]
676            new=wakeUpAndCount(results)
677            return tuple([dic for dic,index in indexes])
678        else:
679            if not results:
680                return {}
681            index=self._catalog.getIndex(indexes)
682            dic={}
683            new=wakeUpFasterAndCount(results)
684            return dic
685
686    def getSomeMetadataForRID(self, rid, md_fields):
687        record = self._catalog.data[rid]
688        schema = self._catalog.schema
689        if isinstance(md_fields, (tuple, list)):
690            result = {}
691            for md in md_fields:
692                result[md]= record[schema[md]]
693            return result
694        else:
695            return record[schema[md_fields]]
696
697
698    ##############################      Clouds      ######################## 
699
700    def buildCloudData(self, results, request=None):
701        """ Build tag cloud result tuples (name, tagsize, obj_url, tag_value, nicename) for given form from result set """
702        def adjustTag(val, steps=8):
703            # helper method to adjust hit count of this tag to relative size (1,...,8)
704            try:
705                val=int((8*log(val-mincount,2))/log(maxcount-mincount,2))
706            except (OverflowError, ZeroDivisionError):
707                val=0
708            if not val:
709                val=1
710            return val
711        lemill_tool = getToolByName(self, 'lemill_tool')
712        if not results:
713            return []       
714        src=dict(request.form.items())
715        if 'base' not in src:
716            return []
717        portal_url=getToolByName(self, 'portal_url')()
718        path=request['ACTUAL_URL'].split('/')
719        if 'content' in path:
720            link_base='/'.join((portal_url,'content','browse'))
721        elif 'methods' in path:
722            link_base='/'.join((portal_url,'methods','browse'))
723        elif 'tools' in path:
724            link_base='/'.join((portal_url,'tools','browse'))
725        elif 'community' in path:
726            link_base='/'.join((portal_url,'community','browse'))
727        else:
728            link_base='/'.join((portal_url,'browse'))       
729        lang_part=''
730        base=src['base']
731        language=src.get('language','')
732        if language:
733            lang_part='language=%s&' % language
734        title_cloud=False
735        if base=='language':
736            link_root='%s?language=' % link_base       
737            cloud_index='Language'
738        elif base=='tags':
739            link_root='%s?%stags=' % (link_base, lang_part)       
740            cloud_index='getTags'
741        elif base=='subject_area':
742            link_root='%s?%ssubject_area=' % (link_base, lang_part)       
743            cloud_index='getSubject_area'
744        elif base=='target_group':
745            link_root='%s?%starget_group=' % (link_base, lang_part)       
746            cloud_index='getTarget_group'
747        elif base=='title':
748            resultlist=self.fastMetadata(results, ('sortable_title','getScore','getNicename', 'path'), cut=100)
749            maxcount=resultlist[0][1]
750            mincount=resultlist[-1][1]             
751            resultlist.sort()
752            resultlist=[(x[2], adjustTag(x[1], steps=6), x[3]) for x in resultlist if x[2]]
753            return resultlist
754           
755        hits=self.fastCount(results, cloud_index)
756        resultlist=zip(hits.values(),hits.keys())
757        resultlist.sort(reverse=True)
758        maxcount=resultlist[0][0] # first!
759        resultlist = resultlist[:100]
760        mincount=resultlist[-1][0] # last!
761        # At this point resultlist= [(number_of_hits, tag_text),...]
762        # It should end up as: [(displayed_text, number_of_hits, link_url)...]
763        if cloud_index=='Language' or cloud_index=='getLanguage_skills':
764            resultlist=[(lemill_tool.getPrettyLanguage(x[1]), adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
765        elif cloud_index=='getSubject_area':
766            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
767        elif cloud_index=='getTarget_group':
768            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
769        else:
770            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
771        resultlist.sort()
772        return resultlist
773
774    ########### Front page top lists ############
775   
776    def getTopFive(self, results=None, key_index=None, link_body='', clean=False):
777        """ Returns top five results for key_index (getTags, getSubject_area...) for certain language """
778        if clean or not hasattr(self, 'top5results'):
779            self.top5results={'content':{}, 'methods':{}, 'tools':{}, 'community':{}}
780        data=self.top5results
781        path=self.REQUEST['ACTUAL_URL'].split('/')
782        for section in ['content','methods','tools','community',None]:
783            if section in path:
784                break
785        if not section:
786            return []
787        ilanguage=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
788        if key_index in data[section]:
789            top5lists=data[section][key_index]
790        else:
791            top5lists={}
792            data[section][key_index]=top5lists
793        if ilanguage in top5lists:
794            return top5lists[ilanguage]
795        else:
796            tops=self.fastCount(results, key_index)
797            resultlist=zip(tops.values(),tops.keys())
798            resultlist.sort(reverse=True)
799            resultlist=resultlist[:5]
800            if key_index=='getSubject_area':
801                resultlist=[(x[1], ''.join((link_body, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
802            elif key_index=='getTarget_group':
803                resultlist=[(x[1], ''.join((link_body, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
804            else:
805                resultlist=[(x[1], ''.join((link_body, x[1]))) for x in resultlist]
806            top5lists[ilanguage]=resultlist
807            print top5lists
808            return resultlist
809
810    ############################# batch ##########################
811
812    def batch(self, results=None, request=None):
813        """ Use Plone's batch """
814        if request and 'b_start' in request.form:
815            try:
816                b_start=int(request.form['b_start'])
817            except ValueError:
818                b_start=0
819        else:
820            b_start=0 
821        b= Batch(results, 30, b_start, orphan=1)   
822        return b 
823
824    def batchBaseUrl(self):
825        """ Keep all other parametres as they are, but add or change 'b_start' """
826        form=self.REQUEST.form
827        if 'b_start' in form:
828            del form['b_start']
829        last_url='?'.join((self.REQUEST.ACTUAL_URL, urlencode(form)))
830        new= last_url+'&b_start=%s'
831        return new
832       
833
834#
835#    def getTagCloud(self, search_results, index_type):
836#        """ Build a cloud based on how many occurences of this item are in results """
837#        if not search_results:
838#            return []
839#        lemill_tool = getToolByName(self, 'lemill_tool')
840#        pc = getToolByName(self, 'portal_catalog')
841#        from math import log
842#        maxcount=0
843#
844#        hits={}
845#        hits=pc.fastCount(search_results, index_type)
846#        resultlist=zip(hits.values(),hits.keys())
847#        if not resultlist:
848#            return []
849#        resultlist.sort()
850#        resultlist.reverse()
851#        maxcount=resultlist[0][0] # first!
852#        # if the first cut score for tag is x, we want to cut off all of the tags with score x.
853#        if len(resultlist)>100:
854#            #cutpoint = [x[0] for x in resultlist].index(resultlist[100]) can't figure this now, fix later
855#            cutpoint = 100
856#            resultlist = resultlist[:cutpoint]
857#        mincount=resultlist[-1][0]
858#        resultlist=[(x[1], x[0], '',x[1],x[1]) for x in resultlist]
859#
860#        # adjust to 1-8. We don't have to worry about score 0, they're already removed.
861#        if maxcount>1:
862#            resultlist=map(adjust, resultlist)
863#        # prettify language names
864#        if index_type=='Language' or index_type=='getLanguage_skills':
865#            resultlist=[(x[0],x[1],x[2],x[3],lemill_tool.getPrettyLanguage(x[4])) for x in resultlist]           
866#        if index_type=='getTarget_group':
867#            def compfunc(t2,t1):
868#                if t2[0] in TARGET_GROUP and t1[0] in TARGET_GROUP:
869#                    return  TARGET_GROUP.index(t2[0]) - TARGET_GROUP.index(t1[0])
870#                else:
871#                    return -1
872#            resultlist.sort(cmp=compfunc)
873#        else:   
874#            resultlist.sort()
875#        return resultlist
876#       
877#
878#    def getTitleCloud(self, search_results, browse_type):
879#        """ Build a cloud based on popularity score for that resource """
880#        pc=getToolByName(self,'portal_catalog')
881#        # uniquetuplelist contains result metadata reordered: (sort_title, count, url, indexvalue, title)
882#        if not search_results:
883#            return []
884#
885#        def isDefaultTitle(x):
886#            """ some heuristic to recognize default titles """
887#            return re.match(r'.*\.(...)$', x) or re.match(r'.*\.(....)$', x)
888#                   
889#        popularity = pc.fastMetadata(search_results, ('getScore','rid','getNicename','sortable_title'))
890#        popularity.sort(reverse=True)
891#        popularity=popularity[:100]
892#        titlecloud=[(sortable_title, getScore, self.REQUEST.physicalPathToURL(pc.getpath(rid)), sortable_title, getNicename or sortable_title) for (getScore, rid, getNicename, sortable_title) in popularity if sortable_title]
893#       
894#        if not titlecloud:
895#            return []
896#        titlecloud.sort()
897#        maxscore=max([x[1] for x in titlecloud])
898#        if maxscore>1:
899#            titlecloud=map(adjust, titlecloud)
900#        return titlecloud
901
902
903
904
905
906CatalogTool.__doc__ = PloneCatalogTool.__doc__
907
908InitializeClass(CatalogTool)
Note: See TracBrowser for help on using the repository browser.