source: trunk/LeMillCatalogTool.py @ 3067

Revision 3067, 42.0 KB checked in by jukka, 9 years ago (diff)

Added Brazilian Portuguese to available languages.

Line 
1#
2# Plone CatalogTool simplified -- does not use ExtensibleIndexableObjectWrapper, workflows or permissions. Basically just ZCatalog posing as Plone Tool, allowing but ignoring plonish commands.
3#
4
5from Products.CMFPlone import ToolNames
6from AccessControl import ClassSecurityInfo
7from Globals import InitializeClass
8from Globals import DTMLFile
9
10from Products.ZCatalog.ZCatalog import ZCatalog
11from Products.CMFPlone.CatalogTool import CatalogTool as PloneCatalogTool
12from Products.CMFPlone.PloneBatch import Batch
13from Products.PythonScripts.standard import urlencode
14
15from zope.interface import implements
16
17from Products.CMFCore.utils import SimpleRecord, getToolByName
18from Products.ZCTextIndex.HTMLSplitter import HTMLWordSplitter
19from Products.ZCTextIndex.Lexicon import CaseNormalizer
20from Products.ZCTextIndex.Lexicon import Splitter
21from Products.ZCTextIndex.Lexicon import StopWordRemover
22
23from Products.CMFPlone.UnicodeSplitter import Splitter as UnicodeSplitter
24from Products.CMFPlone.UnicodeSplitter import CaseNormalizer as UnicodeCaseNormalizer
25from Products.ZCTextIndex.ZCTextIndex import PLexicon
26from Products.ZCatalog.Lazy import Lazy
27from random import randint
28from config import CONTENT_TYPES, MATERIAL_TYPES, ACTIVITY_TYPES, TOOLS_TYPES, LANGUAGES, LANGUAGES_DICT, TYPE_NAMES, SUBJECT_AREAS_DICT, TARGET_GROUPS_DICT, SUBJECT_AREAS_INVERSE_DICT, TARGET_GROUPS_INVERSE_DICT, COMMUNITY_TYPES, SEARCHABLE_TYPES, DEFAULT_ICONS, TARGET_GROUP
29import time
30from math import log
31from messagefactory_ import i18nme as _
32
33class CatalogTool(PloneCatalogTool):
34
35    meta_type = 'LeMill Catalog Tool'
36    security = ClassSecurityInfo()
37    toolicon = 'skins/lemill/tool.gif'
38
39    __implements__ = PloneCatalogTool.__implements__
40
    # Originally this CatalogTool simplified some expensive parts of Plone's catalog tool and added some
    # methods allowing us more control over indexing / unindexing
43   
44    # For LeMill 3.0,
45
46    security.declarePublic( 'enumerateIndexes' ) # Subclass can call
47    def enumerateIndexes( self ):
48        #   Return a list of ( index_name, type, extra ) tuples for the initial
49        #   index set.
50        #   Creator is deprecated and may go away, use listCreators!
51        #   meta_type is deprecated and may go away, use portal_type!
52        plaintext_extra = SimpleRecord( lexicon_id='plaintext_lexicon'
53                                      , index_type='Okapi BM25 Rank'
54                                      )
55        htmltext_extra = SimpleRecord( lexicon_id='htmltext_lexicon'
56                                     , index_type='Okapi BM25 Rank'
57                                     )
58        # Adding plone_lexicon as it should be, no nee dto change the name for our own >>> safer that way
59        plone_extra = SimpleRecord( lexicon_id='plone_lexicon'
60                                  , index_type='Okapi BM25 Rank'
61                                  )
62
63        return ( ('Title', 'ZCTextIndex', plone_extra)
64               , ('Subject', 'KeywordIndex', None)
65               , ('Description', 'ZCTextIndex', plone_extra)
66               , ('Creator', 'FieldIndex', None)
67               , ('listCreators', 'KeywordIndex', None)
68               , ('SearchableText', 'ZCTextIndex', plone_extra)
69               , ('Date', 'DateIndex', None)
70               , ('Type', 'FieldIndex', None)
71               , ('created', 'DateIndex', None)
72               , ('effective', 'DateIndex', None)
73               , ('expires', 'DateIndex', None)
74               , ('modified', 'DateIndex', None)
75               , ('allowedRolesAndUsers', 'KeywordIndex', None)
76               , ('review_state', 'FieldIndex', None)
77               , ('in_reply_to', 'FieldIndex', None)
78               , ('meta_type', 'FieldIndex', None)
79               , ('getId', 'FieldIndex', None)
80               , ('path', 'PathIndex', None)
81               , ('portal_type', 'FieldIndex', None)
82               )
83
84    security.declarePublic('enumerateLexicons')
85    def enumerateLexicons(self):
86        # Creating our own lexicon in a different way, as the automatic creation seems to be needing something to act as a StopWordRemover and fails miserably without it.
87        lexicon = PLexicon('plone_lexicon', '', UnicodeSplitter(), UnicodeCaseNormalizer())
88        self._setObject('plone_lexicon', lexicon)
89        return (
90                 ( 'plaintext_lexicon'
91                 , Splitter()
92                 , CaseNormalizer()
93                 , StopWordRemover()
94                 )
95               , ( 'htmltext_lexicon'
96                 , HTMLWordSplitter()
97                 , CaseNormalizer()
98                 , StopWordRemover()
99                 )
100               )
101    #XXX END
102
103    def catalog_object(self, object, uid, idxs=[],
104                       update_metadata=1, pghandler=None):
105        if object and hasattr(object, 'portal_type') and object.portal_type!='CollectionsFolder':
106            ZCatalog.catalog_object(self, object, uid, idxs, update_metadata, pghandler=pghandler)
107
108    def searchResults(self, REQUEST=None, **kw):
109        """Calls ZCatalog.searchResults """
110        return ZCatalog.searchResults(self, REQUEST, **kw)
111
112    __call__ = searchResults
113
114
115    def titleSearch(self, title='', sort_limit=0):
116        """ search titles containing given string """
117        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
118        if sort_limit:
119            query['sort_limit']=sort_limit
120        query['Title']='%s*' % title
121        results=self.searchResults(query)
122        return results
123
124    def fulltextSearch(self, SearchableText='', sort_limit=0):
125        """ search fulltext for a string """
126        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
127        if sort_limit:
128            query['sort_limit']=sort_limit
129        query['SearchableText']='%s*' % SearchableText
130        results=self.searchResults(query)
131        return results
132
133    ###### 'Adapters' start here
134    # these are methods that are very specifically used by page templates to efficiently return just the results needed there 
135
136    def buildResultsFromBatch(self, batch):
137        """ This method takes a Batch of results (usually 30 or less, LazyMap) and returns a list of
138            *minimal metadata* about them. This metadata is in form of dictionary.
139           
140            The idea is that instantiating real metadata objects from batch results is still a costly process
141            and with this we can show search/browse results without instantiating metadata.
142           
143            This method will also do some preprocessing for metadata, f.ex finds proper names for authors and builds links to them.
144           
145            This method is crafted for resource_list_macros.pt and if you aren't going to display results as they are displayed there, you probably
146            shouldn't use this.
147            """
148        d_list=[]
149        lutool=getToolByName(self, 'lemill_usertool')
150        ltool=getToolByName(self, 'lemill_tool')
151        url_base=getToolByName(self, 'portal_url')()
152        created=False
153        edited=False
154        if hasattr(self, 'REQUEST') and hasattr(self.REQUEST, 'form'):
155            form=self.REQUEST.form
156            if form:
157                created= 'created' in form
158                edited='edited' in form
159        if hasattr(batch._sequence, '_seq'):
160            print 'using fast metadata to build results'
161            seq=list(batch._sequence._seq[batch.start-1:batch.end])
162            keys=['rid','getHasCoverImage','getNicename','portal_type','getLocation_country','getTags', 'Language','listCreators']
163            if created:
164                keys.append('created')
165            elif edited:
166                keys.append('getLatestEdit')
167            md=self.fastMetadata(seq, keys)
168            for item in md:
169                if item[3] not in TYPE_NAMES:
170                    continue
171                d={'url':self.getpath(item[0]),
172                    'coverimage_url':item[1],
173                    'title':item[2],
174                    'readable_type':item[3],
175                    'country':item[4],
176                    'tags':item[5],
177                    'language':item[6],
178                    'authors':item[7]}
179                if created:
180                    d['created']=item[8] # These are in weird DateIndex format, difficult to map to actual dates
181                elif edited:
182                    d['edited']=item[8]
183                d_list.append(d)
184            if created or edited:
185                now=time.gmtime() # mirrors DateIndex's conversion script to provide a compatible 'now'
186                now= ( ( ( ( now[0] * 12 + now[1] ) * 31 + now[2] ) * 24 + now[3] ) * 60 + now[4] )
187                useDateIndex=True                         
188        else: # batch has already been converted to catalog Brains object
189            print 'using Brains objects to build results'
190            for item in batch:               
191                d={'url':item.getURL(),
192                    'coverimage_url':item.getHasCoverImage,
193                    'title':item.getNicename,
194                    'readable_type':item.portal_type,
195                    'country':item.getLocation_country,
196                    'tags':item.getTags,
197                    'language':item.Language,
198                    'authors':item.listCreators}                 
199                if created:
200                    d['created']=item.created
201                elif edited:
202                    d['edited']=item.getLatestEdit
203                d_list.append(d)
204            now=time.time()
205            useDateIndex=False
206        tag_base='/'.join((url_base,'search?index_type=tags&q='))
207        lang_base='/'.join((url_base,'search?language='))
208        country_base='/'.join((url_base,'community/browse?country='))
209       
210        # Manipulate raw index data to something more useful
211        for item in d_list:
212            if callable(item['title']):
213                item['title']=item['title']()
214            url=item['url']
215            if item['coverimage_url']:
216                item['coverimage_url']='/'.join((url,'coverImage'))
217            else:
218                item['coverimage_url']='/'.join((url_base, DEFAULT_ICONS[item['readable_type']]))
219            ll=item['language']
220            if ll:
221                item['language']=LANGUAGES_DICT[ll]
222                item['language_link']=''.join((lang_base,ll))
223            item['tags']=[(tag, ''.join((tag_base, tag))) for tag in item['tags']]
224            item['authors']=[self.fastLink(author) for author in item['authors']]
225            if item['country']:         
226                item['country_link']=''.join((country_base,item['country']))
227            item['readable_type']=TYPE_NAMES.get(item['readable_type'], None)[0]
228            if 'created' in item:
229                item['timedif']=ltool.getTimeDifference(item['created'], now=now, useDateIndex=useDateIndex)
230            elif 'edited' in item:
231                item['timedif']=ltool.getTimeDifference(item['edited'], now=now, useDateIndex=useDateIndex)
232        return d_list
233
234    def createBrowsingOptions(self, REQUEST, results=None, **kw):
235        """ This is used by browse_macros.pt and other places that need to display filters
236            based on the search results available and the section where the results are displayed.
237       
238            This method analyzes both request and results to decide what filter fields it should display and if it should
239            preselect something or count occurences for each value. The logic for this may look hairy,
240             but it can be understood by approaching this case-by-case:
241            'If we are browsing in Content section, and there are >1000 results, what filters we should show'           
242        """
243        src=dict(REQUEST.form.items())
244        # section determines what kinds of options there are:
245        path=REQUEST['ACTUAL_URL'].split('/')
246        force_language=True
247        if 'portfolio' in kw:
248            do_languages=False
249            do_subject_areas=True
250            do_target_groups=False
251            do_types=True
252            type_selection=SEARCHABLE_TYPES
253            do_edited=True
254            do_tags=False
255            count_indexes=('portal_type','getSubject_area')
256        elif 'content' in path:
257            do_languages=True
258            do_subject_areas=True
259            do_target_groups=True
260            do_types=True
261            type_selection=CONTENT_TYPES
262            do_edited=True
263            do_tags=True
264            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
265        elif 'methods' in path or 'tools' in path:
266            do_languages=True
267            do_subject_areas=False
268            do_target_groups=False
269            do_types=False
270            do_edited=True
271            do_tags=True
272            count_indexes=('Language',)
273        elif 'community' in path:
274            do_languages=True
275            force_language=False
276            do_subject_areas=True
277            do_target_groups=False
278            do_types=True
279            type_selection=COMMUNITY_TYPES
280            do_edited=True
281            do_tags=True
282            count_indexes=('Language','portal_type','getSubject_area')
283        else:
284            do_languages=True
285            force_language=False
286            do_subject_areas=True
287            do_target_groups=True
288            do_types=True
289            type_selection=SEARCHABLE_TYPES
290            do_edited=True
291            do_tags=False
292            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
293       
294        d={'do_languages':do_languages, 'do_subject_areas':do_subject_areas,'do_target_groups':do_target_groups,'do_types':do_types,'do_edited':do_edited, 'do_tags':do_tags, 'q':'', 'state':'', 'author':'', 'created':''}
295
296
297        # now see if the results should be counted and provide a dictionary of counted values if necessary
298        if results and len(results)<1000:
299            count_results= dict(zip(count_indexes, self.fastCount(results, count_indexes)))
300            count=len(results)
301        else:
302            count=0
303
304        # Languages: two lists, one for probable options and other for improbable
305        if do_languages:
306            all_languages=list(LANGUAGES)[1:] # Remove 'language neutral' from options
307            all_languages[0]=('','any language')
308            all_language_codes=[l[0] for l in all_languages]
309            lang_dict = getToolByName(self, 'lemill_tool').language_dict
310            common_language_codes=getToolByName(self,'lemill_usertool').getLanguages()
311            selected_language= src.get('language','')
312            was_empty=not selected_language
313            if was_empty:
314                if force_language:
315                    selected_language=common_language_codes[0]
316                else:
317                    selected_language='all'
318            common_languages=[]
319            rare_languages=[('all','All',int(selected_language=='all'),0)]
320            if count and was_empty and False:
321                langs=count_results['Language']
322                for langcode in common_language_codes:
323                    if langcode and langcode in langs:
324                        common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
325                        all_language_codes.remove(langcode)           
326                for langcode in all_language_codes:
327                    if langcode and langcode in langs:
328                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
329            else:
330                for langcode in common_language_codes:
331                    common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
332                    all_language_codes.remove(langcode)           
333                for langcode in all_language_codes:
334                    if langcode:
335                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
336            d['common_languages']=common_languages
337            d['rare_languages']=rare_languages
338            d['lang_disabled']=False # portal_type==Piece can set this to true
339            if selected_language!='all':
340                d['lang_filter']='language=%s&' % selected_language
341            else:
342                d['lang_filter']=''
343        if do_subject_areas:
344            selected=src.get('subject_area','')
345            if selected:
346                all_count='?'
347            else:
348                all_count=count
349            subject_areas=[('','All',int(not selected), all_count)]
350            if count:
351                counts=count_results['getSubject_area']
352                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
353                    if sa_full in counts:
354                        subject_areas.append((sa_key, sa_full, int(selected==sa_key), counts[sa_full]))
355            else:
356                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
357                    subject_areas.append((sa_key, sa_full, int(selected==sa_key), 0))
358
359            d['subject_area']=subject_areas
360            d['subjs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
361        if do_target_groups:
362            selected=src.get('target_group','')
363            if selected:
364                all_count='?'
365            else:
366                all_count=count
367            target_groups=[('','All',int(not selected),all_count)]
368            if count:
369                counts=count_results['getTarget_group']
370                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
371                    if tg_full in counts:
372                        target_groups.append((tg_key, tg_full, int(selected==tg_key), counts[tg_full]))
373            else:
374                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
375                    target_groups.append((tg_key, tg_full, int(selected==tg_key), 0))
376            d['target_group']=target_groups
377            d['targs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
378        if do_types:
379            selected=src.get('type','')
380            if selected:
381                all_count='?'
382            else:
383                all_count=count
384            types=[('','All',int(not selected),all_count)]
385            if count:
386                counts=count_results['portal_type']
387                for type_key in type_selection:
388                    if type_key in counts:
389                        types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), counts[type_key]))
390            else:
391                for type_key in type_selection:
392                    types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), 0))
393            d['type']=types
394            if selected:
395                if selected in ['Piece','Activity','Tool']:
396                    d['targs_disabled']=True
397                    d['subjs_disabled']=True
398                if selected=='Piece':
399                    d['lang_disabled']=True
400
401        if do_edited:
402            try:
403                selected=int(src.get('edited',0))
404            except ValueError:
405                selected=0
406            d['edited']=[('',_('Any time'),int(not selected),0), (365,_('Last year'),int(selected==365),0), (30,_('Last month'),int(selected==30),0), (7,_('Last week'),int(selected==7),0), (1,_('Yesterday'),int(selected==1),0)]
407        # Previous search term
408        if 'q' in src:
409            d['q']=src['q']
410        if 'state' in src:
411            d['state']=src['state']
412        if 'author' in src:
413            d['author']=src['author']           
414        return d
415
416
417    def decideBrowsingSubType(self):
418        """ Browse page heading needs to know what kind of browsing is going on """
419        form=self.REQUEST.form       
420        if 'state' in form:
421            if form['state']=='public': return 'published'
422            if form['state']=='draft': return 'drafts'
423        path=self.REQUEST['ACTUAL_URL'].split('/')
424        for key in ['content','methods','tools','community']:
425            if key in path: return key
426        return ''           
427
428    def decideSearchSubType(self):
429        """ Search page heading needs to know what kind of search is going on """
430        form=self.REQUEST.form       
431        if 'created' in form:
432            if 'type' in form and form['type']=='MemberFolder':
433                return 'new_members'
434            else:
435                return 'new_resources'
436        if 'edited' in form and 'q' not in form:
437            return 'recent_edits'
438        return 'search'
439
440
441    def browsingSearch(self, REQUEST=None, **kw):       
442        """ This is a general purpose catalog search that  """
443        print 'browsing search called'
444        t=time.time()
445        src=REQUEST.form
446        if '-C' in src:
447            del src['-C']
448        if not (src or kw):
449            path=REQUEST['ACTUAL_URL'].split('/')
450            if 'browse' in path or 'search' in path or 'cloud' in path:
451                print 'empty search, return []'
452                return []
453        keywords={'sort_on':'getScore','sort_order':'reverse','getState':('draft', 'public')}
454        keywords.update(src)
455        keywords.update(kw)
456        if 'language' in keywords:
457            if keywords['language']=='all':
458                keywords['Language']=''
459            else:           
460                keywords['Language']=keywords['language']
461            del keywords['language']
462        if 'subject_area' in keywords:
463            if keywords['subject_area'] in SUBJECT_AREAS_DICT:
464                keywords['getSubject_area']=SUBJECT_AREAS_DICT[keywords['subject_area']]
465            del keywords['subject_area']
466        if 'target_group' in keywords:
467            if keywords['target_group'] in TARGET_GROUPS_DICT:
468                keywords['getTarget_group']=TARGET_GROUPS_DICT[keywords['target_group']]
469            del keywords['target_group']
470        if 'tags' in keywords:
471            keywords['getTags']=keywords['tags']
472            del keywords['tags']
473        if 'author' in keywords:
474            keywords['listCreators']=keywords['author']
475            del keywords['author']
476        if 'state' in keywords:
477            keywords['getState']=keywords['state']
478            del keywords['state']
479        if 'country' in keywords:
480            keywords['getLocation_country']=keywords['country']
481        if 'type' in keywords:
482            if keywords['type']=='lr':
483                keywords['portal_type']=list(CONTENT_TYPES)+['Activity','Tool']
484            else:
485                keywords['portal_type']=keywords['type']
486            del keywords['type']
487        else: # portal_type is determined by location
488            path=REQUEST['ACTUAL_URL'].split('/')
489            if 'portfolio' in kw:
490                keywords['portal_type']=list(CONTENT_TYPES)+['Activity','Tool']
491                del keywords['portfolio']
492            elif 'content' in path:
493                keywords['portal_type']=list(CONTENT_TYPES)
494            elif 'methods' in path:
495                keywords['portal_type']='Activity'
496            elif 'tools' in path:
497                keywords['portal_type']='Tool'
498            elif 'community' in path:
499                keywords['portal_type']=list(COMMUNITY_TYPES)
500            else:
501                keywords['portal_type']=list(SEARCHABLE_TYPES)
502        if 'q' in keywords:
503            keywords['SearchableText']=keywords['q']
504            del keywords['q']
505        if 'created' in keywords:
506            keywords['created']={'query': self.ZopeTime()-int(keywords['created']), 'range':'min'}
507            keywords['sort_on']='created'
508        if 'edited' in keywords:
509            keywords['getLatestEdit']={'query': self.ZopeTime()-int(keywords['edited']), 'range':'min'}
510            keywords['sort_on']='getLatestEdit'
511            del keywords['edited']
512
513        if not 'Language' in keywords:
514            found=False
515            if 'base' in keywords and keywords['base']=='language':
516                found=True               
517            elif 'portal_type' in keywords and ('MemberFolder' in keywords['portal_type'] or 'GroupBlog' in keywords['portal_type']):
518                found=True
519            else:
520                for compensating in ['SearchableText','listCreators','getTarget_group','getTags','getSubject_area','created','getLatestEdit', 'getLocation_country']:
521                    if compensating in keywords:
522                        found=True
523                        break
524            if not found:
525                keywords['Language']=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
526        if 'Language' in keywords:
527            if not keywords['Language']: # Delete empty 'Language'
528                del keywords['Language']
529            elif 'portal_type' in keywords: # If browsing for MemberFolders, ignore Language, use getLanguage_skills instead
530                if 'MemberFolder' in keywords['portal_type'] or 'GroupBlog' in keywords['portal_type']:
531                    keywords['getLanguage_skills']=keywords['Language']
532                    del keywords['Language']
533
534        if 'base' in keywords:
535            del keywords['base']
536        print 'keywords:', keywords
537        try:
538            results = self.searchResults(keywords)               
539        except ParseError:
540            results = []
541        print 'found ', len(results), ' items.'
542        print 'browsing search:', time.time()-t
543        return results
544
545    #### Fast catalog handling ###########################################
546
547    def wakeLazy(self, lazy):
548        new=[]
549        for l in lazy._seq:
550            if isinstance(l, Lazy):
551                new.extend(self.wakeLazy(l))
552            else:
553                new.append(l)
554        return new
555
556    def fastPick(self, lazy_results, top):
557        """ Takes Lazy results and picks one random metadata obj from top """
558        tries=4
559        choice=randint(0,top)
560        while tries: # since we don't know how long the list is we need to use trial and error
561            # to find if the random index has corresponding item
562            i=0
563            for item in lazy_results._seq:
564                if choice==i:
565                    return lazy_results._func(item)
566                i+=1
567            choice/=2
568            tries-=1
569        return lazy_results._func(item)
570
571    def fastLink(self, user):
572        """ Finds an user from catalog and returns a tuple containing nicename and link
573        because this gets asked so often and it is relatively expensive to build, we cache them in non-permanent dict here in catalog tool
574        """
575        if not hasattr(self, 'author_cache'):
576            self.author_cache={}
577        if not user in self.author_cache:
578            res=self.searchResults(Creator=user, portal_type='MemberFolder')
579            for l in res._seq:
580                authortuple= (self.getEntry('getNicename', l), self.getpath(l))
581                self.author_cache[user]=authortuple
582                return authortuple
583        else:
584            return self.author_cache[user]
585        return None       
586
587    def getEntry(self, index, key, empty=[]):
588        val=self._catalog.getIndex(index).getEntryForObject(key, empty)
589        if callable(val):
590            val=val()
591        return val       
592           
593    def fastLinks(self, results, limit):
594        """ Takes Lazy results and returns a list of tuples (title, url, portal_type) """
595        def safeData(index, key):
596            v=index.getEntryForObject(key, [])
597            if callable(v):
598                return v()
599            return v
600
601        def wakeUp(lazy, c):
602            new=[]
603            for l in lazy._seq:
604                if isinstance(l, Lazy):
605                    c,newer=wakeUp(l,c)
606                    new.extend(newer)
607                else:
608                    new.append((safeData(title_index, l), self.getpath(l), safeData(type_index, l)))
609                    c+=1
610                if c==limit:
611                    break
612            return c,new
613        title_index=self._catalog.getIndex('getNicename')
614        type_index=self._catalog.getIndex('portal_type')
615        c,new=wakeUp(results,0)       
616        return new
617
618    def fastMetadata(self, results, indexes, cut=0):
619        """ Takes Lazy results and returns a list of tuple for values from indexes.
620        Indexes is a string or a tuple of strings.
621        This is an order of magnitude faster than getting CatalogBrains for each object"""
622        c=0
623        def safeData(index, key):
624            v=index.getEntryForObject(key, [])
625            if callable(v):
626                return v()
627            return v
628
629        def wakeUp(lazy,c):
630            new=[]
631            if isinstance(lazy, list):
632                seq=lazy
633            else:
634                seq=lazy._seq
635            for l in seq:
636                if isinstance(l, Lazy):
637                    new.extend(wakeUp(l,c))
638                else:
639                    values=[]   
640                    for index in index_sources:
641                        if index:
642                            values.append(safeData(index, l))
643                        else:
644                            values.append(l)
645                    new.append(tuple(values))
646                    c+=1
647                    if c==cut:
648                        break
649            return new
650        def wakeUpFaster(lazy, c):
651            new=[]
652            if isinstance(lazy, list):
653                seq=lazy
654            else:
655                seq=lazy._seq
656            for l in seq:
657                if isinstance(l, Lazy):
658                    new.extend(wakeUpFaster(l,c))
659                else:
660                    new.append(index.getEntryForObject(l, []))
661                c+=1
662                if c==cut:
663                    break
664            return new
665        if isinstance(indexes, tuple) or isinstance(indexes, list):
666            index_sources=[ind!='rid' and self._catalog.getIndex(ind) for ind in indexes]
667            new=wakeUp(results,c)
668        else:
669            index=self._catalog.getIndex(indexes)
670            new=wakeUpFaster(results,c)
671        return new
672       
673    def fastCount(self, results, indexes):
674        """ Takes Lazy results and returns a dictionary or tuple of dictionaries for values and their counts from indexes.
675        Index is a string or tuple of strings.
676        This is an order of magnitude faster than getting CatalogBrains for each object"""
677
678        def safeData(index, key):
679            v=index.getEntryForObject(key, [])
680            if callable(v):
681                return v()
682            return v
683       
684        def wakeUpAndCount(lazy):
685            for l in lazy._seq:
686                if isinstance(l, Lazy):
687                    wakeUpAndCount(l)
688                else:
689                    values=[]   
690                    for dic, index in indexes:
691                        val=safeData(index, l)
692                        if val:
693                            if isinstance(val, list):
694                                for v in val:                       
695                                    dic[v[:50]]=dic.get(v[:50],0)+1
696                            elif val:
697                                val=str(val)[:50]
698                                dic[val]=dic.get(val,0)+1
699        def wakeUpFasterAndCount(lazy):
700            for l in lazy._seq:
701                if isinstance(l, Lazy):
702                    wakeUpFasterAndCount(l)
703                else:
704                    val=safeData(index, l)
705                    if isinstance(val, list):
706                        for v in val:                       
707                            dic[v[:50]]=dic.get(v[:50],0)+1
708                    elif val:
709                        val=str(val)[:50]
710                        dic[val]=dic.get(val,0)+1
711        index=None
712        if isinstance(indexes, tuple):
713            if not results:
714                return [{} for ind in indexes]
715            indexes=[({}, self._catalog.getIndex(ind)) for ind in indexes]
716            new=wakeUpAndCount(results)
717            return tuple([dic for dic,index in indexes])
718        else:
719            if not results:
720                return {}
721            index=self._catalog.getIndex(indexes)
722            dic={}
723            new=wakeUpFasterAndCount(results)
724            return dic
725
726    def getSomeMetadataForRID(self, rid, md_fields):
727        record = self._catalog.data[rid]
728        schema = self._catalog.schema
729        if isinstance(md_fields, (tuple, list)):
730            result = {}
731            for md in md_fields:
732                result[md]= record[schema[md]]
733            return result
734        else:
735            return record[schema[md_fields]]
736
737
738    ##############################      Clouds      ######################## 
739
740    def buildCloudData(self, results, request=None, size=8, portfolio=False):
741        """ Build tag cloud result tuples (name, tagsize, obj_url, tag_value, nicename) for given form from result set """
742        def adjustTag(val, steps=8):
743            # helper method to adjust hit count of this tag to relative size (1,...,8)
744            try:
745                val=int((steps*log(val-mincount,2))/log(maxcount-mincount,2))
746            except (OverflowError, ZeroDivisionError):
747                val=0
748            if not val:
749                val=1
750            return val
751        lemill_tool = getToolByName(self, 'lemill_tool')
752        if not results:
753            return []
754        if portfolio:
755            cloud_index='getTags'
756            mf=self.getMemberFolder()           
757            link_root='%s/portfolio?tags=' % mf.absolute_url()
758        else:       
759            src=dict(request.form.items())
760            if 'base' not in src:
761                return []
762            portal_url=getToolByName(self, 'portal_url')()
763            path=request['ACTUAL_URL'].split('/')
764            if 'content' in path:
765                link_base='/'.join((portal_url,'content','browse'))
766            elif 'methods' in path:
767                link_base='/'.join((portal_url,'methods','browse'))
768            elif 'tools' in path:
769                link_base='/'.join((portal_url,'tools','browse'))
770            elif 'community' in path:
771                link_base='/'.join((portal_url,'community','browse'))
772            else:
773                link_base='/'.join((portal_url,'browse'))       
774            lang_part=''
775            base=src['base']
776            language=src.get('language','')
777            type_restriction=src.get('type','')
778            if language:
779                lang_part='language=%s&' % language
780            if type_restriction:
781                lang_part+='type=%s&' % type_restriction
782            title_cloud=False
783            if base=='language':
784                link_root='%s?language=' % link_base
785                cloud_index='Language'
786                if 'type' in src:
787                    if 'GroupBlog' in src['type'] or 'MemberFolder' in src['type']:
788                        cloud_index='getLanguage_skills'     
789            elif base=='tags':
790                link_root='%s?%stags=' % (link_base, lang_part)       
791                cloud_index='getTags'
792            elif base=='subject_area':
793                link_root='%s?%ssubject_area=' % (link_base, lang_part)       
794                cloud_index='getSubject_area'
795            elif base=='target_group':
796                link_root='%s?%starget_group=' % (link_base, lang_part)       
797                cloud_index='getTarget_group'
798            elif base=='country':
799                link_root='%s?%scountry=' % (link_base, lang_part)
800                cloud_index='getLocation_country'
801            elif base=='title':
802                resultlist=self.fastMetadata(results, ('sortable_title','getScore','getNicename', 'path'), cut=100)
803                if not resultlist:
804                    return []
805                maxcount=resultlist[0][1]
806                mincount=resultlist[-1][1]             
807                resultlist.sort()
808                resultlist=[(x[2], adjustTag(x[1], steps=6), x[3]) for x in resultlist if x[2]]
809                return resultlist
810        hits=self.fastCount(results, cloud_index)
811        if not hits:
812            return []
813        resultlist=zip(hits.values(),hits.keys())
814        resultlist.sort(reverse=True)
815        maxcount=resultlist[0][0] # first!
816        resultlist = resultlist[:100]
817        mincount=resultlist[-1][0] # last!
818        # At this point resultlist= [(number_of_hits, tag_text),...]
819        # It should end up as: [(displayed_text, number_of_hits, link_url)...]
820        if cloud_index=='Language' or cloud_index=='getLanguage_skills':
821            resultlist=[(lemill_tool.getPrettyLanguage(x[1]), adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
822        elif cloud_index=='getSubject_area':
823            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
824        elif cloud_index=='getTarget_group':
825            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
826        else:
827            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
828        resultlist.sort()
829        return resultlist
830
831    ########### Front page top lists ############
832   
833    def getTopFive(self, results=None, key_index=None, link_body='', clean=False, cache_key=''):
834        """ Returns top five results for key_index (getTags, getSubject_area...) for certain language """
835        if clean or not hasattr(self, 'top5results'):
836            self.top5results={'content':{}, 'methods':{}, 'tools':{}, 'community':{}}
837        data=self.top5results
838        cache_key=cache_key or key_index
839        path=self.REQUEST['ACTUAL_URL'].split('/')
840        for section in ['content','methods','tools','community',None]:
841            if section in path:
842                break
843        if not section:
844            return []
845        ilanguage=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
846        if cache_key in data[section]:
847            top5lists=data[section][cache_key]
848        else:
849            top5lists={}
850            data[section][cache_key]=top5lists
851        if ilanguage in top5lists:
852            return top5lists[ilanguage]
853        else:
854            tops=self.fastCount(results, key_index)
855            resultlist=zip(tops.values(),tops.keys())
856            resultlist.sort(reverse=True)
857            resultlist=resultlist[:5]
858            if key_index=='getSubject_area':
859                resultlist=[(x[1], ''.join((link_body, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
860            elif key_index=='getTarget_group':
861                resultlist=[(x[1], ''.join((link_body, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
862            else:
863                resultlist=[(x[1], ''.join((link_body, x[1]))) for x in resultlist]
864            top5lists[ilanguage]=resultlist
865            print top5lists
866            return resultlist
867
868    ############################# batch ##########################
869
870    def batch(self, results=None, request=None):
871        """ Use Plone's batch """
872        if request and 'b_start' in request.form:
873            try:
874                b_start=int(request.form['b_start'])
875            except ValueError:
876                b_start=0
877        else:
878            b_start=0 
879        b= Batch(results, 30, b_start, orphan=1)   
880        return b 
881
882    def batchBaseUrl(self):
883        """ Keep all other parametres as they are, but add or change 'b_start' """
884        form=self.REQUEST.form
885        if 'b_start' in form:
886            del form['b_start']
887        last_url='?'.join((self.REQUEST.ACTUAL_URL, urlencode(form)))
888        new= last_url+'&b_start=%s'
889        return new
890       
891
892#
893#    def getTagCloud(self, search_results, index_type):
894#        """ Build a cloud based on how many occurences of this item are in results """
895#        if not search_results:
896#            return []
897#        lemill_tool = getToolByName(self, 'lemill_tool')
898#        pc = getToolByName(self, 'portal_catalog')
899#        from math import log
900#        maxcount=0
901#
902#        hits={}
903#        hits=pc.fastCount(search_results, index_type)
904#        resultlist=zip(hits.values(),hits.keys())
905#        if not resultlist:
906#            return []
907#        resultlist.sort()
908#        resultlist.reverse()
909#        maxcount=resultlist[0][0] # first!
910#        # if the first cut score for tag is x, we want to cut off all of the tags with score x.
911#        if len(resultlist)>100:
912#            #cutpoint = [x[0] for x in resultlist].index(resultlist[100]) can't figure this now, fix later
913#            cutpoint = 100
914#            resultlist = resultlist[:cutpoint]
915#        mincount=resultlist[-1][0]
916#        resultlist=[(x[1], x[0], '',x[1],x[1]) for x in resultlist]
917#
918#        # adjust to 1-8. We don't have to worry about score 0, they're already removed.
919#        if maxcount>1:
920#            resultlist=map(adjust, resultlist)
921#        # prettify language names
922#        if index_type=='Language' or index_type=='getLanguage_skills':
923#            resultlist=[(x[0],x[1],x[2],x[3],lemill_tool.getPrettyLanguage(x[4])) for x in resultlist]           
924#        if index_type=='getTarget_group':
925#            def compfunc(t2,t1):
926#                if t2[0] in TARGET_GROUP and t1[0] in TARGET_GROUP:
927#                    return  TARGET_GROUP.index(t2[0]) - TARGET_GROUP.index(t1[0])
928#                else:
929#                    return -1
930#            resultlist.sort(cmp=compfunc)
931#        else:   
932#            resultlist.sort()
933#        return resultlist
934#       
935#
936#    def getTitleCloud(self, search_results, browse_type):
937#        """ Build a cloud based on popularity score for that resource """
938#        pc=getToolByName(self,'portal_catalog')
939#        # uniquetuplelist contains result metadata reordered: (sort_title, count, url, indexvalue, title)
940#        if not search_results:
941#            return []
942#
943#        def isDefaultTitle(x):
944#            """ some heuristic to recognize default titles """
945#            return re.match(r'.*\.(...)$', x) or re.match(r'.*\.(....)$', x)
946#                   
947#        popularity = pc.fastMetadata(search_results, ('getScore','rid','getNicename','sortable_title'))
948#        popularity.sort(reverse=True)
949#        popularity=popularity[:100]
950#        titlecloud=[(sortable_title, getScore, self.REQUEST.physicalPathToURL(pc.getpath(rid)), sortable_title, getNicename or sortable_title) for (getScore, rid, getNicename, sortable_title) in popularity if sortable_title]
951#       
952#        if not titlecloud:
953#            return []
954#        titlecloud.sort()
955#        maxscore=max([x[1] for x in titlecloud])
956#        if maxscore>1:
957#            titlecloud=map(adjust, titlecloud)
958#        return titlecloud
959
960
961
962
963
# Reuse the docstring of Plone's catalog tool for this subclass.
CatalogTool.__doc__ = PloneCatalogTool.__doc__

# Zope class initialisation (Globals.InitializeClass); presumably this is what
# activates the ClassSecurityInfo declarations of the class -- confirm against
# the Zope documentation.
InitializeClass(CatalogTool)
Note: See TracBrowser for help on using the repository browser.