source: trunk/LeMillCatalogTool.py @ 3079

Revision 3079, 42.1 KB checked in by jukka, 9 years ago (diff)

Fixed css for collections.

Line 
1#
2# Plone CatalogTool simplified -- does not use ExtensibleIndexableObjectWrapper, workflows or permissions. Basically just ZCatalog posing as Plone Tool, allowing but ignoring plonish commands.
3#
4
import time
from math import log
from random import randint

from Products.CMFPlone import ToolNames
from AccessControl import ClassSecurityInfo
from Globals import InitializeClass
from Globals import DTMLFile

from Products.ZCatalog.ZCatalog import ZCatalog
from Products.CMFPlone.CatalogTool import CatalogTool as PloneCatalogTool
from Products.CMFPlone.PloneBatch import Batch
from Products.PythonScripts.standard import urlencode

from zope.interface import implements

from Products.CMFCore.utils import SimpleRecord, getToolByName
from Products.ZCTextIndex.HTMLSplitter import HTMLWordSplitter
from Products.ZCTextIndex.Lexicon import CaseNormalizer
from Products.ZCTextIndex.Lexicon import Splitter
from Products.ZCTextIndex.Lexicon import StopWordRemover
from Products.ZCTextIndex.ParseTree import ParseError

from Products.CMFPlone.UnicodeSplitter import Splitter as UnicodeSplitter
from Products.CMFPlone.UnicodeSplitter import CaseNormalizer as UnicodeCaseNormalizer
from Products.ZCTextIndex.ZCTextIndex import PLexicon
from Products.ZCatalog.Lazy import Lazy

from config import CONTENT_TYPES, MATERIAL_TYPES, ACTIVITY_TYPES, TOOLS_TYPES, LANGUAGES, LANGUAGES_DICT, TYPE_NAMES, SUBJECT_AREAS_DICT, TARGET_GROUPS_DICT, SUBJECT_AREAS_INVERSE_DICT, TARGET_GROUPS_INVERSE_DICT, COMMUNITY_TYPES, SEARCHABLE_TYPES, DEFAULT_ICONS, TARGET_GROUP
from messagefactory_ import i18nme as _
32
33class CatalogTool(PloneCatalogTool):
34
35    meta_type = 'LeMill Catalog Tool'
36    security = ClassSecurityInfo()
37    toolicon = 'skins/lemill/tool.gif'
38
39    __implements__ = PloneCatalogTool.__implements__
40
    # Originally this CatalogTool simplified some expensive parts of Plone's catalog tool and added
    # methods giving us more control over indexing / unindexing.
43   
44    # For LeMill 3.0,
45
46    security.declarePublic( 'enumerateIndexes' ) # Subclass can call
47    def enumerateIndexes( self ):
48        #   Return a list of ( index_name, type, extra ) tuples for the initial
49        #   index set.
50        #   Creator is deprecated and may go away, use listCreators!
51        #   meta_type is deprecated and may go away, use portal_type!
52        plaintext_extra = SimpleRecord( lexicon_id='plaintext_lexicon'
53                                      , index_type='Okapi BM25 Rank'
54                                      )
55        htmltext_extra = SimpleRecord( lexicon_id='htmltext_lexicon'
56                                     , index_type='Okapi BM25 Rank'
57                                     )
58        # Adding plone_lexicon as it should be, no nee dto change the name for our own >>> safer that way
59        plone_extra = SimpleRecord( lexicon_id='plone_lexicon'
60                                  , index_type='Okapi BM25 Rank'
61                                  )
62
63        return ( ('Title', 'ZCTextIndex', plone_extra)
64               , ('Subject', 'KeywordIndex', None)
65               , ('Description', 'ZCTextIndex', plone_extra)
66               , ('Creator', 'FieldIndex', None)
67               , ('listCreators', 'KeywordIndex', None)
68               , ('SearchableText', 'ZCTextIndex', plone_extra)
69               , ('Date', 'DateIndex', None)
70               , ('Type', 'FieldIndex', None)
71               , ('created', 'DateIndex', None)
72               , ('effective', 'DateIndex', None)
73               , ('expires', 'DateIndex', None)
74               , ('modified', 'DateIndex', None)
75               , ('allowedRolesAndUsers', 'KeywordIndex', None)
76               , ('review_state', 'FieldIndex', None)
77               , ('in_reply_to', 'FieldIndex', None)
78               , ('meta_type', 'FieldIndex', None)
79               , ('getId', 'FieldIndex', None)
80               , ('path', 'PathIndex', None)
81               , ('portal_type', 'FieldIndex', None)
82               )
83
84    security.declarePublic('enumerateLexicons')
85    def enumerateLexicons(self):
86        # Creating our own lexicon in a different way, as the automatic creation seems to be needing something to act as a StopWordRemover and fails miserably without it.
87        lexicon = PLexicon('plone_lexicon', '', UnicodeSplitter(), UnicodeCaseNormalizer())
88        self._setObject('plone_lexicon', lexicon)
89        return (
90                 ( 'plaintext_lexicon'
91                 , Splitter()
92                 , CaseNormalizer()
93                 , StopWordRemover()
94                 )
95               , ( 'htmltext_lexicon'
96                 , HTMLWordSplitter()
97                 , CaseNormalizer()
98                 , StopWordRemover()
99                 )
100               )
101    #XXX END
102
103    def catalog_object(self, object, uid, idxs=[],
104                       update_metadata=1, pghandler=None):
105        if object and hasattr(object, 'portal_type') and object.portal_type!='CollectionsFolder':
106            ZCatalog.catalog_object(self, object, uid, idxs, update_metadata, pghandler=pghandler)
107
108    def searchResults(self, REQUEST=None, **kw):
109        """Calls ZCatalog.searchResults """
110        return ZCatalog.searchResults(self, REQUEST, **kw)
111
112    __call__ = searchResults
113
114
115    def titleSearch(self, title='', sort_limit=0):
116        """ search titles containing given string """
117        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
118        if sort_limit:
119            query['sort_limit']=sort_limit
120        query['Title']='%s*' % title
121        results=self.searchResults(query)
122        return results
123
124    def fulltextSearch(self, SearchableText='', sort_limit=0):
125        """ search fulltext for a string """
126        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
127        if sort_limit:
128            query['sort_limit']=sort_limit
129        query['SearchableText']='%s*' % SearchableText
130        results=self.searchResults(query)
131        return results
132
133    ###### 'Adapters' start here
134    # these are methods that are very specifically used by page templates to efficiently return just the results needed there 
135
136    def buildResultsFromBatch(self, batch):
137        """ This method takes a Batch of results (usually 30 or less, LazyMap) and returns a list of
138            *minimal metadata* about them. This metadata is in form of dictionary.
139           
140            The idea is that instantiating real metadata objects from batch results is still a costly process
141            and with this we can show search/browse results without instantiating metadata.
142           
143            This method will also do some preprocessing for metadata, f.ex finds proper names for authors and builds links to them.
144           
145            This method is crafted for resource_list_macros.pt and if you aren't going to display results as they are displayed there, you probably
146            shouldn't use this.
147            """
148        d_list=[]
149        lutool=getToolByName(self, 'lemill_usertool')
150        ltool=getToolByName(self, 'lemill_tool')
151        url_base=getToolByName(self, 'portal_url')()
152        created=False
153        edited=False
154        if hasattr(self, 'REQUEST') and hasattr(self.REQUEST, 'form'):
155            form=self.REQUEST.form
156            if form:
157                created= 'created' in form
158                edited='edited' in form
159        if hasattr(batch._sequence, '_seq'):
160            seq=list(batch._sequence._seq[batch.start-1:batch.end])
161            keys=['rid','getHasCoverImage','getNicename','portal_type','getLocation_country','getTags', 'Language','listCreators']
162            if created:
163                keys.append('created')
164            elif edited:
165                keys.append('getLatestEdit')
166            md=self.fastMetadata(seq, keys)
167            for item in md:
168                if item[3] not in TYPE_NAMES:
169                    continue
170                d={'url':self.getpath(item[0]),
171                    'coverimage_url':item[1],
172                    'title':item[2],
173                    'readable_type':item[3],
174                    'country':item[4],
175                    'tags':item[5],
176                    'language':item[6],
177                    'authors':item[7]}
178                if created:
179                    d['created']=item[8] # These are in weird DateIndex format, difficult to map to actual dates
180                elif edited:
181                    d['edited']=item[8]
182                d_list.append(d)
183            if created or edited:
184                now=time.gmtime() # mirrors DateIndex's conversion script to provide a compatible 'now'
185                now= ( ( ( ( now[0] * 12 + now[1] ) * 31 + now[2] ) * 24 + now[3] ) * 60 + now[4] )
186                useDateIndex=True                         
187        else: # batch has already been converted to catalog Brains object
188            for item in batch:               
189                d={'url':item.getURL(),
190                    'coverimage_url':item.getHasCoverImage,
191                    'title':item.getNicename,
192                    'readable_type':item.portal_type,
193                    'country':item.getLocation_country,
194                    'tags':item.getTags,
195                    'language':item.Language,
196                    'authors':item.listCreators}                 
197                if created:
198                    d['created']=item.created
199                elif edited:
200                    d['edited']=item.getLatestEdit
201                d_list.append(d)
202            now=time.time()
203            useDateIndex=False
204        tag_base='/'.join((url_base,'search?index_type=tags&q='))
205        lang_base='/'.join((url_base,'search?language='))
206        country_base='/'.join((url_base,'community/browse?country='))
207       
208        # Manipulate raw index data to something more useful
209        for item in d_list:
210            if callable(item['title']):
211                item['title']=item['title']()
212            url=item['url']
213            if item['coverimage_url']:
214                item['coverimage_url']='/'.join((url,'coverImage'))
215            else:
216                item['coverimage_url']='/'.join((url_base, DEFAULT_ICONS[item['readable_type']]))
217            ll=item['language']
218            if ll:
219                item['language']=LANGUAGES_DICT[ll]
220                item['language_link']=''.join((lang_base,ll))
221            item['tags']=[(tag, ''.join((tag_base, tag))) for tag in item['tags']]
222            item['authors']=[self.fastLink(author) for author in item['authors']]
223            if item['country']:         
224                item['country_link']=''.join((country_base,item['country']))
225            item['readable_type']=TYPE_NAMES.get(item['readable_type'], None)[0]
226            if 'created' in item:
227                item['timedif']=ltool.getTimeDifference(item['created'], now=now, useDateIndex=useDateIndex)
228            elif 'edited' in item:
229                item['timedif']=ltool.getTimeDifference(item['edited'], now=now, useDateIndex=useDateIndex)
230        return d_list
231
    def createBrowsingOptions(self, REQUEST, results=None, **kw):
        """ Build the filter options shown next to browse / search results.

            This is used by browse_macros.pt and other places that need to display filters
            based on the search results available and the section where the results are displayed.

            This method analyzes both request and results to decide what filter fields it should display and if it should
            preselect something or count occurrences for each value. The logic for this may look hairy,
            but it can be understood by approaching this case-by-case:
            'If we are browsing in Content section, and there are >1000 results, what filters we should show'

            REQUEST -- its form holds the current selections, its ACTUAL_URL decides the section.
            results -- optional search results; when non-empty and fewer than 1000, values are
                       counted with fastCount and options without hits are left out.
            kw      -- only the presence of a 'portfolio' key is looked at.

            Returns a dictionary with the do_* flags, the 'q', 'state', 'author' and 'created'
            strings, and for every enabled filter a list of (key, label, selected, count) tuples.
        """
        src=dict(REQUEST.form.items())
        # section determines what kinds of options there are:
        path=REQUEST['ACTUAL_URL'].split('/')
        force_language=True
        if 'portfolio' in kw:
            do_languages=False
            do_subject_areas=True
            do_target_groups=False
            do_types=True
            type_selection=SEARCHABLE_TYPES
            do_edited=True
            do_tags=False
            count_indexes=('portal_type','getSubject_area')
        elif 'content' in path:
            do_languages=True
            do_subject_areas=True
            do_target_groups=True
            do_types=True
            type_selection=CONTENT_TYPES
            do_edited=True
            do_tags=True
            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
        elif 'methods' in path or 'tools' in path:
            # type_selection is not bound in this branch; that is fine because
            # it is only read when do_types is true.
            do_languages=True
            do_subject_areas=False
            do_target_groups=False
            do_types=False
            do_edited=True
            do_tags=True
            count_indexes=('Language',)
        elif 'community' in path:
            do_languages=True
            force_language=False
            do_subject_areas=True
            do_target_groups=False
            do_types=True
            type_selection=COMMUNITY_TYPES
            do_edited=True
            do_tags=True
            count_indexes=('Language','portal_type','getSubject_area')
        else:
            do_languages=True
            force_language=False
            do_subject_areas=True
            do_target_groups=True
            do_types=True
            type_selection=SEARCHABLE_TYPES
            do_edited=True
            do_tags=False
            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')

        # NOTE(review): 'created' starts out empty and, unlike 'q', 'state' and
        # 'author', is never filled in from the request below -- confirm that
        # this is intended.
        d={'do_languages':do_languages, 'do_subject_areas':do_subject_areas,'do_target_groups':do_target_groups,'do_types':do_types,'do_edited':do_edited, 'do_tags':do_tags, 'q':'', 'state':'', 'author':'', 'created':''}


        # now see if the results should be counted and provide a dictionary of counted values if necessary
        # count_results is only bound when count ends up non-zero; every read
        # of it below is guarded by `if count`.
        if results and len(results)<1000:
            count_results= dict(zip(count_indexes, self.fastCount(results, count_indexes)))
            count=len(results)
        else:
            count=0

        # Languages: two lists, one for probable options and other for improbable
        if do_languages:
            all_languages=list(LANGUAGES)[1:] # Remove 'language neutral' from options
            # NOTE(review): this overwrites the first entry left after the
            # slice, presumably an 'any language' placeholder -- confirm
            # against LANGUAGES in config.
            all_languages[0]=('','any language')
            all_language_codes=[l[0] for l in all_languages]
            lang_dict = getToolByName(self, 'lemill_tool').language_dict
            common_language_codes=getToolByName(self,'lemill_usertool').getLanguages()
            selected_language= src.get('language','')
            was_empty=not selected_language
            if was_empty:
                # No language in the request: preselect the first language
                # returned by the user tool, or 'all' in sections that do not
                # force one.
                if force_language:
                    selected_language=common_language_codes[0]
                else:
                    selected_language='all'
            common_languages=[]
            rare_languages=[('all','All',int(selected_language=='all'),0)]
            # The trailing `and False` makes this branch unreachable: counted
            # language lists are switched off and the else branch always runs.
            if count and was_empty and False:
                langs=count_results['Language']
                for langcode in common_language_codes:
                    if langcode and langcode in langs:
                        common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
                        all_language_codes.remove(langcode)
                for langcode in all_language_codes:
                    if langcode and langcode in langs:
                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
            else:
                for langcode in common_language_codes:
                    common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
                    # Whatever remains in all_language_codes afterwards makes
                    # up the 'rare' list.
                    all_language_codes.remove(langcode)
                for langcode in all_language_codes:
                    if langcode:
                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
            d['common_languages']=common_languages
            d['rare_languages']=rare_languages
            d['lang_disabled']=False # portal_type==Piece can set this to true
            if selected_language!='all':
                d['lang_filter']='language=%s&' % selected_language
            else:
                d['lang_filter']=''
        if do_subject_areas:
            selected=src.get('subject_area','')
            # With a value already selected the total for 'All' is not known
            # from these results, hence the '?'.
            if selected:
                all_count='?'
            else:
                all_count=count
            subject_areas=[('','All',int(not selected), all_count)]
            if count:
                # Counted mode: only list the subject areas that occur in the results.
                counts=count_results['getSubject_area']
                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
                    if sa_full in counts:
                        subject_areas.append((sa_key, sa_full, int(selected==sa_key), counts[sa_full]))
            else:
                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
                    subject_areas.append((sa_key, sa_full, int(selected==sa_key), 0))

            d['subject_area']=subject_areas
            d['subjs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
        if do_target_groups:
            selected=src.get('target_group','')
            if selected:
                all_count='?'
            else:
                all_count=count
            target_groups=[('','All',int(not selected),all_count)]
            if count:
                # Counted mode: only list the target groups that occur in the results.
                counts=count_results['getTarget_group']
                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
                    if tg_full in counts:
                        target_groups.append((tg_key, tg_full, int(selected==tg_key), counts[tg_full]))
            else:
                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
                    target_groups.append((tg_key, tg_full, int(selected==tg_key), 0))
            d['target_group']=target_groups
            d['targs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
        if do_types:
            selected=src.get('type','')
            if selected:
                all_count='?'
            else:
                all_count=count
            types=[('','All',int(not selected),all_count)]
            if count:
                # Counted mode: only list the types that occur in the results.
                counts=count_results['portal_type']
                for type_key in type_selection:
                    if type_key in counts:
                        types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), counts[type_key]))
            else:
                for type_key in type_selection:
                    types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), 0))
            d['type']=types
            if selected:
                # A selected type can switch other filters off; the flags are
                # set even when the matching filter was not built above.
                if selected in ['Piece','Activity','Tool']:
                    d['targs_disabled']=True
                    d['subjs_disabled']=True
                if selected=='Piece':
                    d['lang_disabled']=True

        if do_edited:
            # A non-numeric 'edited' value in the request counts as 'Any time'.
            try:
                selected=int(src.get('edited',0))
            except ValueError:
                selected=0
            d['edited']=[('',_('Any time'),int(not selected),0), (365,_('Last year'),int(selected==365),0), (30,_('Last month'),int(selected==30),0), (7,_('Last week'),int(selected==7),0), (1,_('Yesterday'),int(selected==1),0)]
        # Previous search term
        if 'q' in src:
            d['q']=src['q']
        if 'state' in src:
            d['state']=src['state']
        if 'author' in src:
            d['author']=src['author']
        return d
413
414
415    def decideBrowsingSubType(self):
416        """ Browse page heading needs to know what kind of browsing is going on """
417        form=self.REQUEST.form       
418        if 'state' in form:
419            if form['state']=='public': return 'published'
420            if form['state']=='draft': return 'drafts'
421        path=self.REQUEST['ACTUAL_URL'].split('/')
422        for key in ['content','methods','tools','community']:
423            if key in path: return key
424        return ''           
425
426    def decideSearchSubType(self):
427        """ Search page heading needs to know what kind of search is going on """
428        form=self.REQUEST.form       
429        if 'created' in form:
430            if 'type' in form and form['type']=='MemberFolder':
431                return 'new_members'
432            else:
433                return 'new_resources'
434        if 'edited' in form and 'q' not in form:
435            return 'recent_edits'
436        return 'search'
437
438
439    def browsingSearch(self, REQUEST=None, **kw):       
440        """ This is a general purpose catalog search that can convert readable keywords from request into actual search terms.
441        recognized keywords: language, subject_area, target_group, type, tags, state, author, created, edited                 
442        """
443        src=REQUEST.form
444        if '-C' in src:
445            del src['-C']
446        if not (src or kw):
447            path=REQUEST['ACTUAL_URL'].split('/')
448            if 'browse' in path or 'search' in path or 'cloud' in path:
449                return []
450        keywords={'sort_on':'getScore','sort_order':'reverse','getState':('draft', 'public')}
451        keywords.update(src)
452        keywords.update(kw)
453        if 'language' in keywords:
454            if keywords['language']=='all':
455                keywords['Language']=''
456            else:           
457                keywords['Language']=keywords['language']
458            del keywords['language']
459        if 'subject_area' in keywords:
460            if keywords['subject_area'] in SUBJECT_AREAS_DICT:
461                keywords['getSubject_area']=SUBJECT_AREAS_DICT[keywords['subject_area']]
462            del keywords['subject_area']
463        if 'target_group' in keywords:
464            if keywords['target_group'] in TARGET_GROUPS_DICT:
465                keywords['getTarget_group']=TARGET_GROUPS_DICT[keywords['target_group']]
466            del keywords['target_group']
467        if 'tags' in keywords:
468            keywords['getTags']=keywords['tags']
469            del keywords['tags']
470        if 'author' in keywords and keywords['author']:
471            keywords['listCreators']=keywords['author']
472            del keywords['author']
473        if 'group' in keywords and keywords['group']:
474            keywords['getRawGroupEditing']=keywords['group']
475            del keywords['group']
476        if 'state' in keywords:
477            keywords['getState']=keywords['state']
478            del keywords['state']
479        if 'country' in keywords:
480            keywords['getLocation_country']=keywords['country']
481        if 'type' in keywords:
482            if keywords['type']=='lr':
483                keywords['portal_type']=list(CONTENT_TYPES)+['Activity','Tool']
484            else:
485                keywords['portal_type']=keywords['type']
486            del keywords['type']
487        else: # portal_type is determined by location
488            path=REQUEST['ACTUAL_URL'].split('/')
489            if 'portfolio' in kw:
490                keywords['portal_type']=list(CONTENT_TYPES)+['Activity','Tool','Collection']
491                del keywords['portfolio']
492            elif 'content' in path:
493                keywords['portal_type']=list(CONTENT_TYPES)
494            elif 'methods' in path:
495                keywords['portal_type']='Activity'
496            elif 'tools' in path:
497                keywords['portal_type']='Tool'
498            elif 'community' in path:
499                keywords['portal_type']=list(COMMUNITY_TYPES)
500            else:
501                keywords['portal_type']=list(SEARCHABLE_TYPES)
502        if 'q' in keywords:
503            keywords['SearchableText']=keywords['q']
504            del keywords['q']
505        if 'created' in keywords:
506            keywords['created']={'query': self.ZopeTime()-int(keywords['created']), 'range':'min'}
507            keywords['sort_on']='created'
508        if 'edited' in keywords:
509            keywords['getLatestEdit']={'query': self.ZopeTime()-int(keywords['edited']), 'range':'min'}
510            keywords['sort_on']='getLatestEdit'
511            del keywords['edited']
512
513        if not 'Language' in keywords:
514            found=False
515            if 'base' in keywords and keywords['base']=='language':
516                found=True               
517            elif 'portal_type' in keywords and ('MemberFolder' in keywords['portal_type'] or 'GroupBlog' in keywords['portal_type']):
518                found=True
519            else:
520                for compensating in ['SearchableText','listCreators','getTarget_group','getTags','getSubject_area','created','getLatestEdit', 'getLocation_country', 'getRawGroupEditing']:
521                    if compensating in keywords:
522                        found=True
523                        break
524            if not found:
525                keywords['Language']=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
526        if 'Language' in keywords:
527            if not keywords['Language']: # Delete empty 'Language'
528                del keywords['Language']
529            elif 'portal_type' in keywords: # If browsing for MemberFolders, ignore Language, use getLanguage_skills instead
530                if 'MemberFolder' in keywords['portal_type'] or 'GroupBlog' in keywords['portal_type']:
531                    keywords['getLanguage_skills']=keywords['Language']
532                    del keywords['Language']
533
534        if 'base' in keywords:
535            del keywords['base']
536        try:
537            results = self.searchResults(keywords)               
538        except ParseError:
539            results = []
540        return results
541
542    #### Fast catalog handling ###########################################
543
544    def wakeLazy(self, lazy):
545        new=[]
546        for l in lazy._seq:
547            if isinstance(l, Lazy):
548                new.extend(self.wakeLazy(l))
549            else:
550                new.append(l)
551        return new
552
553    def fastPick(self, lazy_results, top):
554        """ Takes Lazy results and picks one random metadata obj from top """
555        tries=4
556        choice=randint(0,top)
557        while tries: # since we don't know how long the list is we need to use trial and error
558            # to find if the random index has corresponding item
559            i=0
560            for item in lazy_results._seq:
561                if choice==i:
562                    return lazy_results._func(item)
563                i+=1
564            choice/=2
565            tries-=1
566        return lazy_results._func(item)
567
568    def fastLink(self, user):
569        """ Finds an user from catalog and returns a tuple containing nicename and link
570        because this gets asked so often and it is relatively expensive to build, we cache them in non-permanent dict here in catalog tool
571        """
572        if not hasattr(self, 'author_cache'):
573            self.author_cache={}
574        if not user in self.author_cache:
575            res=self.searchResults(Creator=user, portal_type='MemberFolder')
576            for l in res._seq:
577                authortuple= (self.getEntry('getNicename', l), self.getpath(l))
578                self.author_cache[user]=authortuple
579                return authortuple
580        else:
581            return self.author_cache[user]
582        return None       
583
584    def getEntry(self, index, key, empty=[]):
585        val=self._catalog.getIndex(index).getEntryForObject(key, empty)
586        if callable(val):
587            val=val()
588        return val       
589           
590    def fastLinks(self, results, limit):
591        """ Takes Lazy results and returns a list of tuples (title, url, portal_type) """
592        def safeData(index, key):
593            v=index.getEntryForObject(key, [])
594            if callable(v):
595                return v()
596            return v
597
598        def wakeUp(lazy, c):
599            new=[]
600            for l in lazy._seq:
601                if isinstance(l, Lazy):
602                    c,newer=wakeUp(l,c)
603                    new.extend(newer)
604                else:
605                    new.append((safeData(title_index, l), self.getpath(l), safeData(type_index, l)))
606                    c+=1
607                if c==limit:
608                    break
609            return c,new
610        title_index=self._catalog.getIndex('getNicename')
611        type_index=self._catalog.getIndex('portal_type')
612        c,new=wakeUp(results,0)       
613        return new
614
615    def fastMetadata(self, results, indexes, cut=0):
616        """ Takes Lazy results and returns a list of tuple for values from indexes.
617        Indexes is a string or a tuple of strings.
618        This is an order of magnitude faster than getting CatalogBrains for each object"""
619        c=0
620        def safeData(index, key):
621            v=index.getEntryForObject(key, [])
622            if callable(v):
623                return v()
624            return v
625
626        def wakeUp(lazy,c):
627            new=[]
628            if isinstance(lazy, list):
629                seq=lazy
630            else:
631                seq=lazy._seq
632            for l in seq:
633                if isinstance(l, Lazy):
634                    new.extend(wakeUp(l,c))
635                else:
636                    values=[]   
637                    for index in index_sources:
638                        if index:
639                            values.append(safeData(index, l))
640                        else:
641                            values.append(l)
642                    new.append(tuple(values))
643                    c+=1
644                    if c==cut:
645                        break
646            return new
647        def wakeUpFaster(lazy, c):
648            new=[]
649            if isinstance(lazy, list):
650                seq=lazy
651            else:
652                seq=lazy._seq
653            for l in seq:
654                if isinstance(l, Lazy):
655                    new.extend(wakeUpFaster(l,c))
656                else:
657                    new.append(index.getEntryForObject(l, []))
658                c+=1
659                if c==cut:
660                    break
661            return new
662        if isinstance(indexes, tuple) or isinstance(indexes, list):
663            index_sources=[ind!='rid' and self._catalog.getIndex(ind) for ind in indexes]
664            new=wakeUp(results,c)
665        else:
666            index=self._catalog.getIndex(indexes)
667            new=wakeUpFaster(results,c)
668        return new
669       
670    def fastCount(self, results, indexes):
671        """ Takes Lazy results and returns a dictionary or tuple of dictionaries for values and their counts from indexes.
672        Index is a string or tuple of strings.
673        This is an order of magnitude faster than getting CatalogBrains for each object"""
674
675        def safeData(index, key):
676            v=index.getEntryForObject(key, [])
677            if callable(v):
678                return v()
679            return v
680       
681        def wakeUpAndCount(lazy):
682            for l in lazy._seq:
683                if isinstance(l, Lazy):
684                    wakeUpAndCount(l)
685                else:
686                    values=[]   
687                    for dic, index in indexes:
688                        val=safeData(index, l)
689                        if val:
690                            if isinstance(val, list):
691                                for v in val:                       
692                                    dic[v[:50]]=dic.get(v[:50],0)+1
693                            elif val:
694                                val=str(val)[:50]
695                                dic[val]=dic.get(val,0)+1
696        def wakeUpFasterAndCount(lazy):
697            for l in lazy._seq:
698                if isinstance(l, Lazy):
699                    wakeUpFasterAndCount(l)
700                else:
701                    val=safeData(index, l)
702                    if isinstance(val, list):
703                        for v in val:                       
704                            dic[v[:50]]=dic.get(v[:50],0)+1
705                    elif val:
706                        val=str(val)[:50]
707                        dic[val]=dic.get(val,0)+1
708        index=None
709        if isinstance(indexes, tuple):
710            if not results:
711                return [{} for ind in indexes]
712            indexes=[({}, self._catalog.getIndex(ind)) for ind in indexes]
713            new=wakeUpAndCount(results)
714            return tuple([dic for dic,index in indexes])
715        else:
716            if not results:
717                return {}
718            index=self._catalog.getIndex(indexes)
719            dic={}
720            new=wakeUpFasterAndCount(results)
721            return dic
722
723    def getSomeMetadataForRID(self, rid, md_fields):
724        record = self._catalog.data[rid]
725        schema = self._catalog.schema
726        if isinstance(md_fields, (tuple, list)):
727            result = {}
728            for md in md_fields:
729                result[md]= record[schema[md]]
730            return result
731        else:
732            return record[schema[md_fields]]
733
734
735    ##############################      Clouds      ######################## 
736
737    def buildCloudData(self, results, request=None, size=8, portfolio=False):
738        """ Build tag cloud result tuples (name, tagsize, obj_url, tag_value, nicename) for given form from result set """
739        def adjustTag(val, steps=8):
740            # helper method to adjust hit count of this tag to relative size (1,...,8)
741            try:
742                val=int((steps*log(val-mincount,2))/log(maxcount-mincount,2))
743            except (OverflowError, ZeroDivisionError):
744                val=0
745            if not val:
746                val=1
747            return val
748        lemill_tool = getToolByName(self, 'lemill_tool')
749        if not results:
750            return []
751        if portfolio:
752            cloud_index='getTags'
753            mf=self.getMemberFolder()           
754            link_root='%s/portfolio?tags=' % mf.absolute_url()
755        else:       
756            src=dict(request.form.items())
757            if 'base' not in src:
758                return []
759            portal_url=getToolByName(self, 'portal_url')()
760            path=request['ACTUAL_URL'].split('/')
761            if 'content' in path:
762                link_base='/'.join((portal_url,'content','browse'))
763            elif 'methods' in path:
764                link_base='/'.join((portal_url,'methods','browse'))
765            elif 'tools' in path:
766                link_base='/'.join((portal_url,'tools','browse'))
767            elif 'community' in path:
768                link_base='/'.join((portal_url,'community','browse'))
769            else:
770                link_base='/'.join((portal_url,'browse'))       
771            lang_part=''
772            base=src['base']
773            language=src.get('language','')
774            type_restriction=src.get('type','')
775            if language:
776                lang_part='language=%s&' % language
777            if type_restriction:
778                lang_part+='type=%s&' % type_restriction
779            title_cloud=False
780            if base=='language':
781                link_root='%s?%slanguage=' % (link_base, lang_part)
782                cloud_index='Language'
783                if 'type' in src:
784                    if 'GroupBlog' in src['type'] or 'MemberFolder' in src['type']:
785                        cloud_index='getLanguage_skills'     
786            elif base=='tags':
787                link_root='%s?%stags=' % (link_base, lang_part)       
788                cloud_index='getTags'
789            elif base=='subject_area':
790                link_root='%s?%ssubject_area=' % (link_base, lang_part)       
791                cloud_index='getSubject_area'
792            elif base=='target_group':
793                link_root='%s?%starget_group=' % (link_base, lang_part)       
794                cloud_index='getTarget_group'
795            elif base=='country':
796                link_root='%s?%scountry=' % (link_base, lang_part)
797                cloud_index='getLocation_country'
798            elif base=='title':
799                resultlist=self.fastMetadata(results, ('sortable_title','getScore','getNicename', 'path'), cut=100)
800                if not resultlist:
801                    return []
802                maxcount=resultlist[0][1]
803                mincount=resultlist[-1][1]             
804                resultlist.sort()
805                resultlist=[(x[2], adjustTag(x[1], steps=6), x[3]) for x in resultlist if x[2]]
806                return resultlist
807        hits=self.fastCount(results, cloud_index)
808        if not hits:
809            return []
810        resultlist=zip(hits.values(),hits.keys())
811        resultlist.sort(reverse=True)
812        maxcount=resultlist[0][0] # first!
813        resultlist = resultlist[:100]
814        mincount=resultlist[-1][0] # last!
815        # At this point resultlist= [(number_of_hits, tag_text),...]
816        # It should end up as: [(displayed_text, number_of_hits, link_url)...]
817        if cloud_index=='Language' or cloud_index=='getLanguage_skills':
818            resultlist=[(lemill_tool.getPrettyLanguage(x[1]), adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
819        elif cloud_index=='getSubject_area':
820            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
821        elif cloud_index=='getTarget_group':
822            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
823        else:
824            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
825        resultlist.sort()
826        return resultlist
827
828    ########### Front page top lists ############
829   
830    def getTopFive(self, results=None, key_index=None, link_body='', clean=False, cache_key=''):
831        """ Returns top five results for key_index (getTags, getSubject_area...) for certain language """
832        if clean or not hasattr(self, 'top5results'):
833            self.top5results={'content':{}, 'methods':{}, 'tools':{}, 'community':{}}
834        data=self.top5results
835        cache_key=cache_key or key_index
836        path=self.REQUEST['ACTUAL_URL'].split('/')
837        for section in ['content','methods','tools','community',None]:
838            if section in path:
839                break
840        if not section:
841            return []
842        ilanguage=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
843        if cache_key in data[section]:
844            top5lists=data[section][cache_key]
845        else:
846            top5lists={}
847            data[section][cache_key]=top5lists
848        if ilanguage in top5lists:
849            return top5lists[ilanguage]
850        else:
851            tops=self.fastCount(results, key_index)
852            resultlist=zip(tops.values(),tops.keys())
853            resultlist.sort(reverse=True)
854            resultlist=resultlist[:5]
855            if key_index=='getSubject_area':
856                resultlist=[(x[1], ''.join((link_body, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
857            elif key_index=='getTarget_group':
858                resultlist=[(x[1], ''.join((link_body, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
859            else:
860                resultlist=[(x[1], ''.join((link_body, x[1]))) for x in resultlist]
861            top5lists[ilanguage]=resultlist
862            return resultlist
863
864    ############################# batch ##########################
865
866    def batch(self, results=None, request=None):
867        """ Use Plone's batch """
868        if request and 'b_start' in request.form:
869            try:
870                b_start=int(request.form['b_start'])
871            except ValueError:
872                b_start=0
873        else:
874            b_start=0 
875        b= Batch(results, 30, b_start, orphan=1)   
876        return b 
877
878    def batchBaseUrl(self):
879        """ Keep all other parametres as they are, but add or change 'b_start' """
880        form=self.REQUEST.form
881        if 'b_start' in form:
882            del form['b_start']
883        last_url='?'.join((self.REQUEST.ACTUAL_URL, urlencode(form)))
884        new= last_url+'&b_start=%s'
885        return new
886       
887
888#
889#    def getTagCloud(self, search_results, index_type):
890#        """ Build a cloud based on how many occurences of this item are in results """
891#        if not search_results:
892#            return []
893#        lemill_tool = getToolByName(self, 'lemill_tool')
894#        pc = getToolByName(self, 'portal_catalog')
895#        from math import log
896#        maxcount=0
897#
898#        hits={}
899#        hits=pc.fastCount(search_results, index_type)
900#        resultlist=zip(hits.values(),hits.keys())
901#        if not resultlist:
902#            return []
903#        resultlist.sort()
904#        resultlist.reverse()
905#        maxcount=resultlist[0][0] # first!
906#        # if the first cut score for tag is x, we want to cut off all of the tags with score x.
907#        if len(resultlist)>100:
908#            #cutpoint = [x[0] for x in resultlist].index(resultlist[100]) can't figure this now, fix later
909#            cutpoint = 100
910#            resultlist = resultlist[:cutpoint]
911#        mincount=resultlist[-1][0]
912#        resultlist=[(x[1], x[0], '',x[1],x[1]) for x in resultlist]
913#
914#        # adjust to 1-8. We don't have to worry about score 0, they're already removed.
915#        if maxcount>1:
916#            resultlist=map(adjust, resultlist)
917#        # prettify language names
918#        if index_type=='Language' or index_type=='getLanguage_skills':
919#            resultlist=[(x[0],x[1],x[2],x[3],lemill_tool.getPrettyLanguage(x[4])) for x in resultlist]           
920#        if index_type=='getTarget_group':
921#            def compfunc(t2,t1):
922#                if t2[0] in TARGET_GROUP and t1[0] in TARGET_GROUP:
923#                    return  TARGET_GROUP.index(t2[0]) - TARGET_GROUP.index(t1[0])
924#                else:
925#                    return -1
926#            resultlist.sort(cmp=compfunc)
927#        else:   
928#            resultlist.sort()
929#        return resultlist
930#       
931#
932#    def getTitleCloud(self, search_results, browse_type):
933#        """ Build a cloud based on popularity score for that resource """
934#        pc=getToolByName(self,'portal_catalog')
935#        # uniquetuplelist contains result metadata reordered: (sort_title, count, url, indexvalue, title)
936#        if not search_results:
937#            return []
938#
939#        def isDefaultTitle(x):
940#            """ some heuristic to recognize default titles """
941#            return re.match(r'.*\.(...)$', x) or re.match(r'.*\.(....)$', x)
942#                   
943#        popularity = pc.fastMetadata(search_results, ('getScore','rid','getNicename','sortable_title'))
944#        popularity.sort(reverse=True)
945#        popularity=popularity[:100]
946#        titlecloud=[(sortable_title, getScore, self.REQUEST.physicalPathToURL(pc.getpath(rid)), sortable_title, getNicename or sortable_title) for (getScore, rid, getNicename, sortable_title) in popularity if sortable_title]
947#       
948#        if not titlecloud:
949#            return []
950#        titlecloud.sort()
951#        maxscore=max([x[1] for x in titlecloud])
952#        if maxscore>1:
953#            titlecloud=map(adjust, titlecloud)
954#        return titlecloud
955
956
957
958
959
# Give the LeMill tool the same class docstring as the Plone catalog tool it subclasses.
960CatalogTool.__doc__ = PloneCatalogTool.__doc__
961
# Pass the finished class to InitializeClass (imported from Globals);
# presumably this activates the ClassSecurityInfo declarations -- confirm against the Zope docs.
962InitializeClass(CatalogTool)
Note: See TracBrowser for help on using the repository browser.