source: trunk/LeMillCatalogTool.py @ 3054

Revision 3054, 37.1 KB checked in by jukka, 9 years ago (diff)

Front page links to last week's resources and members now work.

Line 
1#
2# Plone CatalogTool simplified -- does not use ExtensibleIndexableObjectWrapper, workflows or permissions. Basically just ZCatalog posing as Plone Tool, allowing but ignoring plonish commands.
3#
4
5from Products.CMFPlone import ToolNames
6from AccessControl import ClassSecurityInfo
7from Globals import InitializeClass
8from Globals import DTMLFile
9
10from Products.ZCatalog.ZCatalog import ZCatalog
11from Products.CMFPlone.CatalogTool import CatalogTool as PloneCatalogTool
12from Products.CMFPlone.PloneBatch import Batch
13from Products.PythonScripts.standard import urlencode
14
15from zope.interface import implements
16
17from Products.CMFCore.utils import SimpleRecord, getToolByName
18from Products.ZCTextIndex.HTMLSplitter import HTMLWordSplitter
19from Products.ZCTextIndex.Lexicon import CaseNormalizer
20from Products.ZCTextIndex.Lexicon import Splitter
21from Products.ZCTextIndex.Lexicon import StopWordRemover
22
23from Products.CMFPlone.UnicodeSplitter import Splitter as UnicodeSplitter
24from Products.CMFPlone.UnicodeSplitter import CaseNormalizer as UnicodeCaseNormalizer
25from Products.ZCTextIndex.ZCTextIndex import PLexicon
26from Products.ZCatalog.Lazy import Lazy
27from random import randint
28from config import CONTENT_TYPES, MATERIAL_TYPES, ACTIVITY_TYPES, TOOLS_TYPES, LANGUAGES, LANGUAGES_DICT, TYPE_NAMES, SUBJECT_AREAS_DICT, TARGET_GROUPS_DICT, SUBJECT_AREAS_INVERSE_DICT, TARGET_GROUPS_INVERSE_DICT, COMMUNITY_TYPES, SEARCHABLE_TYPES, DEFAULT_ICONS, TARGET_GROUP
29import time
30from math import log
31
32class CatalogTool(PloneCatalogTool):
33
34    meta_type = 'LeMill Catalog Tool'
35    security = ClassSecurityInfo()
36    toolicon = 'skins/lemill/tool.gif'
37
38    __implements__ = PloneCatalogTool.__implements__
39
40   
41    #XXX START
42    # This is a copy from CMFCore/CatalogTool.py file, made several modifications so that it would do as we want
43    security.declarePublic( 'enumerateIndexes' ) # Subclass can call
44    def enumerateIndexes( self ):
45        #   Return a list of ( index_name, type, extra ) tuples for the initial
46        #   index set.
47        #   Creator is deprecated and may go away, use listCreators!
48        #   meta_type is deprecated and may go away, use portal_type!
49        plaintext_extra = SimpleRecord( lexicon_id='plaintext_lexicon'
50                                      , index_type='Okapi BM25 Rank'
51                                      )
52        htmltext_extra = SimpleRecord( lexicon_id='htmltext_lexicon'
53                                     , index_type='Okapi BM25 Rank'
54                                     )
55        # Adding plone_lexicon as it should be, no nee dto change the name for our own >>> safer that way
56        plone_extra = SimpleRecord( lexicon_id='plone_lexicon'
57                                  , index_type='Okapi BM25 Rank'
58                                  )
59
60        return ( ('Title', 'ZCTextIndex', plone_extra)
61               , ('Subject', 'KeywordIndex', None)
62               , ('Description', 'ZCTextIndex', plone_extra)
63               , ('Creator', 'FieldIndex', None)
64               , ('listCreators', 'KeywordIndex', None)
65               , ('SearchableText', 'ZCTextIndex', plone_extra)
66               , ('Date', 'DateIndex', None)
67               , ('Type', 'FieldIndex', None)
68               , ('created', 'DateIndex', None)
69               , ('effective', 'DateIndex', None)
70               , ('expires', 'DateIndex', None)
71               , ('modified', 'DateIndex', None)
72               , ('allowedRolesAndUsers', 'KeywordIndex', None)
73               , ('review_state', 'FieldIndex', None)
74               , ('in_reply_to', 'FieldIndex', None)
75               , ('meta_type', 'FieldIndex', None)
76               , ('getId', 'FieldIndex', None)
77               , ('path', 'PathIndex', None)
78               , ('portal_type', 'FieldIndex', None)
79               )
80
81    security.declarePublic('enumerateLexicons')
82    def enumerateLexicons(self):
83        # Creating our own lexicon in a different way, as the automatic creation seems to be needing something to act as a StopWordRemover and fails miserably without it.
84        lexicon = PLexicon('plone_lexicon', '', UnicodeSplitter(), UnicodeCaseNormalizer())
85        self._setObject('plone_lexicon', lexicon)
86        return (
87                 ( 'plaintext_lexicon'
88                 , Splitter()
89                 , CaseNormalizer()
90                 , StopWordRemover()
91                 )
92               , ( 'htmltext_lexicon'
93                 , HTMLWordSplitter()
94                 , CaseNormalizer()
95                 , StopWordRemover()
96                 )
97               )
98    #XXX END
99
100    def catalog_object(self, object, uid, idxs=[],
101                       update_metadata=1, pghandler=None):
102        if object and hasattr(object, 'portal_type') and object.portal_type!='CollectionsFolder':
103            ZCatalog.catalog_object(self, object, uid, idxs, update_metadata, pghandler=pghandler)
104
105    def searchResults(self, REQUEST=None, **kw):
106        """Calls ZCatalog.searchResults """
107        return ZCatalog.searchResults(self, REQUEST, **kw)
108
109    __call__ = searchResults
110
111
112    def titleSearch(self, title='', sort_limit=0):
113        """ search titles containing given string """
114        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
115        if sort_limit:
116            query['sort_limit']=sort_limit
117        query['Title']='%s*' % title
118        results=self.searchResults(query)
119        return results
120
121    def fulltextSearch(self, SearchableText='', sort_limit=0):
122        """ search fulltext for a string """
123        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
124        if sort_limit:
125            query['sort_limit']=sort_limit
126        query['SearchableText']='%s*' % SearchableText
127        results=self.searchResults(query)
128        return results
129
130    def buildResultsFromBatch(self, batch):
131        t=time.time()
132        d_list=[]
133        lutool=getToolByName(self, 'lemill_usertool')
134        url_base=getToolByName(self, 'portal_url')()
135
136        if hasattr(batch._sequence, '_seq'):
137            print 'using fast metadata to build results'
138            seq=list(batch._sequence._seq[batch.start:batch.end])
139            print time.time()-t
140            md=self.fastMetadata(seq, ['rid','getHasCoverImage','getNicename','portal_type','getLocation_country','getTags', 'Language','listCreators'])
141            print time.time()-t
142            for item in md:
143                if item[3] not in TYPE_NAMES:
144                    print 'rejected item:', item
145                    continue
146                d={'url':self.getpath(item[0]),
147                    'coverimage_url':item[1],
148                    'title':item[2],
149                    'readable_type':item[3],
150                    'country':item[4],
151                    'tags':item[5],
152                    'language':item[6],
153                    'authors':item[7]}
154                d_list.append(d)
155        else: # batch has already been converted to catalog Brains object
156            print 'using Brains objects to build results'
157            for item in batch:               
158                d={'url':item.getURL(),
159                    'coverimage_url':item.getHasCoverImage,
160                    'title':item.getNicename,
161                    'readable_type':item.portal_type,
162                    'country':item.getLocation_country,
163                    'tags':item.getTags,
164                    'language':item.Language,
165                    'authors':item.listCreators}                 
166                d_list.append(d)
167        tag_base='/'.join((url_base,'search?index_type=tags&q='))
168        lang_base='/'.join((url_base,'search?language='))
169        country_base='/'.join((url_base,'community/browse?country='))
170        # Manipulate raw index data to something more useful
171        for item in d_list:
172            if callable(item['title']):
173                item['title']=item['title']()
174            url=item['url']
175            if item['coverimage_url']:
176                item['coverimage_url']='/'.join((url,'coverImage'))
177            else:
178                item['coverimage_url']='/'.join((url_base, DEFAULT_ICONS[item['readable_type']]))
179            ll=item['language']
180            if ll:
181                item['language']=LANGUAGES_DICT[ll]
182                item['language_link']=''.join((lang_base,ll))
183            item['tags']=[(tag, ''.join((tag_base, tag))) for tag in item['tags']]
184            item['authors']=[self.fastLink(author) for author in item['authors']]
185            if item['country']:         
186                item['country_link']=''.join((country_base,item['country']))
187            item['readable_type']=TYPE_NAMES.get(item['readable_type'], None)[0]
188
189        print 'result batch creation took: ',time.time()-t
190        return d_list
191
192
193    def createBrowsingOptions(self, REQUEST, results=None):
194        """takes search results as input and returns a dictionary that tells what values there are available for each selection box and how many results there are of each value."""
195        t=time.time()
196        src=dict(REQUEST.form.items())
197        print src
198        # section determines what kinds of options there are:
199        path=REQUEST['ACTUAL_URL'].split('/')
200        force_language=True
201
202        if 'content' in path:
203            do_languages=True
204            do_subject_areas=True
205            do_target_groups=True
206            do_types=True
207            type_selection=CONTENT_TYPES
208            do_edited=True
209            do_tags=True
210            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
211        elif 'methods' in path or 'tools' in path:
212            do_languages=True
213            do_subject_areas=False
214            do_target_groups=False
215            do_types=False
216            do_edited=True
217            do_tags=True
218            count_indexes=('Language',)
219        elif 'community' in path:
220            do_languages=True
221            do_subject_areas=True
222            do_target_groups=False
223            do_types=True
224            type_selection=COMMUNITY_TYPES
225            do_edited=True
226            do_tags=True
227            count_indexes=('Language','portal_type','getSubject_area')
228        else:
229            do_languages=True
230            force_language=False
231            do_subject_areas=True
232            do_target_groups=True
233            do_types=True
234            type_selection=SEARCHABLE_TYPES
235            do_edited=True
236            do_tags=False
237            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
238       
239        d={'do_languages':do_languages, 'do_subject_areas':do_subject_areas,'do_target_groups':do_target_groups,'do_types':do_types,'do_edited':do_edited, 'do_tags':do_tags, 'q':'', 'state':'', 'author':'', 'created':''}
240
241
242        # now see if the results should be counted and provide a dictionary of counted values if necessary
243        if results and len(results)<1000:
244            count_results= dict(zip(count_indexes, self.fastCount(results, count_indexes)))
245            count=len(results)
246        else:
247            count=0
248
249        # Languages: two lists, one for probable options and other for improbable
250        if do_languages:
251            all_languages=list(LANGUAGES)[1:] # Remove 'language neutral' from options
252            all_languages[0]=('','any language')
253            all_language_codes=[l[0] for l in all_languages]
254            lang_dict = getToolByName(self, 'lemill_tool').language_dict
255            common_language_codes=getToolByName(self,'lemill_usertool').getLanguages()
256            selected_language= src.get('language','')
257            was_empty=not selected_language
258            if was_empty:
259                if force_language:
260                    selected_language=common_language_codes[0]
261                else:
262                    selected_language='all'
263            common_languages=[]
264            rare_languages=[('all','All',int(selected_language=='all'),0)]
265            if count and was_empty and False:
266                langs=count_results['Language']
267                for langcode in common_language_codes:
268                    if langcode and langcode in langs:
269                        common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
270                        all_language_codes.remove(langcode)           
271                for langcode in all_language_codes:
272                    if langcode and langcode in langs:
273                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
274            else:
275                for langcode in common_language_codes:
276                    common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
277                    all_language_codes.remove(langcode)           
278                for langcode in all_language_codes:
279                    if langcode:
280                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
281            d['common_languages']=common_languages
282            d['rare_languages']=rare_languages
283            d['lang_disabled']=False # portal_type==Piece can set this to true
284            if selected_language!='all':
285                d['lang_filter']='language=%s&' % selected_language
286            else:
287                d['lang_filter']=''
288        if do_subject_areas:
289            selected=src.get('subject_area','')
290            if selected:
291                all_count='?'
292            else:
293                all_count=count
294            subject_areas=[('','All',int(not selected), all_count)]
295            if count:
296                counts=count_results['getSubject_area']
297                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
298                    if sa_full in counts:
299                        subject_areas.append((sa_key, sa_full, int(selected==sa_key), counts[sa_full]))
300            else:
301                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
302                    subject_areas.append((sa_key, sa_full, int(selected==sa_key), 0))
303
304            d['subject_area']=subject_areas
305            d['subjs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
306        if do_target_groups:
307            selected=src.get('target_group','')
308            if selected:
309                all_count='?'
310            else:
311                all_count=count
312            target_groups=[('','All',int(not selected),all_count)]
313            if count:
314                counts=count_results['getTarget_group']
315                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
316                    if tg_full in counts:
317                        target_groups.append((tg_key, tg_full, int(selected==tg_key), counts[tg_full]))
318            else:
319                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
320                    target_groups.append((tg_key, tg_full, int(selected==tg_key), 0))
321            d['target_group']=target_groups
322            d['targs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
323        if do_types:
324            selected=src.get('type','')
325            if selected:
326                all_count='?'
327            else:
328                all_count=count
329            types=[('','All',int(not selected),all_count)]
330            if count:
331                counts=count_results['portal_type']
332                for type_key in type_selection:
333                    if type_key in counts:
334                        types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), counts[type_key]))
335            else:
336                for type_key in type_selection:
337                    types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), 0))
338            d['type']=types
339            if selected:
340                if selected in ['Piece','Activity','Tool']:
341                    d['targs_disabled']=True
342                    d['subjs_disabled']=True
343                if selected=='Piece':
344                    d['lang_disabled']=True
345
346        if do_edited:
347            try:
348                selected=int(src.get('edited',0))
349            except ValueError:
350                selected=0
351            d['edited']=[('','Any time',int(not selected),0), (365,'Last year',int(selected==365),0), (30,'Last month',int(selected==30),0), (7,'Last week',int(selected==7),0), (1,'Yesterday',int(selected==1),0)]
352        # Previous search term
353        if 'q' in src:
354            d['q']=src['q']
355        if 'state' in src:
356            d['state']=src['state']
357        if 'author' in src:
358            d['author']=src['author']           
359               
360        print 'creating browsing options took', time.time()-t
361        return d
362
363
364    def decideBrowsingSubType(self):
365        """ Browse page header needs to know what kind of browsing is going on """
366        form=self.REQUEST.form       
367        if 'state' in form:
368            if form['state']=='public': return 'published'
369            if form['state']=='draft': return 'drafts'
370        path=self.REQUEST['ACTUAL_URL'].split('/')
371        for key in ['content','methods','tools','community']:
372            if key in path: return key
373        return ''           
374
375    def decideSearchSubType(self):
376        """ Search page header needs to know what kind of search is going on """
377        form=self.REQUEST.form       
378        if 'created' in form:
379            if 'type' in form and form['type']=='MemberFolder':
380                return 'new_members'
381            else:
382                return 'new_resources'
383        if 'edited' in form and 'q' not in form:
384            return 'recent_edits'
385        return 'search'
386
387
388    def browsingSearch(self, REQUEST=None, **kw):       
389        """ this search tries to use arguments from browsing form """
390        print 'browsing search called'
391        t=time.time()
392        src=REQUEST.form
393        if '-C' in src:
394            del src['-C']
395        if not (src or kw):
396            path=REQUEST['ACTUAL_URL'].split('/')
397            if 'browse' in path or 'search' in path or 'cloud' in path:
398                print 'empty search, return []'
399                return []
400        keywords={'sort_on':'getScore','sort_order':'reverse','getState':('draft', 'public')}
401        keywords.update(src)
402        keywords.update(kw)
403        if 'language' in keywords:
404            if keywords['language']=='all':
405                keywords['Language']=''
406            else:           
407                keywords['Language']=keywords['language']
408            del keywords['language']
409        if 'subject_area' in keywords:
410            if keywords['subject_area'] in SUBJECT_AREAS_DICT:
411                keywords['getSubject_area']=SUBJECT_AREAS_DICT[keywords['subject_area']]
412            del keywords['subject_area']
413        if 'target_group' in keywords:
414            if keywords['target_group'] in TARGET_GROUPS_DICT:
415                keywords['getTarget_group']=TARGET_GROUPS_DICT[keywords['target_group']]
416            del keywords['target_group']
417        if 'tags' in keywords:
418            keywords['getTags']=keywords['tags']
419            del keywords['tags']
420        if 'author' in keywords:
421            keywords['listCreators']=keywords['author']
422            del keywords['author']
423        if 'state' in keywords:
424            keywords['getState']=keywords['state']
425            del keywords['state']
426        if 'type' in keywords:
427            if keywords['type']=='lr':
428                keywords['portal_type']=list(CONTENT_TYPES)+['Activity','Tool']
429            else:
430                keywords['portal_type']=keywords['type']
431            del keywords['type']
432        else: # portal_type is determined by location
433            path=REQUEST['ACTUAL_URL'].split('/')
434            if 'content' in path:
435                keywords['portal_type']=CONTENT_TYPES
436            elif 'methods' in path:
437                keywords['portal_type']='Activity'
438            elif 'tools' in path:
439                keywords['portal_type']='Tool'
440            elif 'community' in path:
441                keywords['portal_type']=COMMUNITY_TYPES
442            else:
443                keywords['portal_type']=SEARCHABLE_TYPES
444        if 'q' in keywords:
445            keywords['SearchableText']=keywords['q']
446            del keywords['q']
447        if 'created' in keywords:
448            keywords['created']={'query': self.ZopeTime()-int(keywords['created']), 'range':'min'}
449        if 'edited' in keywords:
450            keywords['getLatestEdit']={'query': self.ZopeTime()-int(keywords['edited']), 'range':'min'}
451            del keywords['edited']
452        if not 'Language' in keywords:
453            found=False
454            if 'base' in keywords:
455                if keywords['base']=='language':
456                    found=True
457            for compensating in ['SearchableText','listCreators','getTarget_group','getTags','getSubject_area','created','getLatestEdit']:
458                if compensating in keywords:
459                    found=True
460                    break
461            if not found:
462                keywords['Language']=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
463        if 'Language' in keywords:
464            if not keywords['Language']:
465                del keywords['Language']
466        print 'keywords:', keywords
467        try:
468            results = self.searchResults(keywords)               
469        except ParseError:
470            results = []
471        print 'browsing search:', time.time()-t
472        return results
473
474    #### Fast catalog handling ###########################################
475
476    def wakeLazy(self, lazy):
477        new=[]
478        for l in lazy._seq:
479            if isinstance(l, Lazy):
480                new.extend(self.wakeLazy(l))
481            else:
482                new.append(l)
483        return new
484
485    def fastPick(self, lazy_results, top):
486        """ Takes Lazy results and picks one random metadata obj from top """
487        tries=4
488        choice=randint(0,top)
489        while tries: # since we don't know how long the list is we need to use trial and error
490            # to find if the random index has corresponding item
491            i=0
492            for item in lazy_results._seq:
493                if choice==i:
494                    return lazy_results._func(item)
495                i+=1
496            choice/=2
497            tries-=1
498        return lazy_results._func(item)
499
500    def fastLink(self, user):
501        """ Finds an user from catalog and returns a tuple containing nicename and link
502        because this gets asked so often and it is relatively expensive to build, we cache them in non-permanent dict here in catalog tool
503        """
504        if not hasattr(self, 'author_cache'):
505            self.author_cache={}
506        if not user in self.author_cache:
507            res=self.searchResults(Creator=user, portal_type='MemberFolder')
508            for l in res._seq:
509                authortuple= (self.getEntry('getNicename', l), self.getpath(l))
510                self.author_cache[user]=authortuple
511                return authortuple
512        else:
513            return self.author_cache[user]
514        return None       
515
516    def getEntry(self, index, key, empty=[]):
517        val=self._catalog.getIndex(index).getEntryForObject(key, empty)
518        if callable(val):
519            val=val()
520        return val       
521           
522    def fastLinks(self, results, limit):
523        """ Takes Lazy results and returns a list of tuples (title, url, portal_type) """
524        def safeData(index, key):
525            v=index.getEntryForObject(key, [])
526            if callable(v):
527                return v()
528            return v
529
530        def wakeUp(lazy, c):
531            new=[]
532            for l in lazy._seq:
533                if isinstance(l, Lazy):
534                    c,newer=wakeUp(l,c)
535                    new.extend(newer)
536                else:
537                    new.append((safeData(title_index, l), self.getpath(l), safeData(type_index, l)))
538                    c+=1
539                if c==limit:
540                    break
541            return c,new
542        title_index=self._catalog.getIndex('getNicename')
543        type_index=self._catalog.getIndex('portal_type')
544        c,new=wakeUp(results,0)       
545        return new
546
547    def fastMetadata(self, results, indexes, cut=0):
548        """ Takes Lazy results and returns a list of tuple for values from indexes.
549        Indexes is a string or a tuple of strings.
550        This is an order of magnitude faster than getting CatalogBrains for each object"""
551        c=0
552        def safeData(index, key):
553            v=index.getEntryForObject(key, [])
554            if callable(v):
555                return v()
556            return v
557
558        def wakeUp(lazy,c):
559            new=[]
560            if isinstance(lazy, list):
561                seq=lazy
562            else:
563                seq=lazy._seq
564            for l in seq:
565                if isinstance(l, Lazy):
566                    new.extend(wakeUp(l,c))
567                else:
568                    values=[]   
569                    for index in index_sources:
570                        if index:
571                            values.append(safeData(index, l))
572                        else:
573                            values.append(l)
574                    new.append(tuple(values))
575                    c+=1
576                    if c==cut:
577                        break
578            return new
579        def wakeUpFaster(lazy, c):
580            new=[]
581            if isinstance(lazy, list):
582                seq=lazy
583            else:
584                seq=lazy._seq
585            for l in seq:
586                if isinstance(l, Lazy):
587                    new.extend(wakeUpFaster(l,c))
588                else:
589                    new.append(index.getEntryForObject(l, []))
590                c+=1
591                if c==cut:
592                    break
593            return new
594        if isinstance(indexes, tuple) or isinstance(indexes, list):
595            index_sources=[ind!='rid' and self._catalog.getIndex(ind) for ind in indexes]
596            new=wakeUp(results,c)
597        else:
598            index=self._catalog.getIndex(indexes)
599            new=wakeUpFaster(results,c)
600        return new
601       
602    def fastCount(self, results, indexes):
603        """ Takes Lazy results and returns a dictionary or tuple of dictionaries for values and their counts from indexes.
604        Index is a string or tuple of strings.
605        This is an order of magnitude faster than getting CatalogBrains for each object"""
606
607        def safeData(index, key):
608            v=index.getEntryForObject(key, [])
609            if callable(v):
610                return v()
611            return v
612       
613        def wakeUpAndCount(lazy):
614            for l in lazy._seq:
615                if isinstance(l, Lazy):
616                    wakeUpAndCount(l)
617                else:
618                    values=[]   
619                    for dic, index in indexes:
620                        val=safeData(index, l)
621                        if val:
622                            if isinstance(val, list):
623                                for v in val:                       
624                                    dic[v[:50]]=dic.get(v[:50],0)+1
625                            elif val:
626                                val=str(val)[:50]
627                                dic[val]=dic.get(val,0)+1
628        def wakeUpFasterAndCount(lazy):
629            for l in lazy._seq:
630                if isinstance(l, Lazy):
631                    wakeUpFasterAndCount(l)
632                else:
633                    val=safeData(index, l)
634                    if isinstance(val, list):
635                        for v in val:                       
636                            dic[v[:50]]=dic.get(v[:50],0)+1
637                    elif val:
638                        val=str(val)[:50]
639                        dic[val]=dic.get(val,0)+1
640        index=None
641        if isinstance(indexes, tuple):
642            if not results:
643                return [{} for ind in indexes]
644            indexes=[({}, self._catalog.getIndex(ind)) for ind in indexes]
645            new=wakeUpAndCount(results)
646            return tuple([dic for dic,index in indexes])
647        else:
648            if not results:
649                return {}
650            index=self._catalog.getIndex(indexes)
651            dic={}
652            new=wakeUpFasterAndCount(results)
653            return dic
654
655    def getSomeMetadataForRID(self, rid, md_fields):
656        record = self._catalog.data[rid]
657        schema = self._catalog.schema
658        if isinstance(md_fields, (tuple, list)):
659            result = {}
660            for md in md_fields:
661                result[md]= record[schema[md]]
662            return result
663        else:
664            return record[schema[md_fields]]
665
666
667    ##############################      Clouds      ######################## 
668
669    def buildCloudData(self, results, request=None):
670        """ Build tag cloud result tuples (name, tagsize, obj_url, tag_value, nicename) for given form from result set """
671        def adjustTag(val, steps=8):
672            # helper method to adjust hit count of this tag to relative size (1,...,8)
673            try:
674                val=int((8*log(val-mincount,2))/log(maxcount-mincount,2))
675            except (OverflowError, ZeroDivisionError):
676                val=0
677            if not val:
678                val=1
679            return val
680        lemill_tool = getToolByName(self, 'lemill_tool')
681        if not results:
682            return []       
683        src=dict(request.form.items())
684        if 'base' not in src:
685            return []
686        portal_url=getToolByName(self, 'portal_url')()
687        path=request['ACTUAL_URL'].split('/')
688        if 'content' in path:
689            link_base='/'.join((portal_url,'content','browse'))
690        elif 'methods' in path:
691            link_base='/'.join((portal_url,'methods','browse'))
692        elif 'tools' in path:
693            link_base='/'.join((portal_url,'tools','browse'))
694        elif 'community' in path:
695            link_base='/'.join((portal_url,'community','browse'))
696        else:
697            link_base='/'.join((portal_url,'browse'))       
698        lang_part=''
699        base=src['base']
700        language=src.get('language','')
701        if language:
702            lang_part='language=%s&' % language
703        title_cloud=False
704        if base=='language':
705            link_root='%s?language=' % link_base       
706            cloud_index='Language'
707        elif base=='tags':
708            link_root='%s?%stags=' % (link_base, lang_part)       
709            cloud_index='getTags'
710        elif base=='subject_area':
711            link_root='%s?%ssubject_area=' % (link_base, lang_part)       
712            cloud_index='getSubject_area'
713        elif base=='target_group':
714            link_root='%s?%starget_group=' % (link_base, lang_part)       
715            cloud_index='getTarget_group'
716        elif base=='title':
717            resultlist=self.fastMetadata(results, ('sortable_title','getScore','getNicename', 'path'), cut=100)
718            maxcount=resultlist[0][1]
719            mincount=resultlist[-1][1]             
720            resultlist.sort()
721            resultlist=[(x[2], adjustTag(x[1], steps=6), x[3]) for x in resultlist if x[2]]
722            return resultlist
723           
724        hits=self.fastCount(results, cloud_index)
725        resultlist=zip(hits.values(),hits.keys())
726        resultlist.sort(reverse=True)
727        maxcount=resultlist[0][0] # first!
728        resultlist = resultlist[:100]
729        mincount=resultlist[-1][0] # last!
730        # At this point resultlist= [(number_of_hits, tag_text),...]
731        # It should end up as: [(displayed_text, number_of_hits, link_url)...]
732        if cloud_index=='Language' or cloud_index=='getLanguage_skills':
733            resultlist=[(lemill_tool.getPrettyLanguage(x[1]), adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
734        elif cloud_index=='getSubject_area':
735            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
736        elif cloud_index=='getTarget_group':
737            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
738        else:
739            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
740        resultlist.sort()
741        return resultlist
742
743    ########### Front page top lists ############
744   
745    def getTopFive(self, results=None, key_index=None, link_body='', clean=False):
746        """ Returns top five results for key_index (getTags, getSubject_area...) for certain language """
747        if clean or not hasattr(self, 'top5results'):
748            self.top5results={'content':{}, 'methods':{}, 'tools':{}, 'community':{}}
749        data=self.top5results
750        path=self.REQUEST['ACTUAL_URL'].split('/')
751        for section in ['content','methods','tools','community',None]:
752            if section in path:
753                break
754        if not section:
755            return []
756        ilanguage=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
757        if key_index in data[section]:
758            top5lists=data[section][key_index]
759        else:
760            top5lists={}
761            data[section][key_index]=top5lists
762        if ilanguage in top5lists:
763            return top5lists[ilanguage]
764        else:
765            tops=self.fastCount(results, key_index)
766            resultlist=zip(tops.values(),tops.keys())
767            resultlist.sort(reverse=True)
768            resultlist=resultlist[:5]
769            if key_index=='getSubject_area':
770                resultlist=[(x[1], ''.join((link_body, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
771            elif key_index=='getTarget_group':
772                resultlist=[(x[1], ''.join((link_body, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
773            else:
774                resultlist=[(x[1], ''.join((link_body, x[1]))) for x in resultlist]
775            top5lists[ilanguage]=resultlist
776            print top5lists
777            return resultlist
778
779    ############################# batch ##########################
780
781    def batch(self, results=None, request=None):
782        """ Use Plone's batch """
783        if request and 'b_start' in request.form:
784            try:
785                b_start=int(request.form['b_start'])
786            except ValueError:
787                b_start=0
788        else:
789            b_start=0 
790        b= Batch(results, 30, b_start, orphan=1)   
791        return b 
792
793    def batchBaseUrl(self):
794        """ Keep all other parametres as they are, but add or change 'b_start' """
795        form=self.REQUEST.form
796        if 'b_start' in form:
797            del form['b_start']
798        last_url='?'.join((self.REQUEST.ACTUAL_URL, urlencode(form)))
799        new= last_url+'&b_start=%s'
800        return new
801       
802
803#
804#    def getTagCloud(self, search_results, index_type):
805#        """ Build a cloud based on how many occurences of this item are in results """
806#        if not search_results:
807#            return []
808#        lemill_tool = getToolByName(self, 'lemill_tool')
809#        pc = getToolByName(self, 'portal_catalog')
810#        from math import log
811#        maxcount=0
812#
813#        hits={}
814#        hits=pc.fastCount(search_results, index_type)
815#        resultlist=zip(hits.values(),hits.keys())
816#        if not resultlist:
817#            return []
818#        resultlist.sort()
819#        resultlist.reverse()
820#        maxcount=resultlist[0][0] # first!
821#        # if the first cut score for tag is x, we want to cut off all of the tags with score x.
822#        if len(resultlist)>100:
823#            #cutpoint = [x[0] for x in resultlist].index(resultlist[100]) can't figure this now, fix later
824#            cutpoint = 100
825#            resultlist = resultlist[:cutpoint]
826#        mincount=resultlist[-1][0]
827#        resultlist=[(x[1], x[0], '',x[1],x[1]) for x in resultlist]
828#
829#        # adjust to 1-8. We don't have to worry about score 0, they're already removed.
830#        if maxcount>1:
831#            resultlist=map(adjust, resultlist)
832#        # prettify language names
833#        if index_type=='Language' or index_type=='getLanguage_skills':
834#            resultlist=[(x[0],x[1],x[2],x[3],lemill_tool.getPrettyLanguage(x[4])) for x in resultlist]           
835#        if index_type=='getTarget_group':
836#            def compfunc(t2,t1):
837#                if t2[0] in TARGET_GROUP and t1[0] in TARGET_GROUP:
838#                    return  TARGET_GROUP.index(t2[0]) - TARGET_GROUP.index(t1[0])
839#                else:
840#                    return -1
841#            resultlist.sort(cmp=compfunc)
842#        else:   
843#            resultlist.sort()
844#        return resultlist
845#       
846#
847#    def getTitleCloud(self, search_results, browse_type):
848#        """ Build a cloud based on popularity score for that resource """
849#        pc=getToolByName(self,'portal_catalog')
850#        # uniquetuplelist contains result metadata reordered: (sort_title, count, url, indexvalue, title)
851#        if not search_results:
852#            return []
853#
854#        def isDefaultTitle(x):
855#            """ some heuristic to recognize default titles """
856#            return re.match(r'.*\.(...)$', x) or re.match(r'.*\.(....)$', x)
857#                   
858#        popularity = pc.fastMetadata(search_results, ('getScore','rid','getNicename','sortable_title'))
859#        popularity.sort(reverse=True)
860#        popularity=popularity[:100]
861#        titlecloud=[(sortable_title, getScore, self.REQUEST.physicalPathToURL(pc.getpath(rid)), sortable_title, getNicename or sortable_title) for (getScore, rid, getNicename, sortable_title) in popularity if sortable_title]
862#       
863#        if not titlecloud:
864#            return []
865#        titlecloud.sort()
866#        maxscore=max([x[1] for x in titlecloud])
867#        if maxscore>1:
868#            titlecloud=map(adjust, titlecloud)
869#        return titlecloud
870
871
872
873
874
# Reuse the class docstring of the Plone catalog tool this class derives from.
CatalogTool.__doc__ = PloneCatalogTool.__doc__

# Zope class initialisation (InitializeClass is imported from Globals);
# presumably what activates the ClassSecurityInfo declarations -- confirm.
InitializeClass(CatalogTool)
Note: See TracBrowser for help on using the repository browser.