source: trunk/LeMillCatalogTool.py @ 3052

Revision 3052, 36.1 KB checked in by jukka, 9 years ago (diff)

Removed some of the unused templates, community and portfolio related pages are still there. All kinds of work done towards LeMill 3.

Line 
1#
2# Plone CatalogTool simplified -- does not use ExtensibleIndexableObjectWrapper, workflows or permissions. Basically just ZCatalog posing as Plone Tool, allowing but ignoring plonish commands.
3#
4
5from Products.CMFPlone import ToolNames
6from AccessControl import ClassSecurityInfo
7from Globals import InitializeClass
8from Globals import DTMLFile
9
10from Products.ZCatalog.ZCatalog import ZCatalog
11from Products.CMFPlone.CatalogTool import CatalogTool as PloneCatalogTool
12from Products.CMFPlone.PloneBatch import Batch
13from Products.PythonScripts.standard import urlencode
14
15from zope.interface import implements
16
17from Products.CMFCore.utils import SimpleRecord, getToolByName
18from Products.ZCTextIndex.HTMLSplitter import HTMLWordSplitter
19from Products.ZCTextIndex.Lexicon import CaseNormalizer
20from Products.ZCTextIndex.Lexicon import Splitter
21from Products.ZCTextIndex.Lexicon import StopWordRemover
22
23from Products.CMFPlone.UnicodeSplitter import Splitter as UnicodeSplitter
24from Products.CMFPlone.UnicodeSplitter import CaseNormalizer as UnicodeCaseNormalizer
25from Products.ZCTextIndex.ZCTextIndex import PLexicon
26from Products.ZCatalog.Lazy import Lazy
27from random import randint
28from config import CONTENT_TYPES, MATERIAL_TYPES, ACTIVITY_TYPES, TOOLS_TYPES, LANGUAGES, LANGUAGES_DICT, TYPE_NAMES, SUBJECT_AREAS_DICT, TARGET_GROUPS_DICT, SUBJECT_AREAS_INVERSE_DICT, TARGET_GROUPS_INVERSE_DICT, COMMUNITY_TYPES, SEARCHABLE_TYPES, DEFAULT_ICONS, TARGET_GROUP
29import time
30from math import log
31
32class CatalogTool(PloneCatalogTool):
33
34    meta_type = 'LeMill Catalog Tool'
35    security = ClassSecurityInfo()
36    toolicon = 'skins/lemill/tool.gif'
37
38    __implements__ = PloneCatalogTool.__implements__
39
40   
41    #XXX START
42    # This is a copy from CMFCore/CatalogTool.py file, made several modifications so that it would do as we want
43    security.declarePublic( 'enumerateIndexes' ) # Subclass can call
44    def enumerateIndexes( self ):
45        #   Return a list of ( index_name, type, extra ) tuples for the initial
46        #   index set.
47        #   Creator is deprecated and may go away, use listCreators!
48        #   meta_type is deprecated and may go away, use portal_type!
49        plaintext_extra = SimpleRecord( lexicon_id='plaintext_lexicon'
50                                      , index_type='Okapi BM25 Rank'
51                                      )
52        htmltext_extra = SimpleRecord( lexicon_id='htmltext_lexicon'
53                                     , index_type='Okapi BM25 Rank'
54                                     )
55        # Adding plone_lexicon as it should be, no nee dto change the name for our own >>> safer that way
56        plone_extra = SimpleRecord( lexicon_id='plone_lexicon'
57                                  , index_type='Okapi BM25 Rank'
58                                  )
59
60        return ( ('Title', 'ZCTextIndex', plone_extra)
61               , ('Subject', 'KeywordIndex', None)
62               , ('Description', 'ZCTextIndex', plone_extra)
63               , ('Creator', 'FieldIndex', None)
64               , ('listCreators', 'KeywordIndex', None)
65               , ('SearchableText', 'ZCTextIndex', plone_extra)
66               , ('Date', 'DateIndex', None)
67               , ('Type', 'FieldIndex', None)
68               , ('created', 'DateIndex', None)
69               , ('effective', 'DateIndex', None)
70               , ('expires', 'DateIndex', None)
71               , ('modified', 'DateIndex', None)
72               , ('allowedRolesAndUsers', 'KeywordIndex', None)
73               , ('review_state', 'FieldIndex', None)
74               , ('in_reply_to', 'FieldIndex', None)
75               , ('meta_type', 'FieldIndex', None)
76               , ('getId', 'FieldIndex', None)
77               , ('path', 'PathIndex', None)
78               , ('portal_type', 'FieldIndex', None)
79               )
80
81    security.declarePublic('enumerateLexicons')
82    def enumerateLexicons(self):
83        # Creating our own lexicon in a different way, as the automatic creation seems to be needing something to act as a StopWordRemover and fails miserably without it.
84        lexicon = PLexicon('plone_lexicon', '', UnicodeSplitter(), UnicodeCaseNormalizer())
85        self._setObject('plone_lexicon', lexicon)
86        return (
87                 ( 'plaintext_lexicon'
88                 , Splitter()
89                 , CaseNormalizer()
90                 , StopWordRemover()
91                 )
92               , ( 'htmltext_lexicon'
93                 , HTMLWordSplitter()
94                 , CaseNormalizer()
95                 , StopWordRemover()
96                 )
97               )
98    #XXX END
99
100    def catalog_object(self, object, uid, idxs=[],
101                       update_metadata=1, pghandler=None):
102        if object and hasattr(object, 'portal_type') and object.portal_type!='CollectionsFolder':
103            ZCatalog.catalog_object(self, object, uid, idxs, update_metadata, pghandler=pghandler)
104
105    def searchResults(self, REQUEST=None, **kw):
106        """Calls ZCatalog.searchResults """
107        return ZCatalog.searchResults(self, REQUEST, **kw)
108
109    __call__ = searchResults
110
111
112    def titleSearch(self, title='', sort_limit=0):
113        """ search titles containing given string """
114        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
115        if sort_limit:
116            query['sort_limit']=sort_limit
117        query['Title']='%s*' % title
118        results=self.searchResults(query)
119        return results
120
121    def fulltextSearch(self, SearchableText='', sort_limit=0):
122        """ search fulltext for a string """
123        query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
124        if sort_limit:
125            query['sort_limit']=sort_limit
126        query['SearchableText']='%s*' % SearchableText
127        results=self.searchResults(query)
128        return results
129
130    def buildResultsFromBatch(self, batch):
131        t=time.time()
132        d_list=[]
133        lutool=getToolByName(self, 'lemill_usertool')
134        url_base=getToolByName(self, 'portal_url')()
135
136        if hasattr(batch._sequence, '_seq'):
137            print 'using fast metadata to build results'
138            seq=list(batch._sequence._seq[batch.start:batch.end])
139            print time.time()-t
140            md=self.fastMetadata(seq, ['rid','getHasCoverImage','getNicename','portal_type','getLocation_country','getTags', 'Language','listCreators'])
141            print time.time()-t
142            for item in md:
143                if item[3] not in TYPE_NAMES:
144                    print 'rejected item:', item
145                    continue
146                d={'url':self.getpath(item[0]),
147                    'coverimage_url':item[1],
148                    'title':item[2],
149                    'readable_type':item[3],
150                    'country':item[4],
151                    'tags':item[5],
152                    'language':item[6],
153                    'authors':item[7]}
154                d_list.append(d)
155        else: # batch has already been converted to catalog Brains object
156            print 'using Brains objects to build results'
157            for item in batch:               
158                d={'url':item.getURL(),
159                    'coverimage_url':item.getHasCoverImage,
160                    'title':item.getNicename,
161                    'readable_type':item.portal_type,
162                    'country':item.getLocation_country,
163                    'tags':item.getTags,
164                    'language':item.Language,
165                    'authors':item.listCreators}                 
166                d_list.append(d)
167        tag_base='/'.join((url_base,'search?index_type=tags&q='))
168        lang_base='/'.join((url_base,'search?language='))
169        country_base='/'.join((url_base,'community/browse?country='))
170        # Manipulate raw index data to something more useful
171        for item in d_list:
172            if callable(item['title']):
173                item['title']=item['title']()
174            url=item['url']
175            if item['coverimage_url']:
176                item['coverimage_url']='/'.join((url,'coverImage'))
177            else:
178                item['coverimage_url']='/'.join((url_base, DEFAULT_ICONS[item['readable_type']]))
179            ll=item['language']
180            if ll:
181                item['language']=LANGUAGES_DICT[ll]
182                item['language_link']=''.join((lang_base,ll))
183            item['tags']=[(tag, ''.join((tag_base, tag))) for tag in item['tags']]
184            item['authors']=[self.fastLink(author) for author in item['authors']]
185            if item['country']:         
186                item['country_link']=''.join((country_base,item['country']))
187            item['readable_type']=TYPE_NAMES.get(item['readable_type'], None)[0]
188
189        print 'result batch creation took: ',time.time()-t
190        return d_list
191
192
    def createBrowsingOptions(self, REQUEST, results=None):
        """takes search results as input and returns a dictionary that tells what values there are available for each selection box and how many results there are of each value.

        The section (content / methods / tools / community / anything else) is
        read from the segments of REQUEST['ACTUAL_URL'] and decides which
        selection boxes are offered. Current selections are read from
        REQUEST.form. Option lists are lists of 4-tuples
        (value, label, selected_flag, count). Counts are only computed when
        `results` is non-empty and has fewer than 1000 items; otherwise they
        are 0.
        """
        t=time.time()
        src=dict(REQUEST.form.items())
        print src
        # section determines what kinds of options there are:
        path=REQUEST['ACTUAL_URL'].split('/')
        force_language=True

        # NOTE: type_selection is only bound in the branches where do_types is True
        if 'content' in path:
            do_languages=True
            do_subject_areas=True
            do_target_groups=True
            do_types=True
            type_selection=CONTENT_TYPES
            do_edited=True
            do_tags=True
            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
        elif 'methods' in path or 'tools' in path:
            do_languages=True
            do_subject_areas=False
            do_target_groups=False
            do_types=False
            do_edited=True
            do_tags=True
            count_indexes=('Language',)
        elif 'community' in path:
            do_languages=True
            do_subject_areas=True
            do_target_groups=False
            do_types=True
            type_selection=COMMUNITY_TYPES
            do_edited=True
            do_tags=True
            count_indexes=('Language','portal_type','getSubject_area')
        else:
            do_languages=True
            force_language=False
            do_subject_areas=True
            do_target_groups=True
            do_types=True
            type_selection=SEARCHABLE_TYPES
            do_edited=True
            do_tags=False
            count_indexes=('Language','getTarget_group','portal_type','getSubject_area')

        d={'do_languages':do_languages, 'do_subject_areas':do_subject_areas,'do_target_groups':do_target_groups,'do_types':do_types,'do_edited':do_edited, 'do_tags':do_tags, 'q':'', 'state':'', 'author':''}


        # now see if the results should be counted and provide a dictionary of counted values if necessary
        # NOTE: count_results is only bound when count is non-zero; every use below is guarded by `if count`
        if results and len(results)<1000:
            count_results= dict(zip(count_indexes, self.fastCount(results, count_indexes)))
            count=len(results)
        else:
            count=0

        # Languages: two lists, one for probable options and other for improbable
        if do_languages:
            all_languages=list(LANGUAGES)[1:] # Remove 'language neutral' from options
            all_languages[0]=('','any language')
            all_language_codes=[l[0] for l in all_languages]
            lang_dict = getToolByName(self, 'lemill_tool').language_dict
            common_language_codes=getToolByName(self,'lemill_usertool').getLanguages()
            selected_language= src.get('language','')
            was_empty=not selected_language
            if was_empty:
                # no language in the form: sections that force a language fall
                # back to the first code returned by getLanguages()
                if force_language:
                    selected_language=common_language_codes[0]
                else:
                    selected_language='all'
            common_languages=[]
            rare_languages=[('all','All',int(selected_language=='all'),0)]
            # the trailing `and False` keeps this counted branch switched off
            if count and was_empty and False:
                langs=count_results['Language']
                for langcode in common_language_codes:
                    if langcode and langcode in langs:
                        common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
                        all_language_codes.remove(langcode)
                for langcode in all_language_codes:
                    if langcode and langcode in langs:
                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
            else:
                for langcode in common_language_codes:
                    common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
                    # each common code is taken out so it is not listed again as rare
                    all_language_codes.remove(langcode)
                for langcode in all_language_codes:
                    if langcode:
                        rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
            d['common_languages']=common_languages
            d['rare_languages']=rare_languages
            d['lang_disabled']=False # portal_type==Piece can set this to true
            if selected_language!='all':
                d['lang_filter']='language=%s&' % selected_language
            else:
                d['lang_filter']=''
        if do_subject_areas:
            selected=src.get('subject_area','')
            # with a selection active the total for 'All' is shown as '?'
            if selected:
                all_count='?'
            else:
                all_count=count
            subject_areas=[('','All',int(not selected), all_count)]
            if count:
                # counted mode: only subject areas present in the counts are listed
                counts=count_results['getSubject_area']
                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
                    if sa_full in counts:
                        subject_areas.append((sa_key, sa_full, int(selected==sa_key), counts[sa_full]))
            else:
                for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
                    subject_areas.append((sa_key, sa_full, int(selected==sa_key), 0))

            d['subject_area']=subject_areas
            d['subjs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
        if do_target_groups:
            selected=src.get('target_group','')
            if selected:
                all_count='?'
            else:
                all_count=count
            target_groups=[('','All',int(not selected),all_count)]
            if count:
                # counted mode: only target groups present in the counts are listed
                counts=count_results['getTarget_group']
                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
                    if tg_full in counts:
                        target_groups.append((tg_key, tg_full, int(selected==tg_key), counts[tg_full]))
            else:
                for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
                    target_groups.append((tg_key, tg_full, int(selected==tg_key), 0))
            d['target_group']=target_groups
            d['targs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
        if do_types:
            selected=src.get('type','')
            if selected:
                all_count='?'
            else:
                all_count=count
            types=[('','All',int(not selected),all_count)]
            if count:
                counts=count_results['portal_type']
                for type_key in type_selection:
                    if type_key in counts:
                        types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), counts[type_key]))
            else:
                for type_key in type_selection:
                    types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), 0))
            d['type']=types
            # the selected type can switch off the other selection boxes
            if selected:
                if selected in ['Piece','Activity','Tool']:
                    d['targs_disabled']=True
                    d['subjs_disabled']=True
                if selected=='Piece':
                    d['lang_disabled']=True

        if do_edited:
            # a non-numeric 'edited' value is treated as no selection
            try:
                selected=int(src.get('edited',0))
            except ValueError:
                selected=0
            d['edited']=[('','Any time',int(not selected),0), (365,'Last year',int(selected==365),0), (30,'Last month',int(selected==30),0), (7,'Last week',int(selected==7),0), (1,'Yesterday',int(selected==1),0)]
        # Previous search term
        if 'q' in src:
            d['q']=src['q']
        if 'state' in src:
            d['state']=src['state']
        if 'author' in src:
            d['author']=src['author']

        print 'creating browsing options took', time.time()-t
        return d
362
363
364    def decideBrowsingSubType(self):
365        """ Browse page header needs to know what kind of browsing is going on """
366        form=self.REQUEST.form       
367        if 'state' in form:
368            if form['state']=='public': return 'published'
369            if form['state']=='draft': return 'drafts'
370        path=self.REQUEST['ACTUAL_URL'].split('/')
371        for key in ['content','methods','tools','community']:
372            if key in path: return key
373        return ''           
374
375    def browsingSearch(self, REQUEST=None, **kw):       
376        """ this search tries to use arguments from browsing form """
377        print 'browsing search called'
378        t=time.time()
379        keywords={'sort_on':'getScore','sort_order':'reverse','getState':('draft', 'public')}
380        keywords.update(REQUEST.form.items())
381        keywords.update(kw)
382        if 'language' in keywords:
383            if keywords['language']=='all':
384                keywords['Language']=''
385            else:           
386                keywords['Language']=keywords['language']
387            del keywords['language']
388        if 'subject_area' in keywords:
389            if keywords['subject_area'] in SUBJECT_AREAS_DICT:
390                keywords['getSubject_area']=SUBJECT_AREAS_DICT[keywords['subject_area']]
391            del keywords['subject_area']
392        if 'target_group' in keywords:
393            if keywords['target_group'] in TARGET_GROUPS_DICT:
394                keywords['getTarget_group']=TARGET_GROUPS_DICT[keywords['target_group']]
395            del keywords['target_group']
396        if 'tags' in keywords:
397            keywords['getTags']=keywords['tags']
398            del keywords['tags']
399        if 'author' in keywords:
400            keywords['listCreators']=keywords['author']
401            del keywords['author']
402        if 'state' in keywords:
403            keywords['getState']=keywords['state']
404            del keywords['state']
405        if 'type' in keywords:
406            keywords['portal_type']=keywords['type']
407            del keywords['type']
408        else: # portal_type is determined by location
409            path=REQUEST['ACTUAL_URL'].split('/')
410            if 'content' in path:
411                keywords['portal_type']=CONTENT_TYPES
412            elif 'methods' in path:
413                keywords['portal_type']='Activity'
414            elif 'tools' in path:
415                keywords['portal_type']='Tool'
416            elif 'community' in path:
417                keywords['portal_type']=COMMUNITY_TYPES
418            else:
419                keywords['portal_type']=SEARCHABLE_TYPES
420        if 'q' in keywords:
421            keywords['SearchableText']=keywords['q']
422            del keywords['q']
423        if 'edited' in keywords:
424            keywords['getLatestEdit']={'query': self.ZopeTime()-int(keywords['edited']), 'range':'min'}
425        print 'keywords:', keywords
426        if not 'Language' in keywords:
427            found=False
428            if 'base' in keywords:
429                if keywords['base']=='language':
430                    found=True
431            for compensating in ['SearchableText','listCreators','getTarget_group','getTags','getSubject_area']:
432                if compensating in keywords:
433                    found=True
434                    break
435            if not found:
436                keywords['Language']=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
437        if 'Language' in keywords:
438            if not keywords['Language']:
439                del keywords['Language']
440        try:
441            results = self.searchResults(keywords)               
442        except ParseError:
443            results = []
444        print 'browsing search:', time.time()-t
445        return results
446
447    #### Fast catalog handling ###########################################
448
449    def wakeLazy(self, lazy):
450        new=[]
451        for l in lazy._seq:
452            if isinstance(l, Lazy):
453                new.extend(self.wakeLazy(l))
454            else:
455                new.append(l)
456        return new
457
458    def fastPick(self, lazy_results, top):
459        """ Takes Lazy results and picks one random metadata obj from top """
460        tries=4
461        choice=randint(0,top)
462        while tries: # since we don't know how long the list is we need to use trial and error
463            # to find if the random index has corresponding item
464            i=0
465            for item in lazy_results._seq:
466                if choice==i:
467                    return lazy_results._func(item)
468                i+=1
469            choice/=2
470            tries-=1
471        return lazy_results._func(item)
472
473    def fastLink(self, user):
474        """ Finds an user from catalog and returns a tuple containing nicename and link
475        because this gets asked so often and it is relatively expensive to build, we cache them in non-permanent dict here in catalog tool
476        """
477        if not hasattr(self, 'author_cache'):
478            self.author_cache={}
479        if not user in self.author_cache:
480            res=self.searchResults(Creator=user, portal_type='MemberFolder')
481            for l in res._seq:
482                authortuple= (self.getEntry('getNicename', l), self.getpath(l))
483                self.author_cache[user]=authortuple
484                return authortuple
485        else:
486            return self.author_cache[user]
487        return None       
488
489    def getEntry(self, index, key, empty=[]):
490        val=self._catalog.getIndex(index).getEntryForObject(key, empty)
491        if callable(val):
492            val=val()
493        return val       
494           
495    def fastLinks(self, results, limit):
496        """ Takes Lazy results and returns a list of tuples (title, url, portal_type) """
497        def safeData(index, key):
498            v=index.getEntryForObject(key, [])
499            if callable(v):
500                return v()
501            return v
502
503        def wakeUp(lazy, c):
504            new=[]
505            for l in lazy._seq:
506                if isinstance(l, Lazy):
507                    c,newer=wakeUp(l,c)
508                    new.extend(newer)
509                else:
510                    new.append((safeData(title_index, l), self.getpath(l), safeData(type_index, l)))
511                    c+=1
512                if c==limit:
513                    break
514            return c,new
515        title_index=self._catalog.getIndex('getNicename')
516        type_index=self._catalog.getIndex('portal_type')
517        c,new=wakeUp(results,0)       
518        return new
519
520    def fastMetadata(self, results, indexes, cut=0):
521        """ Takes Lazy results and returns a list of tuple for values from indexes.
522        Indexes is a string or a tuple of strings.
523        This is an order of magnitude faster than getting CatalogBrains for each object"""
524        c=0
525        def safeData(index, key):
526            v=index.getEntryForObject(key, [])
527            if callable(v):
528                return v()
529            return v
530
531        def wakeUp(lazy,c):
532            new=[]
533            if isinstance(lazy, list):
534                seq=lazy
535            else:
536                seq=lazy._seq
537            for l in seq:
538                if isinstance(l, Lazy):
539                    new.extend(wakeUp(l,c))
540                else:
541                    values=[]   
542                    for index in index_sources:
543                        if index:
544                            values.append(safeData(index, l))
545                        else:
546                            values.append(l)
547                    new.append(tuple(values))
548                    c+=1
549                    if c==cut:
550                        break
551            return new
552        def wakeUpFaster(lazy, c):
553            new=[]
554            if isinstance(lazy, list):
555                seq=lazy
556            else:
557                seq=lazy._seq
558            for l in seq:
559                if isinstance(l, Lazy):
560                    new.extend(wakeUpFaster(l,c))
561                else:
562                    new.append(index.getEntryForObject(l, []))
563                c+=1
564                if c==cut:
565                    break
566            return new
567        if isinstance(indexes, tuple) or isinstance(indexes, list):
568            index_sources=[ind!='rid' and self._catalog.getIndex(ind) for ind in indexes]
569            new=wakeUp(results,c)
570        else:
571            index=self._catalog.getIndex(indexes)
572            new=wakeUpFaster(results,c)
573        return new
574       
575    def fastCount(self, results, indexes):
576        """ Takes Lazy results and returns a dictionary or tuple of dictionaries for values and their counts from indexes.
577        Index is a string or tuple of strings.
578        This is an order of magnitude faster than getting CatalogBrains for each object"""
579
580        def safeData(index, key):
581            v=index.getEntryForObject(key, [])
582            if callable(v):
583                return v()
584            return v
585       
586        def wakeUpAndCount(lazy):
587            for l in lazy._seq:
588                if isinstance(l, Lazy):
589                    wakeUpAndCount(l)
590                else:
591                    values=[]   
592                    for dic, index in indexes:
593                        val=safeData(index, l)
594                        if val:
595                            if isinstance(val, list):
596                                for v in val:                       
597                                    dic[v[:50]]=dic.get(v[:50],0)+1
598                            elif val:
599                                val=str(val)[:50]
600                                dic[val]=dic.get(val,0)+1
601        def wakeUpFasterAndCount(lazy):
602            for l in lazy._seq:
603                if isinstance(l, Lazy):
604                    wakeUpFasterAndCount(l)
605                else:
606                    val=safeData(index, l)
607                    if isinstance(val, list):
608                        for v in val:                       
609                            dic[v[:50]]=dic.get(v[:50],0)+1
610                    elif val:
611                        val=str(val)[:50]
612                        dic[val]=dic.get(val,0)+1
613        index=None
614        if isinstance(indexes, tuple):
615            if not results:
616                return [{} for ind in indexes]
617            indexes=[({}, self._catalog.getIndex(ind)) for ind in indexes]
618            new=wakeUpAndCount(results)
619            return tuple([dic for dic,index in indexes])
620        else:
621            if not results:
622                return {}
623            index=self._catalog.getIndex(indexes)
624            dic={}
625            new=wakeUpFasterAndCount(results)
626            return dic
627
628    def getSomeMetadataForRID(self, rid, md_fields):
629        record = self._catalog.data[rid]
630        schema = self._catalog.schema
631        if isinstance(md_fields, (tuple, list)):
632            result = {}
633            for md in md_fields:
634                result[md]= record[schema[md]]
635            return result
636        else:
637            return record[schema[md_fields]]
638
639
640    ##############################      Clouds      ######################## 
641
642    def buildCloudData(self, results, request=None):
643        """ Build tag cloud result tuples (name, tagsize, obj_url, tag_value, nicename) for given form from result set """
644        def adjustTag(val, steps=8):
645            # helper method to adjust hit count of this tag to relative size (1,...,8)
646            try:
647                val=int((8*log(val-mincount,2))/log(maxcount-mincount,2))
648            except (OverflowError, ZeroDivisionError):
649                val=0
650            if not val:
651                val=1
652            return val
653        lemill_tool = getToolByName(self, 'lemill_tool')
654        if not results:
655            return []       
656        src=dict(request.form.items())
657        if 'base' not in src:
658            return []
659        portal_url=getToolByName(self, 'portal_url')()
660        path=request['ACTUAL_URL'].split('/')
661        if 'content' in path:
662            link_base='/'.join((portal_url,'content','browse'))
663        elif 'methods' in path:
664            link_base='/'.join((portal_url,'methods','browse'))
665        elif 'tools' in path:
666            link_base='/'.join((portal_url,'tools','browse'))
667        elif 'community' in path:
668            link_base='/'.join((portal_url,'community','browse'))
669        else:
670            link_base='/'.join((portal_url,'browse'))       
671        lang_part=''
672        base=src['base']
673        language=src.get('language','')
674        if language:
675            lang_part='language=%s&' % language
676        title_cloud=False
677        if base=='language':
678            link_root='%s?language=' % link_base       
679            cloud_index='Language'
680        elif base=='tags':
681            link_root='%s?%stags=' % (link_base, lang_part)       
682            cloud_index='getTags'
683        elif base=='subject_area':
684            link_root='%s?%ssubject_area=' % (link_base, lang_part)       
685            cloud_index='getSubject_area'
686        elif base=='target_group':
687            link_root='%s?%starget_group=' % (link_base, lang_part)       
688            cloud_index='getTarget_group'
689        elif base=='title':
690            resultlist=self.fastMetadata(results, ('sortable_title','getScore','getNicename', 'path'), cut=100)
691            maxcount=resultlist[0][1]
692            mincount=resultlist[-1][1]             
693            resultlist.sort()
694            resultlist=[(x[2], adjustTag(x[1], steps=6), x[3]) for x in resultlist if x[2]]
695            return resultlist
696           
697        hits=self.fastCount(results, cloud_index)
698        resultlist=zip(hits.values(),hits.keys())
699        resultlist.sort(reverse=True)
700        maxcount=resultlist[0][0] # first!
701        resultlist = resultlist[:100]
702        mincount=resultlist[-1][0] # last!
703        # At this point resultlist= [(number_of_hits, tag_text),...]
704        # It should end up as: [(displayed_text, number_of_hits, link_url)...]
705        if cloud_index=='Language' or cloud_index=='getLanguage_skills':
706            resultlist=[(lemill_tool.getPrettyLanguage(x[1]), adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
707        elif cloud_index=='getSubject_area':
708            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
709        elif cloud_index=='getTarget_group':
710            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
711        else:
712            resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
713        resultlist.sort()
714        return resultlist
715
716    ########### Front page top lists ############
717   
718    def getTopFive(self, results=None, key_index=None, link_body='', clean=False):
719        """ Returns top five results for key_index (getTags, getSubject_area...) for certain language """
720        if clean or not hasattr(self, 'top5results'):
721            self.top5results={'content':{}, 'methods':{}, 'tools':{}, 'community':{}}
722        data=self.top5results
723        path=self.REQUEST['ACTUAL_URL'].split('/')
724        for section in ['content','methods','tools','community',None]:
725            if section in path:
726                break
727        if not section:
728            return []
729        ilanguage=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
730        if key_index in data[section]:
731            top5lists=data[section][key_index]
732        else:
733            top5lists={}
734            data[section][key_index]=top5lists
735        if ilanguage in top5lists:
736            return top5lists[ilanguage]
737        else:
738            tops=self.fastCount(results, key_index)
739            resultlist=zip(tops.values(),tops.keys())
740            resultlist.sort(reverse=True)
741            resultlist=resultlist[:5]
742            if key_index=='getSubject_area':
743                resultlist=[(x[1], ''.join((link_body, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
744            elif key_index=='getTarget_group':
745                resultlist=[(x[1], ''.join((link_body, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
746            else:
747                resultlist=[(x[1], ''.join((link_body, x[1]))) for x in resultlist]
748            top5lists[ilanguage]=resultlist
749            print top5lists
750            return resultlist
751
752    ############################# batch ##########################
753
754    def batch(self, results=None, request=None):
755        """ Use Plone's batch """
756        if request and 'b_start' in request.form:
757            try:
758                b_start=int(request.form['b_start'])
759            except ValueError:
760                b_start=0
761        else:
762            b_start=0 
763        b= Batch(results, 30, b_start, orphan=1)   
764        return b 
765
766    def batchBaseUrl(self):
767        """ Keep all other parametres as they are, but add or change 'b_start' """
768        form=self.REQUEST.form
769        if 'b_start' in form:
770            del form['b_start']
771        last_url='?'.join((self.REQUEST.ACTUAL_URL, urlencode(form)))
772        new= last_url+'&b_start=%s'
773        return new
774       
775
776#
777#    def getTagCloud(self, search_results, index_type):
778#        """ Build a cloud based on how many occurrences of this item are in results """
779#        if not search_results:
780#            return []
781#        lemill_tool = getToolByName(self, 'lemill_tool')
782#        pc = getToolByName(self, 'portal_catalog')
783#        from math import log
784#        maxcount=0
785#
786#        hits={}
787#        hits=pc.fastCount(search_results, index_type)
788#        resultlist=zip(hits.values(),hits.keys())
789#        if not resultlist:
790#            return []
791#        resultlist.sort()
792#        resultlist.reverse()
793#        maxcount=resultlist[0][0] # first!
794#        # if the first cut score for tag is x, we want to cut off all of the tags with score x.
795#        if len(resultlist)>100:
796#            #cutpoint = [x[0] for x in resultlist].index(resultlist[100]) can't figure this now, fix later
797#            cutpoint = 100
798#            resultlist = resultlist[:cutpoint]
799#        mincount=resultlist[-1][0]
800#        resultlist=[(x[1], x[0], '',x[1],x[1]) for x in resultlist]
801#
802#        # adjust to 1-8. We don't have to worry about score 0, they're already removed.
803#        if maxcount>1:
804#            resultlist=map(adjust, resultlist)
805#        # prettify language names
806#        if index_type=='Language' or index_type=='getLanguage_skills':
807#            resultlist=[(x[0],x[1],x[2],x[3],lemill_tool.getPrettyLanguage(x[4])) for x in resultlist]           
808#        if index_type=='getTarget_group':
809#            def compfunc(t2,t1):
810#                if t2[0] in TARGET_GROUP and t1[0] in TARGET_GROUP:
811#                    return  TARGET_GROUP.index(t2[0]) - TARGET_GROUP.index(t1[0])
812#                else:
813#                    return -1
814#            resultlist.sort(cmp=compfunc)
815#        else:   
816#            resultlist.sort()
817#        return resultlist
818#       
819#
820#    def getTitleCloud(self, search_results, browse_type):
821#        """ Build a cloud based on popularity score for that resource """
822#        pc=getToolByName(self,'portal_catalog')
823#        # uniquetuplelist contains result metadata reordered: (sort_title, count, url, indexvalue, title)
824#        if not search_results:
825#            return []
826#
827#        def isDefaultTitle(x):
828#            """ some heuristic to recognize default titles """
829#            return re.match(r'.*\.(...)$', x) or re.match(r'.*\.(....)$', x)
830#                   
831#        popularity = pc.fastMetadata(search_results, ('getScore','rid','getNicename','sortable_title'))
832#        popularity.sort(reverse=True)
833#        popularity=popularity[:100]
834#        titlecloud=[(sortable_title, getScore, self.REQUEST.physicalPathToURL(pc.getpath(rid)), sortable_title, getNicename or sortable_title) for (getScore, rid, getNicename, sortable_title) in popularity if sortable_title]
835#       
836#        if not titlecloud:
837#            return []
838#        titlecloud.sort()
839#        maxscore=max([x[1] for x in titlecloud])
840#        if maxscore>1:
841#            titlecloud=map(adjust, titlecloud)
842#        return titlecloud
843
844
845
846
847
# Give this subclass the same docstring as the stock Plone catalog tool.
848CatalogTool.__doc__ = PloneCatalogTool.__doc__
849
# Finish the class with Globals.InitializeClass (imported at the top of the
# file); presumably this applies the ClassSecurityInfo declarations made in
# the class body -- TODO confirm.
850InitializeClass(CatalogTool)
Note: See TracBrowser for help on using the repository browser.