root/trunk/LeMillCatalogTool.py

Revision 3190, 42.2 kB (checked in by jukka, 3 weeks ago)

Better fix for #2045

Line 
1 #
2 # Plone CatalogTool simplified -- does not use ExtensibleIndexableObjectWrapper, workflows or permissions. Basically just ZCatalog posing as Plone Tool, allowing but ignoring plonish commands.
3 #
4
5 from Products.CMFPlone import ToolNames
6 from AccessControl import ClassSecurityInfo
7 from Globals import InitializeClass
8 from Globals import DTMLFile
9 from Acquisition import aq_inner, aq_parent
10
11
12 from Products.ZCatalog.ZCatalog import ZCatalog
13 from Products.ZCTextIndex.ParseTree import ParseError
14 from Products.CMFPlone.CatalogTool import CatalogTool as PloneCatalogTool
15 from Products.CMFPlone.PloneBatch import Batch
16 from Products.PythonScripts.standard import urlencode
17
18 from zope.interface import implements
19
20 from Products.CMFCore.utils import SimpleRecord, getToolByName
21 from Products.ZCTextIndex.HTMLSplitter import HTMLWordSplitter
22 from Products.ZCTextIndex.Lexicon import CaseNormalizer
23 from Products.ZCTextIndex.Lexicon import Splitter
24 from Products.ZCTextIndex.Lexicon import StopWordRemover
25
26 from Products.CMFPlone.UnicodeSplitter import Splitter as UnicodeSplitter
27 from Products.CMFPlone.UnicodeSplitter import CaseNormalizer as UnicodeCaseNormalizer
28 from Products.ZCTextIndex.ZCTextIndex import PLexicon
29 from Products.ZCatalog.Lazy import Lazy, LazyMap
30 from random import randint
31 from config import CONTENT_TYPES, MATERIAL_TYPES, ACTIVITY_TYPES, TOOLS_TYPES, LANGUAGES, LANGUAGES_DICT, TYPE_NAMES, SUBJECT_AREAS_DICT, TARGET_GROUPS_DICT, SUBJECT_AREAS_INVERSE_DICT, TARGET_GROUPS_INVERSE_DICT, COMMUNITY_TYPES, SEARCHABLE_TYPES, DEFAULT_ICONS, TARGET_GROUP
32 import time
33 from math import log
34 from messagefactory_ import i18nme as _
35
36 class CatalogTool(PloneCatalogTool):
37
38     meta_type = 'LeMill Catalog Tool'
39     security = ClassSecurityInfo()
40     toolicon = 'skins/lemill/tool.gif'
41
42     __implements__ = PloneCatalogTool.__implements__
43
    # Originally CatalogTool simplified some expensive parts of Plone's catalog tool and added
    # some methods that give us more control over indexing / unindexing.
46     
47     # For LeMill 3.0,
48
49     security.declarePublic( 'enumerateIndexes' ) # Subclass can call
50     def enumerateIndexes( self ):
51         #   Return a list of ( index_name, type, extra ) tuples for the initial
52         #   index set.
53         #   Creator is deprecated and may go away, use listCreators!
54         #   meta_type is deprecated and may go away, use portal_type!
55         plaintext_extra = SimpleRecord( lexicon_id='plaintext_lexicon'
56                                       , index_type='Okapi BM25 Rank'
57                                       )
58         htmltext_extra = SimpleRecord( lexicon_id='htmltext_lexicon'
59                                      , index_type='Okapi BM25 Rank'
60                                      )
61         # Adding plone_lexicon as it should be, no nee dto change the name for our own >>> safer that way
62         plone_extra = SimpleRecord( lexicon_id='plone_lexicon'
63                                   , index_type='Okapi BM25 Rank'
64                                   )
65
66         return ( ('Title', 'ZCTextIndex', plone_extra)
67                , ('Subject', 'KeywordIndex', None)
68                , ('Description', 'ZCTextIndex', plone_extra)
69                , ('Creator', 'FieldIndex', None)
70                , ('listCreators', 'KeywordIndex', None)
71                , ('SearchableText', 'ZCTextIndex', plone_extra)
72                , ('Date', 'DateIndex', None)
73                , ('Type', 'FieldIndex', None)
74                , ('created', 'DateIndex', None)
75                , ('effective', 'DateIndex', None)
76                , ('expires', 'DateIndex', None)
77                , ('modified', 'DateIndex', None)
78                , ('allowedRolesAndUsers', 'KeywordIndex', None)
79                , ('review_state', 'FieldIndex', None)
80                , ('in_reply_to', 'FieldIndex', None)
81                , ('meta_type', 'FieldIndex', None)
82                , ('getId', 'FieldIndex', None)
83                , ('path', 'PathIndex', None)
84                , ('portal_type', 'FieldIndex', None)
85                )
86
87     security.declarePublic('enumerateLexicons')
88     def enumerateLexicons(self):
89         # Creating our own lexicon in a different way, as the automatic creation seems to be needing something to act as a StopWordRemover and fails miserably without it.
90         lexicon = PLexicon('plone_lexicon', '', UnicodeSplitter(), UnicodeCaseNormalizer())
91         self._setObject('plone_lexicon', lexicon)
92         return (
93                  ( 'plaintext_lexicon'
94                  , Splitter()
95                  , CaseNormalizer()
96                  , StopWordRemover()
97                  )
98                , ( 'htmltext_lexicon'
99                  , HTMLWordSplitter()
100                  , CaseNormalizer()
101                  , StopWordRemover()
102                  )
103                )
104     #XXX END
105
106     def catalog_object(self, object, uid, idxs=[],
107                        update_metadata=1, pghandler=None):
108         if object and hasattr(object, 'portal_type') and object.portal_type!='CollectionsFolder':
109             ZCatalog.catalog_object(self, object, uid, idxs, update_metadata, pghandler=pghandler)
110
111     def searchResults(self, REQUEST=None, **kw):
112         """Calls ZCatalog.searchResults """
113         return ZCatalog.searchResults(self, REQUEST, **kw)
114
115     __call__ = searchResults
116
117
118     def titleSearch(self, title='', sort_limit=0):
119         """ search titles containing given string """
120         query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
121         if sort_limit:
122             query['sort_limit']=sort_limit
123         query['Title']='%s*' % title
124         results=self.searchResults(query)
125         return results
126
127     def fulltextSearch(self, SearchableText='', sort_limit=0):
128         """ search fulltext for a string """
129         query={'getState':('draft', 'public'), 'sort_on':'getScore', 'sort_order':'reverse'}
130         if sort_limit:
131             query['sort_limit']=sort_limit
132         query['SearchableText']='%s*' % SearchableText
133         results=self.searchResults(query)
134         return results
135
136     ###### 'Adapters' start here
137     # these are methods that are very specifically used by page templates to efficiently return just the results needed there 
138
139     def buildResultsFromBatch(self, batch):
140         """ This method takes a Batch of results (usually 30 or less, LazyMap) and returns a list of
141             *minimal metadata* about them. This metadata is in form of dictionary.
142             
143             The idea is that instantiating real metadata objects from batch results is still a costly process
144             and with this we can show search/browse results without instantiating metadata.
145             
146             This method will also do some preprocessing for metadata, f.ex finds proper names for authors and builds links to them.
147             
148             This method is crafted for resource_list_macros.pt and if you aren't going to display results as they are displayed there, you probably
149             shouldn't use this.
150             """
151         d_list=[]
152         lutool=getToolByName(self, 'lemill_usertool')
153         ltool=getToolByName(self, 'lemill_tool')
154         url_base=getToolByName(self, 'portal_url')()
155         created=False
156         edited=False
157         if hasattr(self, 'REQUEST') and hasattr(self.REQUEST, 'form'):
158             form=self.REQUEST.form
159             if form:
160                 created= 'created' in form
161                 edited='edited' in form
162         if hasattr(batch._sequence, '_seq'):
163             seq=list(batch._sequence._seq[batch.start-1:batch.end])
164             keys=['rid','getHasCoverImage','getNicename','portal_type','getLocation_country','getTags', 'Language','listCreators']
165             if created:
166                 keys.append('created')
167             elif edited:
168                 keys.append('getLatestEdit')
169             md=self.fastMetadata(seq, keys)
170             for item in md:
171                 if item[3] not in TYPE_NAMES:
172                     continue
173                 d={'url':self.getUrlPath(item[0]),
174                     'coverimage_url':item[1],
175                     'title':item[2],
176                     'readable_type':item[3],
177                     'country':item[4],
178                     'tags':item[5],
179                     'language':item[6],
180                     'authors':item[7]}
181                 if created:
182                     d['created']=item[8] # These are in weird DateIndex format, difficult to map to actual dates
183                 elif edited:
184                     d['edited']=item[8]
185                 d_list.append(d)
186             if created or edited:
187                 now=time.gmtime() # mirrors DateIndex's conversion script to provide a compatible 'now'
188                 now= ( ( ( ( now[0] * 12 + now[1] ) * 31 + now[2] ) * 24 + now[3] ) * 60 + now[4] )
189                 useDateIndex=True                         
190         else: # batch has already been converted to catalog Brains object
191             for item in batch:               
192                 d={'url':item.getURL(),
193                     'coverimage_url':item.getHasCoverImage,
194                     'title':item.getNicename,
195                     'readable_type':item.portal_type,
196                     'country':item.getLocation_country,
197                     'tags':item.getTags,
198                     'language':item.Language,
199                     'authors':item.listCreators}                 
200                 if created:
201                     d['created']=item.created
202                 elif edited:
203                     d['edited']=item.getLatestEdit
204                 d_list.append(d)
205             now=time.time()
206             useDateIndex=False
207         tag_base='/'.join((url_base,'search?index_type=tags&q='))
208         lang_base='/'.join((url_base,'search?language='))
209         country_base='/'.join((url_base,'community/browse?country='))
210        
211         # Manipulate raw index data to something more useful
212         for item in d_list:
213             if callable(item['title']):
214                 item['title']=item['title']()
215             url=item['url']
216             if item['coverimage_url']:
217                 item['coverimage_url']='/'.join((url,'coverImage'))
218             else:
219                 item['coverimage_url']='/'.join((url_base, DEFAULT_ICONS[item['readable_type']]))
220             ll=item['language']
221             if ll:
222                 item['language']=LANGUAGES_DICT[ll]
223                 item['language_link']=''.join((lang_base,ll))
224             item['tags']=[(tag, ''.join((tag_base, tag))) for tag in item['tags']]
225             item['authors']=[self.fastLink(author) for author in item['authors']]
226             if item['country']:         
227                 item['country_link']=''.join((country_base,item['country']))
228             item['readable_type']=TYPE_NAMES.get(item['readable_type'], None)[0]
229             if 'created' in item:
230                 item['timedif']=ltool.getTimeDifference(item['created'], now=now, useDateIndex=useDateIndex)
231             elif 'edited' in item:
232                 item['timedif']=ltool.getTimeDifference(item['edited'], now=now, useDateIndex=useDateIndex)
233         return d_list
234
235     def createBrowsingOptions(self, REQUEST, results=None, **kw):
236         """ This is used by browse_macros.pt and other places that need to display filters
237             based on the search results available and the section where the results are displayed.
238         
239             This method analyzes both request and results to decide what filter fields it should display and if it should
240             preselect something or count occurences for each value. The logic for this may look hairy,
241              but it can be understood by approaching this case-by-case:
242             'If we are browsing in Content section, and there are >1000 results, what filters we should show'           
243         """
244         force_language=True
245         src=dict(REQUEST.form.items())
246         # section determines what kinds of options there are:
247         path=REQUEST['ACTUAL_URL'].split('/')
248         if 'portfolio' in kw:
249             do_languages=False
250             do_subject_areas=True
251             do_target_groups=False
252             do_types=True
253             type_selection=SEARCHABLE_TYPES
254             do_edited=True
255             do_tags=False
256             count_indexes=('portal_type','getSubject_area')
257         elif 'content' in path:
258             do_languages=True
259             do_subject_areas=True
260             do_target_groups=True
261             do_types=True
262             if 'drafts' in path or 'published' in path:
263                 type_selection=CONTENT_TYPES
264             else:
265                 type_selection=MATERIAL_TYPES+('LeMillPrintResource',)
266             do_edited=True
267             do_tags=True
268             count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
269         elif 'methods' in path or 'tools' in path:
270             do_languages=True
271             do_subject_areas=False
272             do_target_groups=False
273             do_types=False
274             do_edited=True
275             do_tags=True
276             count_indexes=('Language',)
277         elif 'community' in path:
278             do_languages=True
279             do_subject_areas=True
280             do_target_groups=False
281             do_types=True
282             type_selection=COMMUNITY_TYPES
283             do_edited=True
284             do_tags=True
285             count_indexes=('getLanguage_skills','portal_type','getSubject_area')
286         else:
287             do_languages=True
288             force_language=False
289             do_subject_areas=True
290             do_target_groups=True
291             do_types=True
292             type_selection=SEARCHABLE_TYPES
293             do_edited=True
294             do_tags=False
295             count_indexes=('Language','getTarget_group','portal_type','getSubject_area')
296        
297         d={'do_languages':do_languages, 'do_subject_areas':do_subject_areas,'do_target_groups':do_target_groups,'do_types':do_types,'do_edited':do_edited, 'do_tags':do_tags, 'q':'', 'state':'', 'author':'', 'created':''}
298
299
300         # now see if the results should be counted and provide a dictionary of counted values if necessary
301         if results: #and len(results)<1000:
302             count_results= dict(zip(count_indexes, self.fastCount(results, count_indexes)))
303             count=len(results)
304         else:
305             count=0
306
307         # Languages: two lists, one for probable options and other for improbable
308         if do_languages:
309             lang_dict = getToolByName(self, 'lemill_tool').language_dict
310             common_language_codes=getToolByName(self,'lemill_usertool').getLanguages()
311             selected_language= src.get('language','')
312             was_empty=not selected_language
313             if was_empty:               
314                 if force_language:
315                     selected_language=common_language_codes[0]
316                 else:
317                     selected_language='all'
318             common_languages=[]
319             rare_languages=[('all','All',int(selected_language=='all'),0)]
320             if count:
321                 if 'Language' in count_results:
322                     langs=count_results['Language']
323                 else:
324                     langs=count_results['getLanguage_skills']
325                 for langcode in common_language_codes:
326                     common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs.get(langcode, 0)))
327                 for langcode in langs:
328                     rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), langs[langcode]))
329             else:
330                 all_languages=list(LANGUAGES)[1:] # Remove 'language neutral' from options
331                 all_languages[0]=('','any language')
332                 all_language_codes=[l[0] for l in all_languages]
333                 for langcode in common_language_codes:
334                     common_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
335                     all_language_codes.remove(langcode)           
336                 for langcode in all_language_codes:
337                     if langcode:
338                         rare_languages.append((langcode, lang_dict[langcode], int(langcode==selected_language), 0))
339             d['common_languages']=common_languages
340             d['rare_languages']=rare_languages
341             d['lang_disabled']=False # portal_type==Piece can set this to true
342             if selected_language!='all':
343                 d['lang_filter']='language=%s&' % selected_language
344             else:
345                 d['lang_filter']=''
346         if do_subject_areas:
347             selected=src.get('subject_area','')
348             if selected:
349                 all_count='?'
350             else:
351                 all_count=count
352             subject_areas=[('','All',int(not selected), all_count)]
353             if count:
354                 counts=count_results['getSubject_area']
355                 for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
356                     if sa_full in counts:
357                         subject_areas.append((sa_key, sa_full, int(selected==sa_key), counts[sa_full]))
358             else:
359                 for sa_key,sa_full in sorted(SUBJECT_AREAS_DICT.items()):
360                     subject_areas.append((sa_key, sa_full, int(selected==sa_key), 0))
361
362             d['subject_area']=subject_areas
363             d['subjs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
364         if do_target_groups:
365             selected=src.get('target_group','')
366             if selected:
367                 all_count='?'
368             else:
369                 all_count=count
370             target_groups=[('','All',int(not selected),all_count)]
371             if count:
372                 counts=count_results['getTarget_group']
373                 for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
374                     if tg_full in counts:
375                         target_groups.append((tg_key, tg_full, int(selected==tg_key), counts[tg_full]))
376             else:
377                 for tg_key,tg_full in sorted(TARGET_GROUPS_DICT.items()):
378                     target_groups.append((tg_key, tg_full, int(selected==tg_key), 0))
379             d['target_group']=target_groups
380             d['targs_disabled']=False # portal_type in ['Piece','Activity','Tool',...] will disable this
381         if do_types:
382             selected=src.get('type','')
383             if selected:
384                 all_count='?'
385             else:
386                 all_count=count
387             types=[('','All',int(not selected),all_count)]
388             if count:
389                 counts=count_results['portal_type']
390                 for type_key in type_selection:
391                     if type_key in counts:
392                         types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), counts[type_key]))
393             else:
394                 for type_key in type_selection:
395                     types.append((type_key, TYPE_NAMES[type_key][1], int(selected==type_key), 0))
396             d['type']=types
397             if selected:
398                 if selected in ['Piece','Activity','Tool']:
399                     d['targs_disabled']=True
400                     d['subjs_disabled']=True
401                 if selected=='Piece':
402                     d['lang_disabled']=True
403
404         if do_edited:
405             try:
406                 selected=int(src.get('edited',0))
407             except ValueError:
408                 selected=0
409             d['edited']=[('',_('Any time'),int(not selected),0), (365,_('Last year'),int(selected==365),0), (30,_('Last month'),int(selected==30),0), (7,_('Last week'),int(selected==7),0), (1,_('Yesterday'),int(selected==1),0)]
410         # Previous search term
411         if 'q' in src:
412             d['q']=src['q']
413         if 'state' in src:
414             d['state']=src['state']
415         if 'author' in src:
416             d['author']=src['author']           
417         return d
418
419
420     def decideBrowsingSubType(self):
421         """ Browse result page main title needs to know what kind of browsing is going on """
422         form=self.REQUEST.form       
423         if 'state' in form:
424             if form['state']=='public': return 'published'
425             if form['state']=='draft': return 'drafts'
426         path=self.REQUEST['ACTUAL_URL'].split('/')
427         for key in ['content','methods','tools','community']:
428             if key in path: return key
429         return ''           
430
431     def decideSearchSubType(self):
432         """ Search result page main title needs to know what kind of search is going on """
433         form=self.REQUEST.form       
434         if 'created' in form:
435             if 'type' in form and form['type']=='MemberFolder':
436                 return 'new_members'
437             else:
438                 return 'new_resources'
439         if 'edited' in form and 'q' not in form:
440             return 'recent_edits'
441         return 'search'
442
443     def browsingSearch(self, REQUEST=None, **kw):       
444         """ This is a general purpose catalog search that can convert readable keywords from request into actual search terms.
445         recognized keywords: language, subject_area, target_group, type, tags, state, author, created, edited, group, country.
446         
447         These same keywords are used all over in interface to build selection boxes, select page headers etc.
448         The actual search indexes like 'getTags' etc. are used only here and should not be used elsewhere.               
449         """
450         src=REQUEST.form
451         if '-C' in src:
452             del src['-C']
453         if not (src or kw):
454             path=REQUEST['ACTUAL_URL'].split('/')
455             if 'browse' in path or 'search' in path or 'cloud' in path:
456                 return []
457         keywords={'sort_on':'getScore','sort_order':'reverse','getState':('draft', 'public')}
458         keywords.update(src)
459         keywords.update(kw)
460
461         find_empty_values=keywords.get('empties','')
462
463         if 'language' in keywords:
464             if keywords['language']=='all':
465                 keywords['Language']=''
466             else:           
467                 keywords['Language']=keywords['language']
468             del keywords['language']
469         if 'subject_area' in keywords:
470             if keywords['subject_area'] in SUBJECT_AREAS_DICT:
471                 keywords['getSubject_area']=SUBJECT_AREAS_DICT[keywords['subject_area']]
472             del keywords['subject_area']
473         if 'target_group' in keywords:
474             if keywords['target_group'] in TARGET_GROUPS_DICT:
475                 keywords['getTarget_group']=TARGET_GROUPS_DICT[keywords['target_group']]
476             del keywords['target_group']
477         if 'tags' in keywords:
478             keywords['getTags']=keywords['tags']
479             del keywords['tags']
480         if 'author' in keywords and keywords['author']:
481             keywords['listCreators']=keywords['author']
482             del keywords['author']
483         if 'group' in keywords and keywords['group']:
484             keywords['getRawGroupEditing']=keywords['group']
485             del keywords['group']
486         if 'state' in keywords:
487             if keywords['state']=='draft':
488                 lutool=getToolByName(self,'lemill_usertool')               
489                 if 'listCreators' in keywords and keywords['listCreators'] == lutool.getAuthenticatedId(): # only allow authenticated author to view privates
490                     keywords['getState']=('draft', 'private')
491                 else:
492                     keywords['getState']='draft'
493             elif keywords['state']=='private': # don't allow manually mungling browse terms
494                 return []
495             else:
496                 keywords['getState']=keywords['state']           
497             del keywords['state']
498         if 'country' in keywords:
499             keywords['getLocation_country']=keywords['country']
500         if 'type' in keywords:
501             if keywords['type']=='lr':
502                 keywords['portal_type']=list(CONTENT_TYPES)+['Activity','Tool']
503             else:
504                 keywords['portal_type']=keywords['type']
505             del keywords['type']
506         else: # portal_type is determined by location
507             path=REQUEST['ACTUAL_URL'].split('/')
508             if 'portfolio' in kw:
509                 keywords['portal_type']=list(CONTENT_TYPES)+['Activity','Tool','Collection']
510                 del keywords['portfolio']
511             elif 'content' in path:
512                 if 'listCreators' in keywords: # my published, my drafts and portfolios should display pieces
513                     keywords['portal_type']=list(CONTENT_TYPES)
514                 else:
515                     keywords['portal_type']=list(MATERIAL_TYPES)+['LeMillPrintResource']
516             elif 'methods' in path:
517                 keywords['portal_type']='Activity'
518             elif 'tools' in path:
519                 keywords['portal_type']='Tool'
520             elif 'community' in path:
521                 keywords['portal_type']=list(COMMUNITY_TYPES)
522             else:
523                 keywords['portal_type']=list(SEARCHABLE_TYPES)
524         if 'q' in keywords:
525             keywords['SearchableText']=keywords['q']
526             del keywords['q']
527         if 'created' in keywords:
528             keywords['created']={'query': self.ZopeTime()-int(keywords['created']), 'range':'min'}
529             keywords['sort_on']='created'
530         if 'edited' in keywords:
531             keywords['getLatestEdit']={'query': self.ZopeTime()-int(keywords['edited']), 'range':'min'}
532             keywords['sort_on']='getLatestEdit'
533             del keywords['edited']
534
535         if not 'Language' in keywords:
536             found=False
537             if 'RSS' in keywords:
538                 found=True
539             elif 'empties' in keywords:
540                 found=True
541             elif 'base' in keywords and keywords['base']=='language':
542                 found=True               
543             else:
544                 for compensating in ['SearchableText', 'listCreators', 'getTarget_group', 'getTags', 'getSubject_area', 'created', 'getLatestEdit', 'getLocation_country', 'getRawGroupEditing']:
545                     if compensating in keywords:
546                         found=True
547                         break
548             if not found:
549                 keywords['Language']=getToolByName(self,'lemill_usertool').getLanguages()[0]
550         if 'Language' in keywords:
551             if not keywords['Language']: # Delete empty 'Language' -- now results from all languages are used
552                 del keywords['Language']
553             elif 'portal_type' in keywords: # If browsing for MemberFolders, ignore Language, use getLanguage_skills instead
554                 if 'MemberFolder' in keywords['portal_type'] or 'GroupBlog' in keywords['portal_type']:
555                     keywords['getLanguage_skills']=keywords['Language']
556                     del keywords['Language']
557
558         if 'portal_type' in keywords:
559             if not isinstance(keywords['portal_type'], list) and keywords['portal_type']=='Piece':
560                 # ignore language, subject_area and target_group:
561                 for k in ['Language','getTarget_group','getSubject_area']:
562                     if k in keywords:
563                         del keywords[k]
564
565         for k in ['base','portfolio','RSS','empties']:
566             if k in keywords:
567                 del keywords[k]
568         try:
569             results = self.searchResults(keywords)               
570         except ParseError:
571             results = []
572         if find_empty_values:
573             results = self.fastFindEmptyValues(results, find_empty_values)
574         return results
575
576     #### Fast catalog handling ###########################################
577
578     def wakeLazy(self, lazy):
579         new=[]
580         for l in lazy._seq:
581             if isinstance(l, Lazy):
582                 new.extend(self.wakeLazy(l))
583             else:
584                 new.append(l)
585         return new
586
587     def fastPick(self, lazy_results, top):
588         """ Takes Lazy results and picks one random metadata obj from top """
589         tries=4
590         choice=randint(0,top)
591         while tries: # since we don't know how long the list is we need to use trial and error
592             # to find if the random index has corresponding item
593             i=0
594             for item in lazy_results._seq:
595                 if choice==i:
596                     return lazy_results._func(item)
597                 i+=1
598             choice/=2
599             tries-=1
600         return lazy_results._func(item)
601
602     def getBaseUrl(self):
603         """ Just absolute url, hopefully this gets cached ( it's fast, checked )"""
604         return aq_parent( aq_inner(self) ).absolute_url()
605
606     def getUrlPath(self,data_obj):
607         """ Build a working url from catalog path data """
608         path=self.getpath(data_obj).split('/')[1:]
609         url_base=self.getBaseUrl().split('/')
610         if url_base[-1]==path[0]:
611             path=path[1:] # remove lemill-server
612         return '/'.join(url_base+path)
613
614     def fastLink(self, user):
615         """ Finds an user from catalog and returns a tuple containing nicename and link
616         because this gets asked so often and it is relatively expensive to build, we cache them in non-permanent dict here in catalog tool
617         """
618         if not hasattr(self, 'author_cache'):
619             self.author_cache={}
620         if not user in self.author_cache:
621             res=self.searchResults(Creator=user, portal_type='MemberFolder')
622             for l in res._seq:
623                 authortuple= (self.getEntry('getNicename', l), self.getUrlPath(l))
624                 self.author_cache[user]=authortuple
625                 return authortuple
626         else:
627             return self.author_cache[user]
628         return (user,'')       
629
630     def getEntry(self, index, key, empty=[]):
631         val=self._catalog.getIndex(index).getEntryForObject(key, empty)
632         if callable(val):
633             val=val()
634         return val       
635            
636     def fastLinks(self, results, limit):
637         """ Takes Lazy results and returns a list of tuples (title, url, portal_type) """
638         def safeData(index, key):
639             v=index.getEntryForObject(key, [])
640             if callable(v):
641                 return v()
642             return v
643
644         def wakeUp(lazy, c):
645             new=[]
646             for l in lazy._seq:
647                 if isinstance(l, Lazy):
648                     c,newer=wakeUp(l,c)
649                     new.extend(newer)
650                 else:
651                     new.append((safeData(title_index, l), self.getUrlPath(l), safeData(type_index, l)))
652                     c+=1
653                 if c==limit:
654                     break
655             return c,new
656         title_index=self._catalog.getIndex('getNicename')
657         type_index=self._catalog.getIndex('portal_type')
658         c,new=wakeUp(results,0)       
659         return new
660
661     def fastMetadata(self, results, indexes, cut=0):
662         """ Takes Lazy results and returns a list of tuple for values from indexes.
663         Indexes is a string or a tuple of strings.
664         This is an order of magnitude faster than getting CatalogBrains for each object"""
665         c=0
666         def safeData(index, key):
667             v=index.getEntryForObject(key, [])
668             if callable(v):
669                 return v()
670             return v
671
672         def wakeUp(lazy,c):
673             new=[]
674             if isinstance(lazy, list):
675                 seq=lazy
676             else:
677                 seq=lazy._seq
678             for l in seq:
679                 if isinstance(l, Lazy):
680                     new.extend(wakeUp(l,c))
681                 else:
682                     values=[]   
683                     for index in index_sources:
684                         if index:
685                             values.append(safeData(index, l))
686                         else:
687                             values.append(l)
688                     new.append(tuple(values))
689                     c+=1
690                     if c==cut:
691                         break
692             return new
693         def wakeUpFaster(lazy, c):
694             new=[]
695             if isinstance(lazy, list):
696                 seq=lazy
697             else:
698                 seq=lazy._seq
699             for l in seq:
700                 if isinstance(l, Lazy):
701                     new.extend(wakeUpFaster(l,c))
702                 else:
703                     new.append(index.getEntryForObject(l, []))
704                 c+=1
705                 if c==cut:
706                     break
707             return new
708         if isinstance(indexes, tuple) or isinstance(indexes, list):
709             index_sources=[ind!='rid' and self._catalog.getIndex(ind) for ind in indexes]
710             new=wakeUp(results,c)
711         else:
712             index=self._catalog.getIndex(indexes)
713             new=wakeUpFaster(results,c)
714         return new
715        
716     def fastCount(self, results, indexes):
717         """ Takes Lazy results and returns a dictionary or tuple of dictionaries for values and their counts from indexes.
718         Index is a string or tuple of strings.
719         This is an order of magnitude faster than getting CatalogBrains for each object"""
720
721         def safeData(index, key):
722             v=index.getEntryForObject(key, [])
723             if callable(v):
724                 return v()
725             return v
726        
727         def wakeUpAndCount(lazy):
728             for l in lazy._seq:
729                 if isinstance(l, Lazy):
730                     wakeUpAndCount(l)
731                 else:
732                     values=[]   
733                     for dic, index in indexes:
734                         val=safeData(index, l)
735                         if val:
736                             if isinstance(val, list):
737                                 for v in val:                       
738                                     dic[v[:50]]=dic.get(v[:50],0)+1
739                             elif val:
740                                 val=str(val)[:50]
741                                 dic[val]=dic.get(val,0)+1
742         def wakeUpFasterAndCount(lazy):
743             for l in lazy._seq:
744                 if isinstance(l, Lazy):
745                     wakeUpFasterAndCount(l)
746                 else:
747                     val=safeData(index, l)
748                     if isinstance(val, list):
749                         for v in val:                       
750                             dic[v[:50]]=dic.get(v[:50],0)+1
751                     elif val:
752                         val=str(val)[:50]
753                         dic[val]=dic.get(val,0)+1
754         index=None
755         if isinstance(indexes, tuple):
756             if not results:
757                 return [{} for ind in indexes]
758             indexes=[({}, self._catalog.getIndex(ind)) for ind in indexes]
759             new=wakeUpAndCount(results)
760             return tuple([dic for dic,index in indexes])
761         else:
762             if not results:
763                 return {}
764             index=self._catalog.getIndex(indexes)
765             dic={}
766             new=wakeUpFasterAndCount(results)
767             return dic
768
769
770     def fastFindEmptyValues(self, results, index_to_look):
771         """ Takes Lazy results and return those results where the given index returns empty"""
772         new_results=[]
773         def safeData(index, key):
774             v=index.getEntryForObject(key, [])
775             if callable(v):
776                 return v()
777             return v
778        
779         def wakeUpAndFilter(lazy):
780             for l in lazy._seq:
781                 if isinstance(l, Lazy):
782                     wakeUpAndFilter(l)
783                 else:
784                     val=safeData(index, l)
785                     if not val:
786                         new_results.append(l)
787         if not results:
788             return []
789         index=self._catalog.getIndex(index_to_look)
790         wakeUpAndFilter(results)
791         assert isinstance(results,LazyMap)
792         return LazyMap(results._func, new_results, len(new_results))
793
794
795     def getSomeMetadataForRID(self, rid, md_fields):
796         record = self._catalog.data[rid]
797         schema = self._catalog.schema
798         if isinstance(md_fields, (tuple, list)):
799             result = {}
800             for md in md_fields:
801                 result[md]= record[schema[md]]
802             return result
803         else:
804             return record[schema[md_fields]]
805
806
807     ##############################      Clouds      ######################## 
808
809     def buildCloudData(self, results, request=None, size=8, portfolio=False):
810         """ Build tag cloud result tuples (name, tagsize, obj_url, tag_value, nicename) for given form from result set """
811         def adjustTag(val, steps=8):
812             # helper method to adjust hit count of this tag to relative size (1,...,8)
813             try:
814                 val=int((steps*log(val-mincount,2))/log(maxcount-mincount,2))
815             except (OverflowError, ZeroDivisionError):
816                 val=0
817             if not val:
818                 val=1
819             return val
820         lemill_tool = getToolByName(self, 'lemill_tool')
821         if not results:
822             return []
823         if portfolio:
824             cloud_index='getTags'
825             mf=self.getMemberFolder()           
826             link_root='%s/portfolio?tags=' % mf.absolute_url()
827         else:       
828             src=dict(request.form.items())
829             if 'base' not in src:
830                 return []
831             portal_url=getToolByName(self, 'portal_url')()
832             path=request['ACTUAL_URL'].split('/')
833             if 'content' in path:
834                 link_base='/'.join((portal_url,'content','browse'))
835             elif 'methods' in path:
836                 link_base='/'.join((portal_url,'methods','browse'))
837             elif 'tools' in path:
838                 link_base='/'.join((portal_url,'tools','browse'))
839             elif 'community' in path:
840                 link_base='/'.join((portal_url,'community','browse'))
841             else:
842                 link_base='/'.join((portal_url,'browse'))       
843             lang_part=''
844             base=src['base']
845             language=src.get('language','')
846             type_restriction=src.get('type','')
847             if language:
848                 lang_part='language=%s&' % language
849             if type_restriction:
850                 lang_part+='type=%s&' % type_restriction
851             title_cloud=False
852             if base=='language':
853                 link_root='%s?%slanguage=' % (link_base, lang_part)
854                 cloud_index='Language'
855                 if 'type' in src:
856                     if 'GroupBlog' in src['type'] or 'MemberFolder' in src['type']:
857                         cloud_index='getLanguage_skills'     
858             elif base=='tags':
859                 link_root='%s?%stags=' % (link_base, lang_part)       
860                 cloud_index='getTags'
861             elif base=='subject_area':
862                 link_root='%s?%ssubject_area=' % (link_base, lang_part)       
863                 cloud_index='getSubject_area'
864             elif base=='target_group':
865                 link_root='%s?%starget_group=' % (link_base, lang_part)       
866                 cloud_index='getTarget_group'
867             elif base=='country':
868                 link_root='%s?%scountry=' % (link_base, lang_part)
869                 cloud_index='getLocation_country'
870             elif base=='title':
871                 resultlist=self.fastMetadata(results, ('sortable_title','getScore','getNicename', 'path'), cut=100)
872                 if not resultlist:
873                     return []
874                 maxcount=resultlist[0][1]
875                 mincount=resultlist[-1][1]             
876                 resultlist.sort()
877                 resultlist=[(x[2], adjustTag(x[1], steps=6), x[3]) for x in resultlist if x[2]]
878                 return resultlist
879         hits=self.fastCount(results, cloud_index)
880         if not hits:
881             return []
882         resultlist=zip(hits.values(),hits.keys())
883         resultlist.sort(reverse=True)
884         maxcount=resultlist[0][0] # first!
885         resultlist = resultlist[:100]
886         mincount=resultlist[-1][0] # last!
887         # At this point resultlist= [(number_of_hits, tag_text),...]
888         # It should end up as: [(displayed_text, number_of_hits, link_url)...]
889         if cloud_index=='Language' or cloud_index=='getLanguage_skills':
890             resultlist=[(lemill_tool.getPrettyLanguage(x[1]), adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
891         elif cloud_index=='getSubject_area':
892             resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
893         elif cloud_index=='getTarget_group':
894             resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]           
895         else:
896             resultlist=[(x[1], adjustTag(x[0]), ''.join((link_root, x[1]))) for x in resultlist]           
897         resultlist.sort()
898         return resultlist
899
900
901     ########### Front page top lists ############
902     
903     def getTopFive(self, results=None, key_index=None, link_body='', clean=False, cache_key=''):
904         """ Returns top five results for key_index (getTags, getSubject_area...) for certain language """
905         if clean or not hasattr(self, 'top5results'):
906             self.top5results={'content':{}, 'methods':{}, 'tools':{}, 'community':{}}
907         data=self.top5results
908         cache_key=cache_key or key_index
909         path=self.REQUEST['ACTUAL_URL'].split('/')
910         for section in ['content','methods','tools','community',None]:
911             if section in path:
912                 break
913         if not section:
914             return []
915         ilanguage=getToolByName(self, 'portal_languages').getLanguageCookie() or 'en'
916         if cache_key in data[section]:
917             top5lists=data[section][cache_key]
918         else:
919             top5lists={}
920             data[section][cache_key]=top5lists
921         if ilanguage in top5lists:
922             return top5lists[ilanguage]
923         else:
924             tops=self.fastCount(results, key_index)
925             resultlist=zip(tops.values(),tops.keys())
926             resultlist.sort(reverse=True)
927             resultlist=resultlist[:5]
928             if key_index=='getSubject_area':
929                 resultlist=[(x[1], ''.join((link_body, SUBJECT_AREAS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
930             elif key_index=='getTarget_group':
931                 resultlist=[(x[1], ''.join((link_body, TARGET_GROUPS_INVERSE_DICT.get(x[1], x[1])))) for x in resultlist]
932             else:
933                 resultlist=[(x[1], ''.join((link_body, x[1]))) for x in resultlist]
934             top5lists[ilanguage]=resultlist
935             return resultlist
936
937     ############################# batch ##########################
938
939     def batch(self, results=None, request=None):
940         """ Use Plone's batch """
941         if request and 'b_start' in request.form:
942             try:
943                 b_start=int(request.form['b_start'])
944             except ValueError:
945                 b_start=0
946         else:
947             b_start=0 
948         b= Batch(results, 30, b_start, orphan=1)   
949         return
950
951     def batchBaseUrl(self):
952         """ Keep all other parametres as they are, but add or change 'b_start' """
953         form=self.REQUEST.form
954         if 'b_start' in form:
955             del form['b_start']
956         last_url='?'.join((self.REQUEST.ACTUAL_URL, urlencode(form)))
957         new= last_url+'&b_start='
958         return new
959        
# Reuse the docstring of the Plone catalog tool this class derives from.
CatalogTool.__doc__ = PloneCatalogTool.__doc__

# Zope class initialisation, run once the class body is complete
# (presumably applies the ClassSecurityInfo declarations -- confirm).
InitializeClass(CatalogTool)
Note: See TracBrowser for help on using the browser.