| 522 | | xurl = url |
|---|
| 523 | | try: |
|---|
| 524 | | if not url.startswith('http://'): |
|---|
| 525 | | xurl = base + '/' + url |
|---|
| 526 | | src = urlopen(xurl) |
|---|
| 527 | | print xurl |
|---|
| 528 | | media_type, subtype = src.headers.getheader('Content-Type').split('/') |
|---|
| 529 | | if media_type == 'image': |
|---|
| 530 | | tmp_url = src.url |
|---|
| 531 | | if src.url.startswith(self.portal_url()): |
|---|
| 532 | | for s in ('/coverImage', '/image_large'): |
|---|
| 533 | | i = src.url.find(s) |
|---|
| 534 | | if i != -1: |
|---|
| 535 | | tmp_url = list(src.url) |
|---|
| 536 | | tmp_url[i] = '_' |
|---|
| 537 | | tmp_url = ''.join(tmp_url[:i + len(s)]) |
|---|
| 538 | | break |
|---|
| 539 | | basename = tmp_url.split('/')[-1].rsplit('.', 1)[0] |
|---|
| 540 | | filename = '%s.%s' % (basename, subtype.split(';', 1)[0]) |
|---|
| 541 | | newnames[filename] = newnames.get(filename, 0) + 1 |
|---|
| 542 | | if newnames[filename] > 1: |
|---|
| 543 | | filename = '%s-%d.%s' % (basename, newnames[filename] - 1, subtype) |
|---|
| 544 | | else: |
|---|
| 545 | | filename = src.url.split('/')[-1] |
|---|
| 546 | | url_content = src.read() |
|---|
| 547 | | srcs[url] = (filename, url_content) |
|---|
| 548 | | cache[url] = (filename, url_content) |
|---|
| 549 | | except HTTPError, e: |
|---|
| 550 | | print "Failed to download %s: %s" % (xurl, e) |
|---|
| 551 | | |
|---|
| 552 | | return srcs, split_html |
|---|
| 553 | | |
|---|
| 554 | | zipcontent = [] |
|---|
| 555 | | |
|---|
| 556 | | main_dir = self.getId() |
|---|
| 557 | | url_cache = {} |
|---|
| 558 | | zipcontent.append((main_dir, do_stuff(self.standalone_view(), url_cache))) |
|---|
| 559 | | for content in self.getResources(reftype='relatedContent'): |
|---|
| 560 | | zipcontent.append(('%s/content/%s' % (main_dir, content.getId()), do_stuff(content.standalone_view(), url_cache))) |
|---|
| 561 | | |
|---|
| 562 | | for method in self.getResources(reftype='relatedMethods'): |
|---|
| 563 | | zipcontent.append(('%s/methods/%s' % (main_dir, method.getId()), do_stuff(method.standalone_view(), url_cache))) |
|---|
| 564 | | |
|---|
| 565 | | for tool in self.getResources(reftype='relatedTools'): |
|---|
| 566 | | zipcontent.append(('%s/tools/%s' % (main_dir, tool.getId()), do_stuff(tool.standalone_view(), url_cache))) |
|---|
| 567 | | |
|---|
| 568 | | file_ref_count = {} |
|---|
| 569 | | for dir_, (srcs, split_html) in zipcontent: |
|---|
| 570 | | for url in srcs.iterkeys(): |
|---|
| 571 | | file_ref_count[url] = file_ref_count.get(url, 0) + 1 |
|---|
| 572 | | |
|---|
| 573 | | s = StringIO() |
|---|
| 574 | | zf = zipfile.ZipFile(s, 'w', compression=zipfile.ZIP_DEFLATED) |
|---|
| 575 | | |
|---|
| 576 | | common_urls = [key for key, val in file_ref_count.iteritems() if val > 1] |
|---|
| 577 | | for dir_, (srcs, split_html) in zipcontent: |
|---|
| 578 | | for url, (newname, content) in srcs.items(): |
|---|
| 579 | | if url in common_urls: |
|---|
| 580 | | srcs[url] = ('%s/common_files/%s' % (main_dir, newname), content) |
|---|
| | 522 | previousInfo = None |
|---|
| | 523 | text = inputList[i] |
|---|
| | 524 | del infoList[i] |
|---|
| | 525 | del inputList[i] |
|---|
| | 526 | elif previousInfo == None: |
|---|
| | 527 | previousInfo = '' |
|---|
| | 528 | infoList.insert(i+1, None) |
|---|
| | 529 | inputList.insert(i+1, text) |
|---|
| | 530 | if previousInfo == None: |
|---|
| | 531 | infoList.insert(0, None) |
|---|
| | 532 | inputList.insert(0, text) |
|---|
| | 533 | |
|---|
| | 534 | def reqursiveRegularSplit(inputList, infoList, info, reg, first, next): |
|---|
| | 535 | if isinstance(inputList, (list, tuple)): |
|---|
| | 536 | for i in range(len(inputList)): |
|---|
| | 537 | if not isinstance(infoList[i], str): |
|---|
| | 538 | inputList[i], infoList[i] = reqursiveRegularSplit(inputList[i], infoList[i], info, reg, first, next) |
|---|
| | 539 | else: |
|---|
| | 540 | reg = sre.compile(reg, sre.DOTALL | sre.IGNORECASE) |
|---|
| | 541 | inputList = sre.split(reg, inputList) |
|---|
| | 542 | if len(inputList) > 1: |
|---|
| | 543 | infoList = [] |
|---|
| | 544 | for i in range(len(inputList)): |
|---|
| | 545 | if i % next == first: |
|---|
| | 546 | infoList.append(info) |
|---|
| | 547 | else: |
|---|
| | 548 | infoList.append(None) |
|---|
| | 549 | combineNones(inputList, infoList) |
|---|
| 582 | | srcs[url] = ('%s/%s' % (dir_, newname), content) |
|---|
| 583 | | |
|---|
| 584 | | def calc_url(link): |
|---|
| 585 | | url = srcs.get(link, (link,))[0] |
|---|
| 586 | | depth = dir_.count('/') |
|---|
| 587 | | return "../" * depth + url.split('/', 1)[1] |
|---|
| 588 | | |
|---|
| 589 | | new_html_l = ['%s%ssrc="%s"' % (stuff, tag, calc_url(link)) for stuff, tag, link in zip(split_html[::3], split_html[1::3], split_html[2::3])] |
|---|
| 590 | | new_html_l.append(split_html[-1]) |
|---|
| 591 | | zf.writestr(dir_ + '/index.html', ''.join(new_html_l)) |
|---|
| 592 | | |
|---|
| 593 | | in_zip = {} |
|---|
| 594 | | for dir_, (srcs, split_html) in zipcontent: |
|---|
| 595 | | for url, (newname, content) in srcs.items(): |
|---|
| 596 | | if not in_zip.has_key(newname): |
|---|
| 597 | | in_zip[newname] = content |
|---|
| 598 | | zf.writestr(newname, content) |
|---|
| | 551 | infoList = [infoList] |
|---|
| | 552 | return inputList, infoList |
|---|
| | 553 | |
|---|
| | 554 | splitHtml, infoList = reqursiveRegularSplit(htmlPage, None, 'base', '<base href="(.*?)".*?/>', 2, 4) |
|---|
| | 555 | if len(splitHtml) > 1: |
|---|
| | 556 | baseURL = splitHtml[-2] |
|---|
| | 557 | del splitHtml[1::2] |
|---|
| | 558 | del infoList[1::2] |
|---|
| | 559 | else: |
|---|
| | 560 | baseURL = self.portal_url() |
|---|
| | 561 | if not baseURL.endswith('/'): |
|---|
| | 562 | baseURL += '/' |
|---|
| | 563 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, None, r'<a[^>]*?>(\s*<img.*?>\s*)</a>', 1, 2) |
|---|
| | 564 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'fvars', '(<embed[^>]*?flashvars=")(.*?)(")', 2, 4) |
|---|
| | 565 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'XMLmp3', '(<voiceover[^>]+)(src=")(.*?)(")', 3, 5) |
|---|
| | 566 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'src', '(<[^>]+)(src=")(.*?)(")', 3, 5) |
|---|
| | 567 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'src', '(<param.+?name="movie".*?value=")(.*?)(")', 2, 4) |
|---|
| | 568 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'fvars', r"(<script.*?>.*?AC_FL_RunContent\(.*?'flashvars', ')(.*?)((?<!\\)'.*?</script>)", 2, 4) |
|---|
| | 569 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'jsfname', "(<script.*?>.*?AC_FL_RunContent\(.*?'movie', ')(.*?)(')", 2, 4) |
|---|
| | 570 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'fvars', '(<param.+?name="flashvars".*?value=")(.*?)(")', 2, 4) |
|---|
| | 571 | |
|---|
| | 572 | return flatten(splitHtml), flatten(infoList), baseURL |
|---|
| | 573 | |
|---|
| | 574 | def _getAbsoluteURL(self, url, info, baseURL=None): |
|---|
| | 575 | flash = None |
|---|
| | 576 | if info in ('src', 'jsfname', 'XMLmp3'): |
|---|
| | 577 | if url.startswith('http://') or baseURL == None: |
|---|
| | 578 | url2 = url |
|---|
| | 579 | else: |
|---|
| | 580 | url2 = baseURL + url |
|---|
| | 581 | if info == 'jsfname': |
|---|
| | 582 | url2 += '.swf' |
|---|
| | 583 | elif info == 'fvars': |
|---|
| | 584 | if url.startswith('file='): |
|---|
| | 585 | url2 = url[5:-5] |
|---|
| | 586 | flash = 'mp3' |
|---|
| | 587 | elif url.startswith('xml='): |
|---|
| | 588 | url2 = url[4:] |
|---|
| | 589 | flash = 'pilot' |
|---|
| | 590 | elif url.startswith("config={videoFile: '"): |
|---|
| | 591 | url2 = url[20:-2] |
|---|
| | 592 | flash = 'flv1' |
|---|
| | 593 | elif url.startswith("config={videoFile: \\'"): |
|---|
| | 594 | url2 = url[21:-3] |
|---|
| | 595 | flash = 'flv2' |
|---|
| | 596 | return url2, flash |
|---|
| | 597 | |
|---|
| | 598 | def _getFileNameOfURL(self, url): |
|---|
| | 599 | return sre.split(r'[/\\]', url.split('?')[0])[-1] |
|---|
| | 600 | |
|---|
| | 601 | def _hasFileNameInSrcs(self, fileName, srcs): |
|---|
| | 602 | for absURL in srcs: |
|---|
| | 603 | src = srcs[absURL] |
|---|
| | 604 | if src.has_key('fileName') and src['fileName'] == fileName: |
|---|
| | 605 | return True, src |
|---|
| | 606 | return False, None |
|---|
| | 607 | |
|---|
| | 608 | def _splitFileName(self, fileName): |
|---|
| | 609 | splitted = fileName.split('.') |
|---|
| | 610 | if len(splitted) > 1: |
|---|
| | 611 | return '.'.join(splitted[:-1]), splitted[-1] |
|---|
| | 612 | return fileName, '' |
|---|
| | 613 | |
|---|
def _findUniqueFileName(self, absURL, srcs):
    """Derive a file name for absURL that no entry in srcs uses yet.

    The candidate is the last path component of absURL.  If another
    source already owns that name, that source's 'counter' is incremented
    and the search repeats with 'name(counter).ext', or 'name(counter)'
    when the name has no extension.

    Returns the first candidate that is not taken.
    """
    fileName = self._getFileNameOfURL(absURL)
    hasFileName, src2 = self._hasFileNameInSrcs(fileName, srcs)
    if not hasFileName:
        return fileName

    src2['counter'] += 1
    name, extension = self._splitFileName(fileName)
    if extension:
        # _splitFileName strips the dot, so it has to be put back here;
        # otherwise 'image.png' would turn into 'image(1)png'.
        candidate = '%s(%d).%s' % (name, src2['counter'], extension)
    else:
        candidate = '%s(%d)' % (name, src2['counter'])
    return self._findUniqueFileName(candidate, srcs)
|---|
| | 623 | |
|---|
| | 624 | def _processHtml(self, splitHtml, infoList, srcs, baseURL, htmlIndex): |
|---|
| | 625 | for i in range(len(infoList)): |
|---|
| | 626 | if infoList[i] in ('src', 'jsfname', 'fvars', 'XMLmp3'): |
|---|
| | 627 | absURL, flash = self._getAbsoluteURL(splitHtml[i], infoList[i], baseURL) |
|---|
| | 628 | if not srcs.has_key(absURL): |
|---|
| | 629 | srcs[absURL] = {} |
|---|
| | 630 | src = srcs[absURL] |
|---|
| | 631 | src['fileName'] = self._findUniqueFileName(absURL, srcs) |
|---|
| | 632 | src['finalURL'] = '' |
|---|
| | 633 | src['usedBy'] = set([htmlIndex]) |
|---|
| | 634 | src['counter'] = 0 |
|---|
| | 635 | if flash == 'mp3': |
|---|
| | 636 | src['extension'] = 'mp3' |
|---|
| | 637 | elif flash == 'pilot': |
|---|
| | 638 | src['extension'] = 'xml' |
|---|
| | 639 | elif flash in ('flv1', 'flv2'): |
|---|
| | 640 | src['extension'] = 'flv' |
|---|
| | 641 | elif infoList[i] == 'XMLmp3': |
|---|
| | 642 | src['extension'] = 'mp3' |
|---|
| | 643 | else: |
|---|
| | 644 | src['extension'] = '' |
|---|
| 600 | | assert in_zip[newname] == content |
|---|
| 601 | | |
|---|
| 602 | | print '-------------- _buildZIP done ------------' |
|---|
| 603 | | return s, zf |
|---|
| 604 | | |
|---|
| 605 | | def getZIP(self): |
|---|
| 606 | | s, zf = self._buildZIP() |
|---|
| 607 | | zf.close() |
|---|
| | 646 | srcs[absURL]['usedBy'].add(htmlIndex) |
|---|
| | 647 | |
|---|
def _processPilotXMLs(self, srcs, baseURLs, baseDirs):
    """Fetch every source marked with extension 'xml' and scan it for resources.

    srcs     -- URL -> source-description dict; extended in place by
                _processHtml with whatever the XML files reference
    baseURLs -- base URL per page, indexed by page index
    baseDirs -- archive directory per page, indexed by page index

    Returns a dict keyed by XML URL.  Each value holds 'split' and 'info'
    (the result of _splitHTML), 'fileName' and 'baseDirs' (the directories
    of the pages using the file).  A file that cannot be downloaded is
    reported on stdout and left out.
    """
    xmlDatas = {}
    # Snapshot the XML URLs first: _processHtml adds keys to srcs below.
    xmlList = [absURL for absURL in srcs if srcs[absURL]['extension'] == 'xml']
    for absURL in xmlList:
        try:
            src = srcs[absURL]
            response = urlopen(absURL)
            try:
                # The file is decoded as UTF-16.
                xml = response.read().decode('utf-16')
            finally:
                # Release the connection even when reading or decoding fails.
                response.close()
            xmlDatas[absURL] = {}
            xmlData = xmlDatas[absURL]
            xmlData['split'], xmlData['info'], baseURL = self._splitHTML(xml)
            xmlData['fileName'] = src['fileName']
            xmlData['baseDirs'] = []
            for htmlIndex in src['usedBy']:
                # Resources inside the XML are resolved against the base
                # URL of each page that embeds it, not the XML's own.
                self._processHtml(xmlData['split'], xmlData['info'], srcs, baseURLs[htmlIndex], htmlIndex)
                xmlData['baseDirs'].append(baseDirs[htmlIndex])
        except (HTTPError, URLError):
            print("File download error: " + absURL)
    return xmlDatas
|---|
| | 669 | |
|---|
def _addExtension(self, fileName, extension):
    """Append '.extension' to fileName when it has no extension of its own.

    fileName is returned unchanged if it already carries an extension or
    if extension is the empty string.
    """
    currentExt = self._splitFileName(fileName)[1]
    if extension == '' or currentExt != '':
        return fileName
    return '%s.%s' % (fileName, extension)
|---|
| | 675 | |
|---|
def _downloadFiles(self, srcs, zip, baseDirs = []):
    """Download each registered source and write it into the archive.

    srcs     -- URL -> source-description dict; 'extension' and 'finalURL'
                are updated in place
    zip      -- open archive exposing writestr(name, data)
    baseDirs -- archive directory per page, indexed by page index

    Sources with extension 'xml' are not downloaded or written here; they
    only receive their 'finalURL'.  A source used by more than one page is
    stored once under '_SharedFiles/' and referenced as
    '../../_SharedFiles/...'; any other source is stored under '_Files/'
    inside the directory of each page that uses it.  A failed download is
    reported on stdout and the entry is written with empty content.
    """
    for absURL in srcs:
        src = srcs[absURL]
        data = ''
        try:
            if src['extension'] != 'xml':
                response = urlopen(absURL)
                try:
                    data = response.read()
                finally:
                    # Close even when the read fails part-way.
                    response.close()
                # A missing or malformed Content-Type must not abort the
                # whole build; it only means no extension can be inferred.
                contentType = response.headers.getheader('Content-Type') or ''
                typeParts = contentType.split('/', 1)
                if len(typeParts) == 2 and typeParts[0] == 'image':
                    subType = typeParts[1].split(';')[0]
                    if subType == 'jpeg':
                        src['extension'] = 'jpg'
                    elif subType in ('png', 'gif', 'bmp'):
                        src['extension'] = subType
        except (HTTPError, URLError):
            print("File download error: " + absURL)

        fileName = self._addExtension(src['fileName'], src['extension'])
        if len(src['usedBy']) > 1:
            src['finalURL'] = '_SharedFiles/' + fileName
            if src['extension'] != 'xml':
                zip.writestr((src['finalURL']).encode('latin-1'), data)
            # NOTE(review): nesting was reconstructed from a flattened
            # listing; this prefix is assumed to apply to XML entries too.
            src['finalURL'] = '../../' + src['finalURL']
        else:
            src['finalURL'] = '_Files/' + fileName
            if src['extension'] != 'xml':
                for i in src['usedBy']:
                    zip.writestr((baseDirs[i] + src['finalURL']).encode('latin-1'), data)
|---|
| | 708 | |
|---|
def _updateHtmls(self, splitHtmls, infoLists, baseURLs, srcs):
    """Rewrite resource references in every split page to their archive URLs.

    splitHtmls -- one list of text pieces per page; modified in place
    infoLists  -- parallel lists tagging each piece
    baseURLs   -- base URL per page
    srcs       -- URL -> source-description dict providing 'finalURL'

    Only pieces tagged 'src', 'jsfname' or 'fvars' are touched.  A 'fvars'
    piece whose flash kind is not recognised is left as it is.
    """
    # Replacement text for each recognised flashvars flavour.
    fvarsTemplates = {
        'mp3': 'file=%s',
        'pilot': 'xml=%s',
        'flv1': "config={videoFile: '../%s'}",
        'flv2': "config={videoFile: \\'../%s\\'}",
    }
    for pageIndex, pieces in enumerate(splitHtmls):
        tags = infoLists[pageIndex]
        for pos in range(len(pieces)):
            tag = tags[pos]
            if tag not in ('src', 'jsfname', 'fvars'):
                continue
            absURL, flash = self._getAbsoluteURL(pieces[pos], tag, baseURLs[pageIndex])
            if tag == 'src':
                pieces[pos] = srcs[absURL]['finalURL']
            elif tag == 'jsfname':
                # The script expects the movie name without its extension.
                pieces[pos] = '.'.join(srcs[absURL]['finalURL'].split('.')[:-1])
            elif flash in fvarsTemplates:
                pieces[pos] = fvarsTemplates[flash] % srcs[absURL]['finalURL']
|---|
| | 729 | |
|---|
def _updatePilotXMLs(self, xmlDatas, baseURLs, srcs):
    """Replace resource references inside the split XML files with archive URLs.

    xmlDatas -- dict of per-XML data holding 'split' and 'info' lists;
                the 'split' lists are modified in place
    baseURLs -- accepted but not used by this method
    srcs     -- URL -> source-description dict providing 'finalURL'
    """
    for xmlData in xmlDatas.values():
        pieces = xmlData['split']
        tags = xmlData['info']
        for pos, piece in enumerate(pieces):
            if tags[pos] not in ('src', 'XMLmp3'):
                continue
            # NOTE(review): no base URL is passed, so the lookup key is the
            # raw text; registration via _processHtml prefixes a base URL.
            # Confirm relative references resolve to the same key.
            absURL2, flash = self._getAbsoluteURL(piece, tags[pos])
            pieces[pos] = srcs[absURL2]['finalURL']
|---|
| | 738 | |
|---|
| | 739 | def _addHtmlsToZip(self, splitHtmls, zip, baseDirs = None): |
|---|
| | 740 | i = 0 |
|---|
| | 741 | baseDir = '' |
|---|
| | 742 | for splitHtml in splitHtmls: |
|---|
| | 743 | if isinstance(baseDirs, (list, tuple)): |
|---|
| | 744 | baseDir = baseDirs[i] |
|---|
| | 745 | zip.writestr(baseDir + 'index.html', ''.join(splitHtml)) |
|---|
| | 746 | i += 1 |
|---|
| | 747 | |
|---|
| | 748 | def _addPilotXMLsToZip(self, xmlDatas, srcs, zip): |
|---|
| | 749 | for absURL in xmlDatas: |
|---|
| | 750 | xmlData = xmlDatas[absURL] |
|---|
| | 751 | xml = ''.join(xmlData['split']).encode('utf-16') |
|---|
| | 752 | for baseDir in xmlData['baseDirs']: |
|---|
| | 753 | zip.writestr(baseDir + srcs[absURL]['finalURL'], xml) |
|---|
| | 754 | break |
|---|
| | 755 | |
|---|
| | 756 | def buildZip(self): |
|---|
| | 757 | """ Builds a Zip """ |
|---|
| | 758 | splitHtmls = [] |
|---|
| | 759 | infoLists = [] |
|---|
| | 760 | baseURLs = [] |
|---|
| | 761 | baseDirs = [] |
|---|
| | 762 | srcs = {} |
|---|
| | 763 | htmlIndex = 0 |
|---|
| | 764 | resourceTypes = ('Content', 'Methods', 'Tools') |
|---|
| | 765 | htmlList = [(self, '')] |
|---|
| | 766 | for resourceType in resourceTypes: |
|---|
| | 767 | for resource in self.getResources(reftype = 'related%s' % resourceType): |
|---|
| | 768 | htmlList.append((resource, '%s/%s/' % (resourceType.lower(), resource.getId()))) |
|---|
| | 769 | |
|---|
| | 770 | for html in htmlList: |
|---|
| | 771 | splitHtml, infoList, baseURL = self._splitHTML(html[0].standalone_view()) |
|---|
| | 772 | splitHtmls.append(splitHtml) |
|---|
| | 773 | infoLists.append(infoList) |
|---|
| | 774 | baseURLs.append(baseURL) |
|---|
| | 775 | baseDirs.append(html[1]) |
|---|
| | 776 | self._processHtml(splitHtml, infoList, srcs, baseURL, htmlIndex) |
|---|
| | 777 | htmlIndex += 1 |
|---|
| | 778 | |
|---|
| | 779 | xmlDatas = self._processPilotXMLs(srcs, baseURLs, baseDirs) |
|---|
| | 780 | zipStr = StringIO() |
|---|
| | 781 | zip = zipfile.ZipFile(zipStr, 'w', compression = zipfile.ZIP_DEFLATED) |
|---|
| | 782 | self._downloadFiles(srcs, zip, baseDirs) |
|---|
| | 783 | self._updateHtmls(splitHtmls, infoLists, baseURLs, srcs) |
|---|
| | 784 | self._updatePilotXMLs(xmlDatas, baseURLs, srcs) |
|---|
| | 785 | self._addHtmlsToZip(splitHtmls, zip, baseDirs) |
|---|
| | 786 | self._addPilotXMLsToZip(xmlDatas, srcs, zip) |
|---|
| | 787 | zip.close() |
|---|