| 501 | | def _splitHTML(self, htmlPage, pageURL): |
|---|
| 502 | | |
|---|
| 503 | | def flatten(seq): |
|---|
| 504 | | res = [] |
|---|
| 505 | | for item in seq: |
|---|
| 506 | | if isinstance(item, (list, tuple)): |
|---|
| 507 | | res.extend(flatten(item)) |
|---|
| | 501 | def download(self, SHARED='False', SCORM='False'): |
|---|
| | 502 | """ Builds a zip. Set SHARED to True to make shared files appear only once in the zip. Set SCORM to True to generate an imsmanifest.xml file, too. """ |
|---|
| | 503 | |
|---|
| | 504 | def splitHTML(htmlPage, pageURL): |
|---|
| | 505 | |
|---|
def flatten(seq):
    """Return a flat list holding the leaves of a nested list/tuple structure.

    Only ``list`` and ``tuple`` items are descended into; every other item
    (strings included) is kept as a single leaf, in its original order.
    """
    leaves = []
    for element in seq:
        if not isinstance(element, (list, tuple)):
            leaves.append(element)
            continue
        # Nested container: splice its flattened content in place.
        leaves += flatten(element)
    return leaves
|---|
| | 514 | |
|---|
def combineNones(inputList, infoList):
    """Merge each run of adjacent untagged pieces into a single piece.

    ``inputList`` holds text pieces and ``infoList`` the tag of each piece;
    a tag of ``None`` marks plain text.  Every maximal run of ``None``-tagged
    pieces is replaced by one piece containing their concatenation, still
    tagged ``None``.  Tagged pieces are left untouched.

    Both lists are modified in place; nothing is returned.
    """
    merged = ''
    inRun = False
    # Walk backwards so deleting entries never shifts the indices still to
    # be visited.  A descending range() works whether range() yields a list
    # or a lazy sequence, and avoids shadowing the builtin ``list``.
    for i in range(len(infoList) - 1, -1, -1):
        if infoList[i] is None:
            if inRun:
                merged = inputList[i] + merged
            else:
                inRun = True
                merged = inputList[i]
            del infoList[i]
            del inputList[i]
        elif inRun:
            # A tagged piece ends the run: put the merged text right after it.
            inRun = False
            infoList.insert(i + 1, None)
            inputList.insert(i + 1, merged)
    if inRun:
        # The run reached the very beginning of the lists.
        infoList.insert(0, None)
        inputList.insert(0, merged)
|---|
| | 536 | |
|---|
| | 537 | def reqursiveRegularSplit(inputList, infoList, info, reg, first, next): |
|---|
| | 538 | if isinstance(inputList, (list, tuple)): |
|---|
| | 539 | for i in range(len(inputList)): |
|---|
| | 540 | if not isinstance(infoList[i], str): |
|---|
| | 541 | inputList[i], infoList[i] = reqursiveRegularSplit(inputList[i], infoList[i], info, reg, first, next) |
|---|
| 540 | | reg = sre.compile(reg, sre.DOTALL | sre.IGNORECASE) |
|---|
| 541 | | inputList = sre.split(reg, inputList) |
|---|
| 542 | | if len(inputList) > 1: |
|---|
| 543 | | infoList = [] |
|---|
| 544 | | for i in range(len(inputList)): |
|---|
| 545 | | if i % next == first: |
|---|
| 546 | | infoList.append(info) |
|---|
| | 563 | baseURL = pageURL |
|---|
| | 564 | if not baseURL.endswith('/'): |
|---|
| | 565 | baseURL += '/' |
|---|
| | 566 | |
|---|
| | 567 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'src', '(<link[^>]+href=")(.*?)("[^>]+rel="stylesheet")', 2, 4) |
|---|
| | 568 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'href', '(<a[^>]+)href="(.*?)"', 2, 3) |
|---|
| | 569 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'fvars', '(<embed[^>]*?flashvars=")(.*?)(")', 2, 4) |
|---|
| | 570 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'XMLmp3', '(<voiceover[^>]+src=")(.*?)(")', 2, 4) |
|---|
| | 571 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'src', '(<[^>]+src=")(.*?)(")', 2, 4) |
|---|
| | 572 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'src', '(<param.+?name="movie".*?value=")(.*?)(")', 2, 4) |
|---|
| | 573 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'fvars', r"(<script.*?>.*?AC_FL_RunContent\(.*?'flashvars', ')(.*?)((?<!\\)'.*?</script>)", 2, 4) |
|---|
| | 574 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'jsfname', "(<script.*?>.*?AC_FL_RunContent\(.*?'movie', ')(.*?)(')", 2, 4) |
|---|
| | 575 | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'fvars', '(<param.+?name="flashvars".*?value=")(.*?)(")', 2, 4) |
|---|
| | 576 | |
|---|
| | 577 | return flatten(splitHtml), flatten(infoList), baseURL |
|---|
| | 578 | |
|---|
def getAbsoluteURL(url, info, baseURL=''):
    """Turn one extracted reference into the URL used to fetch/identify it.

    Parameters:
        url     -- the raw text captured from the page.
        info    -- the tag of that text: 'src', 'jsfname', 'XMLmp3', 'href'
                   or 'fvars'; any other value leaves ``url`` unresolved.
        baseURL -- prefix put in front of references that are not absolute.

    Returns a ``(url, flash)`` tuple.  ``flash`` is ``None`` unless ``info``
    is 'fvars' and the value matches a known flashvars layout, in which case
    it is 'mp3', 'pilot', 'flv1' or 'flv2'.

    NOTE(review): the nesting was reconstructed from a listing that had lost
    its indentation.  The two trailing ``replace`` calls are assumed to apply
    to every kind of reference -- confirm against the real file.
    """
    # (prefix, slice start, slice stop, flash kind) for known flashvars.
    flashvarsLayouts = (
        ('file=', 5, -5, 'mp3'),
        ('xml=', 4, None, 'pilot'),
        ("config={videoFile: '", 20, -2, 'flv1'),
        ("config={videoFile: \\'", 21, -3, 'flv2'),
    )
    flash = None
    if info in ('src', 'jsfname', 'XMLmp3', 'href'):
        # Fix: https:// links are absolute too; previously they had baseURL
        # glued in front of them, producing an unusable URL.
        if not (url.startswith('http://') or url.startswith('https://')):
            url = baseURL + url
        if info == 'jsfname':
            # The script names the movie without its extension.
            url += '.swf'
    elif info == 'fvars':
        for prefix, start, stop, kind in flashvarsLayouts:
            if url.startswith(prefix):
                url = url[start:stop]
                flash = kind
                break
    url = url.replace('\\', '/')
    url = url.replace('at_download/', '')
    return url, flash
|---|
| | 602 | |
|---|
def splitFileName(fileName):
    """Split ``fileName`` at its last dot.

    Returns ``(stem, extension)`` where ``extension`` includes the leading
    dot.  A name without any dot is returned unchanged with an empty
    extension.
    """
    lastDot = fileName.rfind('.')
    if lastDot == -1:
        return fileName, ''
    return fileName[:lastDot], fileName[lastDot:]
|---|
| | 608 | |
|---|
def hasFileNameInSrcs(fileName, srcs):
    """Return the entry of ``srcs`` already using ``fileName``, or ``None``.

    ``srcs`` maps a URL to a dict describing that resource.  Entries that
    have no 'fileName' key yet are ignored.  The first matching entry in
    iteration order is returned.
    """
    for src in srcs.values():
        # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
        if 'fileName' in src and src['fileName'] == fileName:
            return src
    return None
|---|
| | 615 | |
|---|
def findUniqueFileName(absURL, srcs):
    """Pick a file name for ``absURL`` that no entry of ``srcs`` uses yet.

    The starting candidate is the last path segment of ``absURL`` with any
    query string removed.  While some entry already owns the candidate, that
    entry's 'counter' is incremented and the candidate becomes
    ``stem(counter)extension``.
    """
    candidate = absURL.split('?')[0].split('/')[-1]
    while True:
        owner = hasFileNameInSrcs(candidate, srcs)
        if owner is None:
            return candidate
        owner['counter'] += 1
        stem, extension = splitFileName(candidate)
        # The candidate never contains '?' or '/', so it needs no further
        # trimming before the next round.
        candidate = '%s(%d)%s' % (stem, owner['counter'], extension)
|---|
| | 625 | |
|---|
| | 626 | def processHtml(splitHtml, infoList, srcs, hrefs, baseURL, htmlIndex): |
|---|
| | 627 | for i in range(len(infoList)): |
|---|
| | 628 | if infoList[i] in ('src', 'jsfname', 'fvars', 'XMLmp3', 'href'): |
|---|
| | 629 | absURL, flash = getAbsoluteURL(splitHtml[i], infoList[i], baseURL) |
|---|
| | 630 | if infoList[i] != 'href' or splitHtml[i].find('at_download') != -1: |
|---|
| | 631 | if srcs.has_key(absURL): |
|---|
| | 632 | srcs[absURL]['usedBy'].add(htmlIndex) |
|---|
| 548 | | infoList.append(None) |
|---|
| 549 | | combineNones(inputList, infoList) |
|---|
| | 634 | srcs[absURL] = {} |
|---|
| | 635 | src = srcs[absURL] |
|---|
| | 636 | src['fileName'] = findUniqueFileName(absURL, srcs) |
|---|
| | 637 | src['finalURL'] = '' |
|---|
| | 638 | src['usedBy'] = set([htmlIndex]) |
|---|
| | 639 | src['counter'] = 0 |
|---|
| | 640 | if flash == 'mp3': |
|---|
| | 641 | src['extension'] = 'mp3' |
|---|
| | 642 | elif flash == 'pilot': |
|---|
| | 643 | src['extension'] = 'xml' |
|---|
| | 644 | elif flash in ('flv1', 'flv2'): |
|---|
| | 645 | src['extension'] = 'flv' |
|---|
| | 646 | elif infoList[i] == 'XMLmp3': |
|---|
| | 647 | src['extension'] = 'mp3' |
|---|
| | 648 | else: |
|---|
| | 649 | src['extension'] = '' |
|---|
| | 650 | if infoList[i] == 'href': |
|---|
| | 651 | absURL, flash = getAbsoluteURL(splitHtml[i], infoList[i], baseURL) |
|---|
| | 652 | if hrefs.has_key(splitHtml[i]): |
|---|
| | 653 | hrefs[absURL]['usedBy'].add(htmlIndex) |
|---|
| | 654 | else: |
|---|
| | 655 | hrefs[absURL] = {} |
|---|
| | 656 | href = hrefs[absURL] |
|---|
| | 657 | href['finalURL'] = '' |
|---|
| | 658 | href['type'] = 0 |
|---|
| | 659 | href['usedBy'] = set([htmlIndex]) |
|---|
| | 660 | |
|---|
def processPilotXMLs(srcs, baseURLs, baseDirs):
    """Download and split every resource of ``srcs`` whose extension is 'xml'.

    For each such URL the document is fetched, decoded from UTF-16,
    re-encoded as Latin-1 and split with ``splitHTML``.  The references found
    in it are then registered in ``srcs`` through ``processHtml`` once for
    every page index listed in the resource's 'usedBy'.

    Returns a dict mapping each successfully fetched URL to a dict with the
    keys 'split', 'info', 'fileName' and 'baseDirs'.  URLs whose download
    raises ``HTTPError`` or ``URLError`` are reported and left out.
    """
    xmlDatas = {}
    # Snapshot first: processHtml() adds entries to srcs while we iterate.
    xmlList = [absURL for absURL in srcs if srcs[absURL]['extension'] == 'xml']
    for absURL in xmlList:
        try:
            src = srcs[absURL]
            handle = urlopen(absURL)
            try:
                raw = handle.read()
            finally:
                # Release the connection even when reading fails.
                handle.close()
            xml = raw.decode('utf_16').encode('latin_1')
            xmlData = {}
            xmlDatas[absURL] = xmlData
            xmlData['split'], xmlData['info'], baseURL = splitHTML(xml, '')
            xmlData['fileName'] = src['fileName']
            xmlData['baseDirs'] = []
            for htmlIndex in src['usedBy']:
                processHtml(xmlData['split'], xmlData['info'], srcs, {}, baseURLs[htmlIndex], htmlIndex)
                xmlData['baseDirs'].append(baseDirs[htmlIndex])
        except (HTTPError, URLError):
            # Parenthesised so the line is valid on Python 2 and Python 3.
            print("File download error: " + absURL)
    return xmlDatas
|---|
| | 681 | |
|---|
| | 682 | def downloadFiles(srcs, zip, baseDirs = []): |
|---|
| | 683 | def addExtension(fileName, extension): |
|---|
| | 684 | name, ext = splitFileName(fileName) |
|---|
| | 685 | if ext == '' and extension != '': |
|---|
| | 686 | fileName = '%s.%s' % (fileName, extension) |
|---|
| | 687 | return fileName |
|---|
| | 688 | for absURL in srcs: |
|---|
| | 689 | src = srcs[absURL] |
|---|
| | 690 | data = '' |
|---|
| | 691 | mediaType = '' |
|---|
| | 692 | subType = '' |
|---|
| | 693 | try: |
|---|
| | 694 | if src['extension'] != 'xml': |
|---|
| | 695 | file = urlopen(absURL) |
|---|
| | 696 | data = file.read() |
|---|
| | 697 | mediaType, subType = file.headers.getheader('Content-Type').split('/') |
|---|
| | 698 | file.close() |
|---|
| | 699 | subType = subType.split(';')[0] |
|---|
| | 700 | if mediaType == 'image': |
|---|
| | 701 | if subType in ('jpeg', 'pjpeg'): |
|---|
| | 702 | src['extension'] = 'jpg' |
|---|
| | 703 | elif subType == 'x-ms-bmp': |
|---|
| | 704 | src['extension'] = 'bmp' |
|---|
| | 705 | elif subType in ('png', 'gif'): |
|---|
| | 706 | src['extension'] = subType |
|---|
| | 707 | elif subType == 'x-png': |
|---|
| | 708 | src['extension'] = 'png' |
|---|
| | 709 | elif mediaType == 'application' and subType == 'x-shockwave-flash': |
|---|
| | 710 | src['extension'] = 'swf' |
|---|
| | 711 | elif mediaType == 'video': |
|---|
| | 712 | if subType in ('x-msvideo', 'avi'): |
|---|
| | 713 | src['extension'] = 'avi' |
|---|
| | 714 | elif subType == 'x-ms-wmv': |
|---|
| | 715 | src['extension'] = 'wmv' |
|---|
| | 716 | elif subType == 'mpeg': |
|---|
| | 717 | src['extension'] = 'mpg' |
|---|
| | 718 | elif subType == 'mp4': |
|---|
| | 719 | src['extension'] = subType |
|---|
| | 720 | elif subType == 'quicktime': |
|---|
| | 721 | src['extension'] = 'mov' |
|---|
| | 722 | elif mediaType == 'audio' and subType == 'mpeg': |
|---|
| | 723 | src['extension'] = 'mp3' |
|---|
| | 724 | except (HTTPError, URLError): |
|---|
| | 725 | print "File download error: " + absURL |
|---|
| | 726 | |
|---|
| | 727 | fileName = addExtension(src['fileName'], src['extension']) |
|---|
| | 728 | if len(src['usedBy']) > 1 and SHARED == 'True': |
|---|
| | 729 | src['finalURL'] = '_SharedFiles/' + fileName |
|---|
| | 730 | if src['extension'] != 'xml': |
|---|
| | 731 | zip.writestr(src['finalURL'], data) |
|---|
| 551 | | infoList = [infoList] |
|---|
| 552 | | return inputList, infoList |
|---|
| 553 | | |
|---|
| 554 | | splitHtml, infoList = reqursiveRegularSplit(htmlPage, None, 'base', '<base href="(.*?)".*?/>', 1, 2) |
|---|
| 555 | | if len(splitHtml) > 1: |
|---|
| 556 | | baseURL = splitHtml[-2] |
|---|
| 557 | | del splitHtml[1::2] |
|---|
| 558 | | del infoList[1::2] |
|---|
| 559 | | else: |
|---|
| 560 | | baseURL = pageURL |
|---|
| 561 | | if not baseURL.endswith('/'): |
|---|
| 562 | | baseURL += '/' |
|---|
| 563 | | |
|---|
| 564 | | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'src', '(<link[^>]+href=")(.*?)("[^>]+rel="stylesheet")', 2, 4) |
|---|
| 565 | | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'href', '(<a[^>]+)href="(.*?)"', 2, 3) |
|---|
| 566 | | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'fvars', '(<embed[^>]*?flashvars=")(.*?)(")', 2, 4) |
|---|
| 567 | | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'XMLmp3', '(<voiceover[^>]+src=")(.*?)(")', 2, 4) |
|---|
| 568 | | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'src', '(<[^>]+src=")(.*?)(")', 2, 4) |
|---|
| 569 | | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'src', '(<param.+?name="movie".*?value=")(.*?)(")', 2, 4) |
|---|
| 570 | | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'fvars', r"(<script.*?>.*?AC_FL_RunContent\(.*?'flashvars', ')(.*?)((?<!\\)'.*?</script>)", 2, 4) |
|---|
| 571 | | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'jsfname', "(<script.*?>.*?AC_FL_RunContent\(.*?'movie', ')(.*?)(')", 2, 4) |
|---|
| 572 | | splitHtml, infoList = reqursiveRegularSplit(splitHtml, infoList, 'fvars', '(<param.+?name="flashvars".*?value=")(.*?)(")', 2, 4) |
|---|
| 573 | | |
|---|
| 574 | | return flatten(splitHtml), flatten(infoList), baseURL |
|---|
| 575 | | |
|---|
def _getAbsoluteURL(self, url, info, baseURL=''):
    """Turn one extracted reference into the URL used to fetch/identify it.

    Parameters:
        url     -- the raw text captured from the page.
        info    -- the tag of that text: 'src', 'jsfname', 'XMLmp3', 'href'
                   or 'fvars'; any other value leaves ``url`` unresolved.
        baseURL -- prefix put in front of references that are not absolute.

    Returns a ``(url, flash)`` tuple.  ``flash`` is ``None`` unless ``info``
    is 'fvars' and the value matches a known flashvars layout, in which case
    it is 'mp3', 'pilot', 'flv1' or 'flv2'.

    NOTE(review): the nesting was reconstructed from a listing that had lost
    its indentation.  The trailing backslash replacement is assumed to apply
    to every kind of reference -- confirm against the real file.
    """
    # (prefix, slice start, slice stop, flash kind) for known flashvars.
    flashvarsLayouts = (
        ('file=', 5, -5, 'mp3'),
        ('xml=', 4, None, 'pilot'),
        ("config={videoFile: '", 20, -2, 'flv1'),
        ("config={videoFile: \\'", 21, -3, 'flv2'),
    )
    flash = None
    if info in ('src', 'jsfname', 'XMLmp3', 'href'):
        # Fix: https:// links are absolute too; previously they had baseURL
        # glued in front of them, producing an unusable URL.
        if not (url.startswith('http://') or url.startswith('https://')):
            url = baseURL + url
        if info == 'jsfname':
            # The script names the movie without its extension.
            url += '.swf'
    elif info == 'fvars':
        for prefix, start, stop, kind in flashvarsLayouts:
            if url.startswith(prefix):
                url = url[start:stop]
                flash = kind
                break
    url = url.replace('\\', '/')
    return url, flash
|---|
| 598 | | |
|---|
def _splitFileName(self, fileName):
    """Split ``fileName`` at its last dot.

    Returns ``(stem, extension)`` where ``extension`` includes the leading
    dot.  A name without any dot is returned unchanged with an empty
    extension.
    """
    lastDot = fileName.rfind('.')
    if lastDot == -1:
        return fileName, ''
    return fileName[:lastDot], fileName[lastDot:]
|---|
| 604 | | |
|---|
def _hasFileNameInSrcs(self, fileName, srcs):
    """Return the entry of ``srcs`` already using ``fileName``, or ``None``.

    ``srcs`` maps a URL to a dict describing that resource.  Entries that
    have no 'fileName' key yet are ignored.  The first matching entry in
    iteration order is returned.
    """
    for src in srcs.values():
        # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
        if 'fileName' in src and src['fileName'] == fileName:
            return src
    return None
|---|
| 611 | | |
|---|
def _findUniqueFileName(self, absURL, srcs):
    """Pick a file name for ``absURL`` that no entry of ``srcs`` uses yet.

    The starting candidate is the last path segment of ``absURL`` with any
    query string removed.  While some entry already owns the candidate, that
    entry's 'counter' is incremented and the candidate becomes
    ``stem(counter)extension``.
    """
    candidate = absURL.split('?')[0].split('/')[-1]
    while True:
        owner = self._hasFileNameInSrcs(candidate, srcs)
        if owner is None:
            return candidate
        owner['counter'] += 1
        stem, extension = self._splitFileName(candidate)
        # The candidate never contains '?' or '/', so it needs no further
        # trimming before the next round.
        candidate = '%s(%d)%s' % (stem, owner['counter'], extension)
|---|
| 621 | | |
|---|
| 622 | | def _processHtml(self, splitHtml, infoList, srcs, hrefs, baseURL, htmlIndex): |
|---|
| 623 | | for i in range(len(infoList)): |
|---|
| 624 | | if infoList[i] in ('src', 'jsfname', 'fvars', 'XMLmp3', 'href'): |
|---|
| 625 | | absURL, flash = self._getAbsoluteURL(splitHtml[i], infoList[i], baseURL) |
|---|
| 626 | | if infoList[i] != 'href' or absURL.find('at_download') != -1: |
|---|
| 627 | | if srcs.has_key(absURL): |
|---|
| 628 | | srcs[absURL]['usedBy'].add(htmlIndex) |
|---|
| 629 | | else: |
|---|
| 630 | | srcs[absURL] = {} |
|---|
| | 733 | src['finalURL'] = '_Files/' + fileName |
|---|
| | 734 | if src['extension'] != 'xml': |
|---|
| | 735 | for i in src['usedBy']: |
|---|
| | 736 | zip.writestr(baseDirs[i] + src['finalURL'], data) |
|---|
| | 737 | |
|---|
def processHrefs(hrefs, srcs, htmlURLs, baseDirs, portalURL):
    """Decide what every collected link should point to inside the archive.

    For each URL in ``hrefs`` the entry's 'finalURL' is set, checking in
    this order:

    * the URL (with or without a trailing slash) is one of ``htmlURLs``:
      an ``href="...index.html"`` attribute for that page's base directory;
    * the URL is a key of ``srcs``: that resource's 'finalURL', 'type' 1;
    * the URL is the ``at_download`` form of a key of ``srcs``: that
      resource's 'finalURL', 'type' 1;
    * the URL starts with ``portalURL`` and does not end in
      '/CollectionRSS': the empty string;
    * otherwise: an ``href`` attribute holding the URL unchanged.

    ``hrefs`` is modified in place; nothing is returned.
    """
    # Map each "<dir>/at_download/<file>" spelling back to the srcs key it
    # was derived from.  A dict gives a constant-time membership test and,
    # unlike rebuilding the key from the link, still finds resources whose
    # key carries a query string (that used to raise KeyError).
    downloadURLs = {}
    for srcURL in srcs:
        segments = srcURL.split('?')[0].split('/')
        downloadURL = '%s/at_download/%s' % ('/'.join(segments[:-1]), segments[-1])
        downloadURLs.setdefault(downloadURL, srcURL)
    for absURL in hrefs:
        href = hrefs[absURL]
        if absURL in htmlURLs:
            href['finalURL'] = 'href="%sindex.html"' % baseDirs[htmlURLs.index(absURL)]
        elif absURL + '/' in htmlURLs:
            href['finalURL'] = 'href="%sindex.html"' % baseDirs[htmlURLs.index(absURL + '/')]
        elif absURL in srcs:
            href['finalURL'] = srcs[absURL]['finalURL']
            href['type'] = 1
        elif absURL in downloadURLs:
            # Prefer the key the link itself spells out; fall back to the
            # recorded one when the real key has a query string.
            srcURL = absURL.replace('/at_download', '')
            if srcURL not in srcs:
                srcURL = downloadURLs[absURL]
            href['finalURL'] = srcs[srcURL]['finalURL']
            href['type'] = 1
        elif absURL.startswith(portalURL) and not absURL.endswith('/CollectionRSS'):
            href['finalURL'] = ''
        else:
            href['finalURL'] = 'href="%s"' % absURL
|---|
| | 761 | |
|---|
| | 762 | def updateHtmls(splitHtmls, infoLists, baseURLs, baseDirs, srcs, hrefs): |
|---|
| | 763 | for i in range(len(splitHtmls)): |
|---|
| | 764 | splitHtml = splitHtmls[i] |
|---|
| | 765 | infoList = infoLists[i] |
|---|
| | 766 | for j in range(len(splitHtml)): |
|---|
| | 767 | if infoList[j] in ('src', 'jsfname', 'fvars'): |
|---|
| | 768 | absURL, flash = getAbsoluteURL(splitHtml[j], infoList[j], baseURLs[i]) |
|---|
| 645 | | src['extension'] = '' |
|---|
| 646 | | if infoList[i] == 'href': |
|---|
| 647 | | absURL, flash = self._getAbsoluteURL(splitHtml[i], infoList[i], baseURL) |
|---|
| 648 | | if hrefs.has_key(splitHtml[i]): |
|---|
| 649 | | hrefs[absURL]['usedBy'].add(htmlIndex) |
|---|
| 650 | | else: |
|---|
| 651 | | hrefs[absURL] = {} |
|---|
| 652 | | href = hrefs[absURL] |
|---|
| 653 | | href['finalURL'] = '' |
|---|
| 654 | | href['type'] = 0 |
|---|
| 655 | | href['usedBy'] = set([htmlIndex]) |
|---|
| 656 | | |
|---|
| 657 | | def _processPilotXMLs(self, srcs, baseURLs, baseDirs): |
|---|
| 658 | | xmlDatas = {} |
|---|
| 659 | | xmlList = [absURL for absURL in srcs if srcs[absURL]['extension'] == 'xml'] |
|---|
| 660 | | for absURL in xmlList: |
|---|
| 661 | | try: |
|---|
| 662 | | src = srcs[absURL] |
|---|
| 663 | | file = urlopen(absURL) |
|---|
| 664 | | xml = file.read().decode('utf-16') |
|---|
| 665 | | file.close() |
|---|
| 666 | | xmlDatas[absURL]={} |
|---|
| | 773 | finalURL = src['finalURL'] |
|---|
| | 774 | if infoList[j] == 'src': |
|---|
| | 775 | splitHtml[j] = finalURL |
|---|
| | 776 | elif infoList[j] == 'jsfname': |
|---|
| | 777 | splitHtml[j] = '.'.join(finalURL.split('.')[:-1]) |
|---|
| | 778 | elif infoList[j] == 'fvars': |
|---|
| | 779 | if flash == 'mp3': |
|---|
| | 780 | splitHtml[j] = 'file=%s' % finalURL |
|---|
| | 781 | elif flash == 'pilot': |
|---|
| | 782 | splitHtml[j] = 'xml=%s' % finalURL |
|---|
| | 783 | elif flash == 'flv1': |
|---|
| | 784 | splitHtml[j] = "config={videoFile: '../%s'}" % finalURL |
|---|
| | 785 | elif flash == 'flv2': |
|---|
| | 786 | splitHtml[j] = "config={videoFile: \\'../%s\\'}" % finalURL |
|---|
| | 787 | elif infoList[j] == 'href': |
|---|
| | 788 | absURL, flash = getAbsoluteURL(splitHtml[j], infoList[j], baseURLs[i]) |
|---|
| | 789 | href = hrefs[absURL] |
|---|
| | 790 | if href['type'] == 1: |
|---|
| | 791 | if baseDirs[i] != '' and len(href['usedBy']) > 1 and SHARED == 'True': |
|---|
| | 792 | finalURL = 'href="../../%s"' % href['finalURL'] |
|---|
| | 793 | else: |
|---|
| | 794 | finalURL = 'href="%s"' % href['finalURL'] |
|---|
| | 795 | else: |
|---|
| | 796 | finalURL = href['finalURL'] |
|---|
| | 797 | splitHtml[j] = finalURL |
|---|
| | 798 | |
|---|
def updatePilotXMLs(xmlDatas, baseURLs, srcs):
    """Rewrite the references inside every split pilot XML document.

    Each piece tagged 'src' or 'XMLmp3' is resolved with ``getAbsoluteURL``
    (without a base URL) and replaced by the 'finalURL' of the matching
    ``srcs`` entry.  When that entry is used by more than one page and
    ``SHARED`` equals 'True', the path is prefixed with '../../'.

    The 'split' lists are modified in place.  ``baseURLs`` is not used.
    """
    for xmlData in xmlDatas.values():
        pieces = xmlData['split']
        tags = xmlData['info']
        for position in range(len(pieces)):
            tag = tags[position]
            if tag not in ('src', 'XMLmp3'):
                continue
            resolved, flash = getAbsoluteURL(pieces[position], tag)
            src = srcs[resolved]
            prefix = ''
            if len(src['usedBy']) > 1 and SHARED == 'True':
                prefix = '../../'
            pieces[position] = prefix + src['finalURL']
|---|
| | 811 | |
|---|
def addHtmlsToZip(splitHtmls, zip, baseDirs = None):
    """Write each page to ``zip`` as ``<base dir>index.html``.

    ``splitHtmls`` is a sequence of pages, each a sequence of text pieces
    that are joined before being written.  When ``baseDirs`` is a list or
    tuple, the page at position *n* goes under ``baseDirs[n]``; otherwise
    every page is written as plain ``index.html``.
    """
    perPageDirs = isinstance(baseDirs, (list, tuple))
    for position, pieces in enumerate(splitHtmls):
        directory = ''
        if perPageDirs:
            directory = baseDirs[position]
        zip.writestr(directory + 'index.html', ''.join(pieces))
|---|
| | 820 | |
|---|
| | 821 | def addPilotXMLsToZip(xmlDatas, srcs, zip): |
|---|
| | 822 | for absURL in xmlDatas: |
|---|
| 668 | | xmlData['split'], xmlData['info'], baseURL = self._splitHTML(xml, '') |
|---|
| 669 | | xmlData['fileName'] = src['fileName'] |
|---|
| 670 | | xmlData['baseDirs'] = [] |
|---|
| 671 | | for htmlIndex in src['usedBy']: |
|---|
| 672 | | self._processHtml(xmlData['split'], xmlData['info'], srcs, {}, baseURLs[htmlIndex], htmlIndex) |
|---|
| 673 | | xmlData['baseDirs'].append(baseDirs[htmlIndex]) |
|---|
| 674 | | except (HTTPError, URLError): |
|---|
| 675 | | print "File download error: " + absURL |
|---|
| 676 | | return xmlDatas |
|---|
| 677 | | |
|---|
def _downloadFiles(self, srcs, zip, baseDirs = []):
    """Fetch every non-XML resource of ``srcs`` and store it in ``zip``.

    For each URL in ``srcs``:

    * unless its 'extension' is 'xml', the URL is downloaded and, when the
      response's Content-Type is a recognised image/flash/video type, the
      entry's 'extension' is set accordingly;
    * the entry's 'finalURL' is set to ``_SharedFiles/<name>`` when the
      resource is used by more than one page, else to ``_Files/<name>``;
      ``<name>`` is the entry's 'fileName', with the extension appended
      when the file name has none;
    * non-XML data is written to ``zip``: once for a shared resource, or
      under ``baseDirs[i]`` for each page index ``i`` in 'usedBy'.

    A download raising ``HTTPError`` or ``URLError`` is reported and the
    resource is written with empty data.  ``baseDirs`` is only read.

    NOTE(review): the nesting was reconstructed from a listing that had lost
    its indentation -- confirm against the real file.
    """
    # (media type, subtype) of the Content-Type header -> file extension.
    knownTypes = {
        ('image', 'jpeg'): 'jpg',
        ('image', 'pjpeg'): 'jpg',
        ('image', 'x-ms-bmp'): 'bmp',
        ('image', 'png'): 'png',
        ('image', 'gif'): 'gif',
        ('image', 'x-png'): 'png',
        ('application', 'x-shockwave-flash'): 'swf',
        ('video', 'x-msvideo'): 'avi',
        ('video', 'avi'): 'avi',
        ('video', 'x-ms-wmv'): 'wmv',
        ('video', 'mpeg'): 'mpg',
        ('video', 'mp4'): 'mp4',
        ('video', 'quicktime'): 'mov',
    }

    def _addExtension(fileName, extension):
        # Append the extension only when the name does not already have one.
        name, ext = self._splitFileName(fileName)
        if ext == '' and extension != '':
            fileName = '%s.%s' % (fileName, extension)
        return fileName

    for absURL in srcs:
        src = srcs[absURL]
        data = ''
        if src['extension'] != 'xml':
            try:
                handle = urlopen(absURL)
                try:
                    data = handle.read()
                    # A response without the header yields None; treat it
                    # as "unknown type" instead of crashing on None.split.
                    contentType = handle.headers.getheader('Content-Type') or ''
                finally:
                    # Release the connection even when reading fails.
                    handle.close()
            except (HTTPError, URLError):
                # Parenthesised so the line is valid on Python 2 and 3.
                print("File download error: " + absURL)
            else:
                # Drop parameters such as "; charset=..." before splitting
                # so a '/' inside a parameter cannot break the unpacking.
                typeParts = contentType.split(';')[0].split('/')
                if len(typeParts) == 2:
                    key = (typeParts[0], typeParts[1])
                    if key in knownTypes:
                        src['extension'] = knownTypes[key]

        fileName = _addExtension(src['fileName'], src['extension'])
        if len(src['usedBy']) > 1:
            src['finalURL'] = '_SharedFiles/' + fileName
            if src['extension'] != 'xml':
                zip.writestr((src['finalURL']).encode('latin-1'), data)
        else:
            src['finalURL'] = '_Files/' + fileName
            if src['extension'] != 'xml':
                for i in src['usedBy']:
                    zip.writestr((baseDirs[i] + src['finalURL']).encode('latin-1'), data)
|---|
| 731 | | |
|---|
def _processHrefs(self, hrefs, srcs, htmlURLs, baseDirs, portalURL):
    """Decide what every collected link should point to inside the archive.

    For each URL in ``hrefs`` the entry's 'finalURL' is set, checking in
    this order:

    * the URL (with or without a trailing slash) is one of ``htmlURLs``:
      an ``href="...index.html"`` attribute for that page's base directory;
    * the URL is a key of ``srcs``: that resource's 'finalURL', 'type' 1;
    * the URL is the ``at_download`` form of a key of ``srcs``: that
      resource's 'finalURL', 'type' 1;
    * the URL starts with ``portalURL``: the empty string;
    * otherwise: an ``href`` attribute holding the URL unchanged.

    ``hrefs`` is modified in place; nothing is returned.
    """
    # Map each "<dir>/at_download/<file>" spelling back to the srcs key it
    # was derived from.  A dict gives a constant-time membership test and,
    # unlike rebuilding the key from the link, still finds resources whose
    # key carries a query string (that used to raise KeyError).
    downloadURLs = {}
    for srcURL in srcs:
        segments = srcURL.split('?')[0].split('/')
        downloadURL = '%s/at_download/%s' % ('/'.join(segments[:-1]), segments[-1])
        downloadURLs.setdefault(downloadURL, srcURL)
    for absURL in hrefs:
        href = hrefs[absURL]
        if absURL in htmlURLs:
            href['finalURL'] = 'href="%sindex.html"' % baseDirs[htmlURLs.index(absURL)]
        elif absURL + '/' in htmlURLs:
            href['finalURL'] = 'href="%sindex.html"' % baseDirs[htmlURLs.index(absURL + '/')]
        elif absURL in srcs:
            href['finalURL'] = srcs[absURL]['finalURL']
            href['type'] = 1
        elif absURL in downloadURLs:
            # Prefer the key the link itself spells out; fall back to the
            # recorded one when the real key has a query string.
            srcURL = absURL.replace('/at_download', '')
            if srcURL not in srcs:
                srcURL = downloadURLs[absURL]
            href['finalURL'] = srcs[srcURL]['finalURL']
            href['type'] = 1
        elif absURL.startswith(portalURL):
            href['finalURL'] = ''
        else:
            href['finalURL'] = 'href="%s"' % absURL
|---|
| 755 | | |
|---|
| 756 | | def _updateHtmls(self, splitHtmls, infoLists, baseURLs, baseDirs, srcs, hrefs): |
|---|
| 757 | | for i in range(len(splitHtmls)): |
|---|
| 758 | | splitHtml = splitHtmls[i] |
|---|
| 759 | | infoList = infoLists[i] |
|---|
| 760 | | for j in range(len(splitHtml)): |
|---|
| 761 | | if infoList[j] in ('src', 'jsfname', 'fvars'): |
|---|
| 762 | | absURL, flash = self._getAbsoluteURL(splitHtml[j], infoList[j], baseURLs[i]) |
|---|
| | 824 | xml = ''.join(xmlData['split']) |
|---|
| | 825 | for baseDir in xmlData['baseDirs']: |
|---|
| | 826 | zip.writestr(baseDir + srcs[absURL]['finalURL'], xml.decode('latin_1').encode('utf_16')) |
|---|
| | 827 | break |
|---|
| | 828 | |
|---|
| | 829 | def addSCORMFiles(version, htmlList, srcs, zip): |
|---|
| | 830 | def addMetadata(manifest, obj, tabs): |
|---|
| | 831 | language = obj.Language() |
|---|
| | 832 | if language == '': |
|---|
| | 833 | language = 'en' |
|---|
| | 834 | manifest += '%s<metadata>\n' % (tabs * '\t') |
|---|
| | 835 | manifest += '%s<schema>ADL SCORM</schema>\n' % ((tabs + 1) * '\t') |
|---|
| | 836 | manifest += '%s<schemaversion>1.2</schemaversion>\n' % ((tabs + 1) * '\t') |
|---|
| | 837 | manifest += '%s<imsmd:lom>\n' % ((tabs + 1) * '\t') |
|---|
| | 838 | manifest += '%s<general>\n' % ((tabs + 2) * '\t') |
|---|
| | 839 | manifest += '%s<title>\n' % ((tabs + 3) * '\t') |
|---|
| | 840 | manifest += '%s<langstring xml:lang="%s">%s</langstring>\n' % (((tabs + 4) * '\t'), language, obj.TitleOrId()) |
|---|
| | 841 | manifest += '%s</title>\n' % ((tabs + 3) * '\t') |
|---|
| | 842 | manifest += '%s<language>%s</language>\n' % (((tabs + 3) * '\t'), language) |
|---|
| | 843 | tags = obj.getTags() |
|---|
| | 844 | for tag in tags: |
|---|
| | 845 | manifest += '%s<keyword>\n' % ((tabs + 3) * '\t') |
|---|
| | 846 | manifest += '%s<langstring xml:lang="%s">%s</langstring>\n' % (((tabs + 4) * '\t'), language, tag) |
|---|
| | 847 | manifest += '%s</keyword>\n' % ((tabs + 3) * '\t') |
|---|
| | 848 | manifest += '%s</general>\n' % ((tabs + 2) * '\t') |
|---|
| | 849 | manifest += '%s<lifecycle>\n' % ((tabs + 2) * '\t') |
|---|
| | 850 | manifest += '%s<version>\n' % ((tabs + 3) * '\t') |
|---|
| | 851 | manifest += '%s<langstring xml:lang="%s">%s</langstring>\n' % (((tabs + 4) * '\t'), language, obj.getLatestEditDate()) |
|---|
| | 852 | manifest += '%s</version>\n' % ((tabs + 3) * '\t') |
|---|
| | 853 | manifest += '%s</lifecycle>\n' % ((tabs + 2) * '\t') |
|---|
| | 854 | manifest += '%s</imsmd:lom>\n' % ((tabs + 1) * '\t') |
|---|
| | 855 | manifest += '%s</metadata>\n\n' % (tabs * '\t') |
|---|
| | 856 | return manifest |
|---|
| | 857 | |
|---|
| | 858 | manifest = '<?xml version="1.0" encoding="utf-8"?>\n' |
|---|
| | 859 | manifest += '<manifest identifier="%s" version="%s" xmlns="http://www.imsproject.org/xsd/imscp_rootv1p1p2" xmlns:imsmd="http://www.imsglobal.org/xsd/imsmd_rootv1p2p1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:adlcp="http://www.adlnet.org/xsd/adlcp_rootv1p2" xsi:schemaLocation="http://www.imsproject.org/xsd/imscp_rootv1p1p2 imscp_rootv1p1p2.xsd http://www.imsglobal.org/xsd/imsmd_rootv1p2p1 imsmd_rootv1p2p1.xsd http://www.adlnet.org/xsd/adlcp_rootv1p2 adlcp_rootv1p2.xsd">\n\n' % (htmlList[0][0].getId(), version) |
|---|
| | 860 | manifest = addMetadata(manifest, self, 1) |
|---|
| | 861 | manifest += '\t<organizations default="ORGANIZATION_%s">\n' % htmlList[0][0].getId() |
|---|
| | 862 | manifest += '\t\t<title>%s</title>\n' % htmlList[0][0].TitleOrId() |
|---|
| | 863 | manifest += '\t\t<organization identifier="ORGANIZATION_%s" structure="linear">\n' % htmlList[0][0].getId() |
|---|
| | 864 | for obj in htmlList: |
|---|
| | 865 | manifest += '\t\t\t<item identifier="ITEM_%s" identifierref="RESOURCE_%s" isvisible="true">\n' % (obj[0].getId(), obj[0].getId()) |
|---|
| | 866 | manifest += '\t\t\t\t<title>%s</title>\n' % obj[0].TitleOrId() |
|---|
| | 867 | manifest = addMetadata(manifest, obj[0], 4) |
|---|
| | 868 | manifest += '\t\t\t</item>\n' |
|---|
| | 869 | manifest += '\t\t</organization>\n' |
|---|
| | 870 | manifest += '\t</organizations>\n\n' |
|---|
| | 871 | manifest += '\t<resources>\n' |
|---|
| | 872 | for i in range(len(htmlList)): |
|---|
| | 873 | manifest += '\t\t<resource identifier="RESOURCE_%s" type="webcontent" adlcp:scormtype="sco" href="%sindex.html">\n' % (htmlList[i][0].getId(), htmlList[i][1]) |
|---|
| | 874 | manifest += '\t\t\t<file href="%sindex.html" />\n' % htmlList[i][1] |
|---|
| | 875 | for absURL in srcs: |
|---|
| 788 | | finalURL = 'href="%s"' % href['finalURL'] |
|---|
| 789 | | else: |
|---|
| 790 | | finalURL = href['finalURL'] |
|---|
| 791 | | splitHtml[j] = finalURL |
|---|
| 792 | | |
|---|
def _updatePilotXMLs(self, xmlDatas, baseURLs, srcs):
    """Rewrite the references inside every split pilot XML document.

    Each piece tagged 'src' or 'XMLmp3' is resolved with
    ``self._getAbsoluteURL`` (without a base URL) and replaced by the
    'finalURL' of the matching ``srcs`` entry, prefixed with '../../' when
    that entry is used by more than one page.

    The 'split' lists are modified in place.  ``baseURLs`` is not used.
    """
    for xmlData in xmlDatas.values():
        pieces = xmlData['split']
        tags = xmlData['info']
        for position in range(len(pieces)):
            tag = tags[position]
            if tag not in ('src', 'XMLmp3'):
                continue
            resolved, flash = self._getAbsoluteURL(pieces[position], tag)
            src = srcs[resolved]
            prefix = ''
            if len(src['usedBy']) > 1:
                prefix = '../../'
            pieces[position] = prefix + src['finalURL']
|---|
| 805 | | |
|---|
def _addHtmlsToZip(self, splitHtmls, zip, baseDirs = None):
    """Write each page to ``zip`` as ``<base dir>index.html``.

    ``splitHtmls`` is a sequence of pages, each a sequence of text pieces
    that are joined before being written.  When ``baseDirs`` is a list or
    tuple, the page at position *n* goes under ``baseDirs[n]``; otherwise
    every page is written as plain ``index.html``.
    """
    perPageDirs = isinstance(baseDirs, (list, tuple))
    for position, pieces in enumerate(splitHtmls):
        directory = ''
        if perPageDirs:
            directory = baseDirs[position]
        zip.writestr(directory + 'index.html', ''.join(pieces))
|---|
| 814 | | |
|---|
def _addPilotXMLsToZip(self, xmlDatas, srcs, zip):
    """Write every rewritten pilot XML document to ``zip`` as UTF-16.

    The pieces of each document are joined and encoded, then stored at
    ``<base dir><finalURL>`` where the base directory is the first one
    recorded for the document and 'finalURL' comes from its ``srcs`` entry.
    Only that first base directory receives a copy; a document with no
    recorded base directory is not written at all.
    """
    for absURL, xmlData in xmlDatas.items():
        payload = ''.join(xmlData['split']).encode('utf-16')
        targets = xmlData['baseDirs']
        if targets:
            zip.writestr(targets[0] + srcs[absURL]['finalURL'], payload)
|---|
| 822 | | |
|---|
| 823 | | def buildZip(self): |
|---|
| 824 | | """ Builds a Zip """ |
|---|
| | 881 | finalURL = htmlList[i][1] + src['finalURL'] |
|---|
| | 882 | manifest += '\t\t\t<file href="%s" />\n' % finalURL |
|---|
| | 883 | manifest += '\t\t</resource>\n' |
|---|
| | 884 | manifest += '\t</resources>\n\n' |
|---|
| | 885 | manifest += '</manifest>' |
|---|
| | 886 | zip.writestr('imsmanifest.xml', manifest) |
|---|
| | 887 | for file in ('adlcp_rootv1p2.xsd', 'ims_xml.xsd', 'imscp_rootv1p1p2.xsd', 'imsmd_rootv1p2p1.xsd'): |
|---|
| | 888 | f = open(REPOSITORY + file, 'r') |
|---|
| | 889 | zip.writestr(file, f.read()) |
|---|
| | 890 | f.close() |
|---|
| | 891 | |
|---|
| 852 | | self._downloadFiles(srcs, zip, baseDirs) |
|---|
| 853 | | self._processHrefs(hrefs, srcs, htmlURLs, baseDirs, self.portal_url()) |
|---|
| 854 | | self._updateHtmls(splitHtmls, infoLists, baseURLs, baseDirs, srcs, hrefs) |
|---|
| 855 | | self._updatePilotXMLs(xmlDatas, baseURLs, srcs) |
|---|
| 856 | | self._addHtmlsToZip(splitHtmls, zip, baseDirs) |
|---|
| 857 | | self._addPilotXMLsToZip(xmlDatas, srcs, zip) |
|---|
| | 920 | downloadFiles(srcs, zip, baseDirs) |
|---|
| | 921 | processHrefs(hrefs, srcs, htmlURLs, baseDirs, self.portal_url()) |
|---|
| | 922 | updateHtmls(splitHtmls, infoLists, baseURLs, baseDirs, srcs, hrefs) |
|---|
| | 923 | updatePilotXMLs(xmlDatas, baseURLs, srcs) |
|---|
| | 924 | addHtmlsToZip(splitHtmls, zip, baseDirs) |
|---|
| | 925 | addPilotXMLsToZip(xmlDatas, srcs, zip) |
|---|
| | 926 | if SCORM == 'True': |
|---|
| | 927 | addSCORMFiles(str(max(times)), htmlList, srcs, zip) |
|---|