| 497 | | # print self.Title() |
|---|
| 498 | | # print self.getLearningStoryText() |
|---|
| 499 | | |
|---|
| 500 | | def do_stuff(html, _dir): |
|---|
| 501 | | def get_absolute_url(url): |
|---|
| 502 | | print '//// relative url:', url |
|---|
| 503 | | try: |
|---|
| 504 | | xurl = base + '/' + url |
|---|
| 505 | | src = urlopen(xurl) |
|---|
| 506 | | except HTTPError: |
|---|
| 507 | | print '----', xurl |
|---|
| 508 | | xurl = self.absolute_url() + '/' + url |
|---|
| 509 | | print '||||', xurl |
|---|
| 510 | | src = urlopen(xurl) |
|---|
| 511 | | return xurl, src |
|---|
| 512 | | |
|---|
| 513 | | def _do_src_suff(html): # change src urls, download and put them in zip |
|---|
| 514 | | l = sre.split('(<[^>]+)src="(.*?)"', html) |
|---|
| 515 | | |
|---|
| 516 | | srcs = {} |
|---|
| 517 | | newnames = {} |
|---|
| 518 | | src_urls = l[2::3] |
|---|
| 519 | | for url in src_urls: |
|---|
| 520 | | if not srcs.has_key(url): |
|---|
| | 497 | |
|---|
| | 498 | def do_stuff(html, cache): # change src urls, download and put them in zip |
|---|
| | 499 | # get and remove base url from html |
|---|
| | 500 | split_html = sre.split('(<base href=")(.*?)(".*?/>)', html) |
|---|
| | 501 | base = split_html[2] |
|---|
| | 502 | del split_html[1:4] |
|---|
| | 503 | html = ''.join(split_html) |
|---|
| | 504 | |
|---|
| | 505 | split_html = sre.split('(<[^>]+)src="(.*?)"', html) |
|---|
| | 506 | |
|---|
| | 507 | srcs = {} |
|---|
| | 508 | newnames = {} |
|---|
| | 509 | src_urls = split_html[2::3] |
|---|
| | 510 | for url in src_urls: |
|---|
| | 511 | if not srcs.has_key(url): |
|---|
| | 512 | if cache.has_key(url): |
|---|
| | 513 | filename, content = cache[url] |
|---|
| | 514 | srcs[url] = (filename, content) |
|---|
| | 515 | print 'From cache:', url |
|---|
| | 516 | else: |
|---|
| 550 | | s = ['%s%ssrc="%s"' % (stuff, tag, srcs.get(link, (link,))[0]) for stuff, tag, link in zip(l[::3], l[1::3], l[2::3])] |
|---|
| 551 | | s.append(l[-1]) |
|---|
| 552 | | return ''.join(s), srcs |
|---|
| 553 | | |
|---|
| 554 | | def _get_refs_and_rewrite(html, regex, start_idx, step): |
|---|
| 555 | | srcs = {} |
|---|
| 556 | | l = sre.split(regex, html) |
|---|
| 557 | | for url in l[start_idx::step]: |
|---|
| 558 | | if not srcs.has_key(url): |
|---|
| 559 | | xurl = url |
|---|
| 560 | | try: |
|---|
| 561 | | if not url.startswith('http://'): |
|---|
| 562 | | xurl, src = get_absolute_url(url) |
|---|
| 563 | | else: |
|---|
| 564 | | src = urlopen(xurl) |
|---|
| 565 | | print xurl |
|---|
| 566 | | filename = src.url.split('/')[-1] |
|---|
| 567 | | srcs[url] = (filename, src.read()) |
|---|
| 568 | | except HTTPError, e: |
|---|
| 569 | | print "Failed to download %s: %s" % (xurl, e) |
|---|
| 570 | | return srcs, l |
|---|
| 571 | | |
|---|
| 572 | | def _do_css_link_stuff(html): |
|---|
| 573 | | srcs, l = _get_refs_and_rewrite(html, '(<link[^>]+)href="(.*?\.css)"', 2, 3) |
|---|
| 574 | | |
|---|
| 575 | | s = ['%s%shref="%s"' % (stuff, tag, srcs.get(link, (link,))[0]) for stuff, tag, link in zip(l[::3], l[1::3], l[2::3])] |
|---|
| 576 | | s.append(l[-1]) |
|---|
| 577 | | return ''.join(s), srcs |
|---|
| 578 | | |
|---|
| 579 | | def _do_css_import_stuff(html): |
|---|
| 580 | | srcs, l = _get_refs_and_rewrite(html, '(<style[^>]+>.*?@import url)\((.*?)\)', 2, 3) |
|---|
| 581 | | |
|---|
| 582 | | s = ['%s%s(%s)' % (stuff, tag, srcs.get(link, (link,))[0]) for stuff, tag, link in zip(l[::3], l[1::3], l[2::3])] |
|---|
| 583 | | s.append(l[-1]) |
|---|
| 584 | | return ''.join(s), srcs |
|---|
| 585 | | |
|---|
| 586 | | def _do_css_stuff(css): |
|---|
| 587 | | srcs, l = _get_refs_and_rewrite(css, 'url\([\'"]?(.*?)[\'"]?\)', 1, 2) |
|---|
| 588 | | |
|---|
| 589 | | s = ['%surl(%s)' % (stuff, srcs.get(link, (link,))[0]) for stuff, link in zip(l[::2], l[1::2])] |
|---|
| 590 | | s.append(l[-1]) |
|---|
| 591 | | return ''.join(s), srcs |
|---|
| 592 | | |
|---|
| 593 | | def _get_and_remove_base(html): |
|---|
| 594 | | l = sre.split('(<base href=")(.*?)(".*?/>)', html) |
|---|
| 595 | | base = l[2] |
|---|
| 596 | | del l[1:4] |
|---|
| 597 | | return base, ''.join(l) |
|---|
| 598 | | |
|---|
| 599 | | # FIXME srcs-ben nincs-e utkozes... |
|---|
| 600 | | # FIXME <base> tageket ki kell irtani |
|---|
| 601 | | # vagy inkabb ez alapjan osszeszedni a cuccokat |
|---|
| 602 | | base, html = _get_and_remove_base(html) |
|---|
| 603 | | print '++++', base |
|---|
| 604 | | html, srcs = _do_src_suff(html) |
|---|
| 605 | | html, new_srcs = _do_css_link_stuff(html) |
|---|
| 606 | | srcs.update(new_srcs) |
|---|
| 607 | | html, new_srcs = _do_css_import_stuff(html) |
|---|
| 608 | | srcs.update(new_srcs) |
|---|
| 609 | | for oldurl, (newurl, css) in srcs.items(): |
|---|
| 610 | | if newurl.endswith('.css'): |
|---|
| 611 | | new_css, new_srcs = _do_css_stuff(css) |
|---|
| 612 | | srcs.update(new_srcs) |
|---|
| 613 | | srcs[oldurl] = (newurl, new_css) |
|---|
| 614 | | |
|---|
| 615 | | print '\\\\\\\\ Adding content to zip' |
|---|
| 616 | | for oldname, (newname, content) in srcs.iteritems(): |
|---|
| 617 | | zf.writestr('%s/%s' % (_dir, newname), content) |
|---|
| 618 | | print oldname, '->', newname |
|---|
| 619 | | return html |
|---|
| | 547 | return srcs, split_html |
|---|
| | 548 | |
|---|
| | 549 | zipcontent = [] |
|---|
| | 550 | |
|---|
| | 551 | main_dir = self.getId() |
|---|
| | 552 | url_cache = {} |
|---|
| | 553 | zipcontent.append((main_dir, do_stuff(self.standalone_view(), url_cache))) |
|---|
| | 554 | for content in self.getResources(reftype='relatedContent'): |
|---|
| | 555 | zipcontent.append(('%s/content/%s' % (main_dir, content.getId()), do_stuff(content.standalone_view(), url_cache))) |
|---|
| | 556 | |
|---|
| | 557 | for method in self.getResources(reftype='relatedMethods'): |
|---|
| | 558 | zipcontent.append(('%s/methods/%s' % (main_dir, method.getId()), do_stuff(method.standalone_view(), url_cache))) |
|---|
| | 559 | |
|---|
| | 560 | for tool in self.getResources(reftype='relatedTools'): |
|---|
| | 561 | zipcontent.append(('%s/tools/%s' % (main_dir, tool.getId()), do_stuff(tool.standalone_view(), url_cache))) |
|---|
| | 562 | |
|---|
| | 563 | file_ref_count = {} |
|---|
| | 564 | for dir_, (srcs, split_html) in zipcontent: |
|---|
| | 565 | for url in srcs.iterkeys(): |
|---|
| | 566 | file_ref_count[url] = file_ref_count.get(url, 0) + 1 |
|---|
| 624 | | main_dir = self.getId() |
|---|
| 625 | | print '%s/index.html' % main_dir |
|---|
| 626 | | zf.writestr('%s/index.html' % main_dir, do_stuff(self.standalone_view(), main_dir)) |
|---|
| 627 | | for content in self.getResources(reftype='relatedContent'): |
|---|
| 628 | | print '%s/content/%s/index.html' % (main_dir, content.getId()) |
|---|
| 629 | | zf.writestr('%s/content/%s/index.html' % (main_dir, content.getId()), do_stuff(content.standalone_view(), '%s/content/%s' % (main_dir, content.getId()))) |
|---|
| 630 | | |
|---|
| 631 | | for method in self.getResources(reftype='relatedMethods'): |
|---|
| 632 | | print '%s/methods/%s/index.html' % (main_dir, method.getId()) |
|---|
| 633 | | zf.writestr('%s/methods/%s/index.html' % (main_dir, method.getId()), do_stuff(method.standalone_view(), '%s/methods/%s' % (main_dir, method.getId()))) |
|---|
| 634 | | |
|---|
| 635 | | for tool in self.getResources(reftype='relatedTools'): |
|---|
| 636 | | print '%s/tools/%s/index.html' % (main_dir, tool.getId()) |
|---|
| 637 | | zf.writestr('%s/tools/%s/index.html' % (main_dir, tool.getId()), do_stuff(tool.standalone_view(), '%s/tools/%s' % (main_dir, tool.getId()))) |
|---|
| 638 | | |
|---|
| 639 | | print '-------------- Done ------------' |
|---|
| | 571 | common_urls = [key for key, val in file_ref_count.iteritems() if val > 1] |
|---|
| | 572 | for dir_, (srcs, split_html) in zipcontent: |
|---|
| | 573 | for url, (newname, content) in srcs.items(): |
|---|
| | 574 | if url in common_urls: |
|---|
| | 575 | srcs[url] = ('%s/common_files/%s' % (main_dir, newname), content) |
|---|
| | 576 | else: |
|---|
| | 577 | srcs[url] = ('%s/%s' % (dir_, newname), content) |
|---|
| | 578 | |
|---|
| | 579 | def calc_url(link): |
|---|
| | 580 | url = srcs.get(link, (link,))[0] |
|---|
| | 581 | depth = dir_.count('/') |
|---|
| | 582 | return "../" * depth + url.split('/', 1)[1] |
|---|
| | 583 | |
|---|
| | 584 | new_html_l = ['%s%ssrc="%s"' % (stuff, tag, calc_url(link)) for stuff, tag, link in zip(split_html[::3], split_html[1::3], split_html[2::3])] |
|---|
| | 585 | new_html_l.append(split_html[-1]) |
|---|
| | 586 | zf.writestr(dir_ + '/index.html', ''.join(new_html_l)) |
|---|
| | 587 | |
|---|
| | 588 | in_zip = {} |
|---|
| | 589 | for dir_, (srcs, split_html) in zipcontent: |
|---|
| | 590 | for url, (newname, content) in srcs.items(): |
|---|
| | 591 | if not in_zip.has_key(newname): |
|---|
| | 592 | in_zip[newname] = content |
|---|
| | 593 | zf.writestr(newname, content) |
|---|
| | 594 | else: |
|---|
| | 595 | assert in_zip[newname] == content |
|---|
| | 596 | |
|---|
| | 597 | print '-------------- _buildZIP done ------------' |
|---|