xhtml: Simplify tmp_dir/extract_images into a single Option<str> parameter.

This commit is contained in:
Emmanuel Gil Peyrot 2018-07-04 12:25:41 +02:00
parent f0ad4b348b
commit 60ba8308fa
2 changed files with 22 additions and 27 deletions

View file

@@ -274,13 +274,13 @@ class HandlerCore:
use_xhtml = config.get_by_tabname('enable_xhtml_im', use_xhtml = config.get_by_tabname('enable_xhtml_im',
message['from'].bare) message['from'].bare)
tmp_dir = None
if config.get('extract_inline_images'):
tmp_dir = config.get('tmp_image_dir') or path.join(CACHE_DIR, 'images') tmp_dir = config.get('tmp_image_dir') or path.join(CACHE_DIR, 'images')
extract_images = config.get('extract_inline_images')
body = xhtml.get_body_from_message_stanza( body = xhtml.get_body_from_message_stanza(
message, message,
use_xhtml=use_xhtml, use_xhtml=use_xhtml,
tmp_dir=tmp_dir, extract_images_to=tmp_dir)
extract_images=extract_images)
if not body: if not body:
if not self.core.xmpp.plugin['xep_0380'].has_eme(message): if not self.core.xmpp.plugin['xep_0380'].has_eme(message):
return return
@@ -336,8 +336,7 @@ class HandlerCore:
body = xhtml.get_body_from_message_stanza( body = xhtml.get_body_from_message_stanza(
message, message,
use_xhtml=use_xhtml, use_xhtml=use_xhtml,
tmp_dir=tmp_dir, extract_images_to=tmp_dir)
extract_images=extract_images)
delayed, date = common.find_delayed_tag(message) delayed, date = common.find_delayed_tag(message)
def try_modify(): def try_modify():
@@ -669,13 +668,13 @@ class HandlerCore:
self.core.events.trigger('muc_msg', message, tab) self.core.events.trigger('muc_msg', message, tab)
use_xhtml = config.get_by_tabname('enable_xhtml_im', room_from) use_xhtml = config.get_by_tabname('enable_xhtml_im', room_from)
tmp_dir = None
if config.get('extract_inline_images'):
tmp_dir = config.get('tmp_image_dir') or path.join(CACHE_DIR, 'images') tmp_dir = config.get('tmp_image_dir') or path.join(CACHE_DIR, 'images')
extract_images = config.get('extract_inline_images')
body = xhtml.get_body_from_message_stanza( body = xhtml.get_body_from_message_stanza(
message, message,
use_xhtml=use_xhtml, use_xhtml=use_xhtml,
tmp_dir=tmp_dir, extract_images_to=tmp_dir)
extract_images=extract_images)
if not body: if not body:
return return
@@ -747,13 +746,13 @@ class HandlerCore:
room_from = jid.bare room_from = jid.bare
use_xhtml = config.get_by_tabname('enable_xhtml_im', jid.bare) use_xhtml = config.get_by_tabname('enable_xhtml_im', jid.bare)
tmp_dir = None
if config.get('extract_inline_images'):
tmp_dir = config.get('tmp_image_dir') or path.join(CACHE_DIR, 'images') tmp_dir = config.get('tmp_image_dir') or path.join(CACHE_DIR, 'images')
extract_images = config.get('extract_inline_images')
body = xhtml.get_body_from_message_stanza( body = xhtml.get_body_from_message_stanza(
message, message,
use_xhtml=use_xhtml, use_xhtml=use_xhtml,
tmp_dir=tmp_dir, extract_images_to=tmp_dir)
extract_images=extract_images)
tab = self.core.get_tab_by_name( tab = self.core.get_tab_by_name(
jid.full, jid.full,
tabs.PrivateTab) # get the tab with the private conversation tabs.PrivateTab) # get the tab with the private conversation
@@ -774,8 +773,7 @@ class HandlerCore:
body = xhtml.get_body_from_message_stanza( body = xhtml.get_body_from_message_stanza(
message, message,
use_xhtml=use_xhtml, use_xhtml=use_xhtml,
tmp_dir=tmp_dir, extract_images_to=tmp_dir)
extract_images=extract_images)
if not body or not tab: if not body or not tab:
return return
replaced = False replaced = False

View file

@@ -193,8 +193,7 @@ xhtml_simple_attr_re = re.compile(r'\x19\d')
def get_body_from_message_stanza(message, def get_body_from_message_stanza(message,
use_xhtml=False, use_xhtml=False,
tmp_dir=None, extract_images_to=None):
extract_images=False):
""" """
Returns a string with xhtml markups converted to Returns a string with xhtml markups converted to
poezio colors if there's an xhtml_im element, or poezio colors if there's an xhtml_im element, or
@@ -209,7 +208,7 @@ def get_body_from_message_stanza(message,
if xhtml_body is None: if xhtml_body is None:
return message['body'] return message['body']
content = xhtml_to_poezio_colors( content = xhtml_to_poezio_colors(
xhtml_body, tmp_dir=tmp_dir, extract_images=extract_images) xhtml_body, tmp_dir=extract_images_to)
content = content if content else message['body'] content = content if content else message['body']
return content or " " return content or " "
@@ -298,7 +297,7 @@ def get_hash(data: bytes) -> str:
class XHTMLHandler(sax.ContentHandler): class XHTMLHandler(sax.ContentHandler):
def __init__(self, force_ns=False, tmp_dir=None, extract_images=False): def __init__(self, force_ns=False, tmp_image_dir=None):
self.builder = [] self.builder = []
self.formatting = [] self.formatting = []
self.attrs = [] self.attrs = []
@@ -308,8 +307,7 @@ class XHTMLHandler(sax.ContentHandler):
# do not care about xhtml-in namespace # do not care about xhtml-in namespace
self.force_ns = force_ns self.force_ns = force_ns
self.tmp_dir = tmp_dir self.tmp_image_dir = tmp_image_dir
self.extract_images = extract_images
self.enable_css_parsing = config.get('enable_css_parsing') self.enable_css_parsing = config.get('enable_css_parsing')
@property @property
@@ -357,13 +355,13 @@ class XHTMLHandler(sax.ContentHandler):
elif name == 'em': elif name == 'em':
self.append_formatting('\x19i') self.append_formatting('\x19i')
elif name == 'img': elif name == 'img':
if re.match(xhtml_data_re, attrs['src']) and self.extract_images: if re.match(xhtml_data_re, attrs['src']) and self.tmp_image_dir is not None:
type_, data = [ type_, data = [
i for i in re.split(xhtml_data_re, attrs['src']) if i i for i in re.split(xhtml_data_re, attrs['src']) if i
] ]
bin_data = b64decode(unquote(data)) bin_data = b64decode(unquote(data))
filename = get_hash(bin_data) + '.' + type_ filename = get_hash(bin_data) + '.' + type_
filepath = path.join(self.tmp_dir, filename) filepath = path.join(self.tmp_image_dir, filename)
if not path.exists(filepath): if not path.exists(filepath):
try: try:
with open(filepath, 'wb') as fd: with open(filepath, 'wb') as fd:
@@ -435,15 +433,14 @@ class XHTMLHandler(sax.ContentHandler):
builder.append(' [' + attrs['title'] + ']') builder.append(' [' + attrs['title'] + ']')
def xhtml_to_poezio_colors(xml, force=False, tmp_dir=None, def xhtml_to_poezio_colors(xml, force=False, tmp_dir=None):
extract_images=None):
if isinstance(xml, str): if isinstance(xml, str):
xml = xml.encode('utf8') xml = xml.encode('utf8')
elif not isinstance(xml, bytes): elif not isinstance(xml, bytes):
xml = ET.tostring(xml) xml = ET.tostring(xml)
handler = XHTMLHandler( handler = XHTMLHandler(
force_ns=force, tmp_dir=tmp_dir, extract_images=extract_images) force_ns=force, tmp_image_dir=tmp_dir)
parser = sax.make_parser() parser = sax.make_parser()
parser.setFeature(sax.handler.feature_namespaces, True) parser.setFeature(sax.handler.feature_namespaces, True)
parser.setContentHandler(handler) parser.setContentHandler(handler)