Extract XHTML-IM inline images by default
- Add two new options: tmp_image_dir and extract_inline_images - tmp_image_dir is $XDG_CACHE_HOME(usually ~/.cache)/poezio/images if unset - Name the images from a SHA-1 of their data and their mimetype - Output file:// links inside the message
This commit is contained in:
parent
d4590949f7
commit
a9f642f743
6 changed files with 104 additions and 14 deletions
|
@ -379,6 +379,14 @@ ack_message_receipts = true
|
|||
# Ask for message delivery receipts (XEP-0184)
|
||||
request_message_receipts = true
|
||||
|
||||
# Extract base64 images received in XHTML-IM messages
|
||||
# if true.
|
||||
extract_inline_images = true
|
||||
|
||||
# The directory where the images will be saved; if unset,
|
||||
# defaults to $XDG_CACHE_HOME/poezio/images.
|
||||
tmp_image_dir =
|
||||
|
||||
# Receive the tune notifications or not (in order to display information
|
||||
# in the roster).
|
||||
# If this is set to false, then the display_tune_notifications
|
||||
|
|
|
@ -851,6 +851,25 @@ Other
|
|||
|
||||
The lang some automated entities will use when replying to you.
|
||||
|
||||
extract_inline_images
|
||||
|
||||
**Default value:** ``true``
|
||||
|
||||
Some clients send inline images in base64 inside some messages, which results in
|
||||
a useless wall of text. If this option is ``true``, then that base64 text will
|
||||
be replaced with a :file:`file://` link to the image file extracted in
|
||||
:term:`tmp_image_dir` or :file:`$XDG_CACHE_HOME/poezio/images` by default, which
|
||||
is usually :file:`~/.cache/poezio/images`.
|
||||
|
||||
tmp_image_dir
|
||||
|
||||
**Default value:** ``[empty]``
|
||||
|
||||
The directory where poezio will save the images received, if
|
||||
:term:`extract_inline_images` is set to true. If unset, poezio
|
||||
will default to :file:`$XDG_CACHE_HOME/poezio/images` which is
|
||||
usually :file:`~/.cache/poezio/images`.
|
||||
|
||||
muc_history_length
|
||||
|
||||
**Default value:** ``50``
|
||||
|
|
|
@ -361,7 +361,6 @@ def file_ok(filepath):
|
|||
def check_create_config_dir():
|
||||
"""
|
||||
create the configuration directory if it doesn't exist
|
||||
and copy the default config in it
|
||||
"""
|
||||
CONFIG_HOME = environ.get("XDG_CONFIG_HOME")
|
||||
if not CONFIG_HOME:
|
||||
|
@ -374,6 +373,23 @@ def check_create_config_dir():
|
|||
pass
|
||||
return CONFIG_PATH
|
||||
|
||||
def check_create_cache_dir():
    """
    Create the poezio cache directory, and its 'images' subdirectory,
    if they do not exist yet.

    The base directory is $XDG_CACHE_HOME when that variable is set and
    non-empty, otherwise '.cache' under the user's home directory.
    The resulting '<base>/poezio' path is stored in the module-level
    CACHE_DIR variable.

    Directory creation is best-effort: an OSError is ignored.
    """
    global CACHE_DIR
    CACHE_HOME = environ.get("XDG_CACHE_HOME")
    if not CACHE_HOME:
        # expanduser() still yields a usable path when $HOME is unset,
        # whereas path.join(None, '.cache') would raise a TypeError.
        CACHE_HOME = path.join(path.expanduser('~'), '.cache')
    CACHE_DIR = path.join(CACHE_HOME, 'poezio')

    try:
        # A single call creates CACHE_DIR and the 'images' subdirectory.
        # exist_ok=True matters: without it, an already existing
        # CACHE_DIR would raise and 'images' would never be created.
        makedirs(path.join(CACHE_DIR, 'images'), exist_ok=True)
    except OSError:
        # Keep the original best-effort behaviour (e.g. read-only
        # filesystem): a missing cache directory must not abort startup.
        pass
|
||||
|
||||
def run_cmdline_args(CONFIG_PATH):
|
||||
"Parse the command line arguments"
|
||||
global options
|
||||
|
@ -495,3 +511,6 @@ safeJID = None
|
|||
|
||||
# the global log dir
|
||||
LOG_DIR = ''
|
||||
|
||||
# the global cache dir
|
||||
CACHE_DIR = ''
|
||||
|
|
|
@ -10,6 +10,7 @@ import ssl
|
|||
import time
|
||||
from hashlib import sha1, sha512
|
||||
from gettext import gettext as _
|
||||
from os import path
|
||||
|
||||
from sleekxmpp import InvalidJID
|
||||
from sleekxmpp.stanza import Message
|
||||
|
@ -24,7 +25,7 @@ import windows
|
|||
import xhtml
|
||||
import multiuserchat as muc
|
||||
from common import safeJID
|
||||
from config import config
|
||||
from config import config, CACHE_DIR
|
||||
from contact import Resource
|
||||
from logger import logger
|
||||
from roster import roster
|
||||
|
@ -178,7 +179,11 @@ def on_normal_message(self, message):
|
|||
return self.information('%s says: %s' % (message['from'], message['body']), 'Headline')
|
||||
|
||||
use_xhtml = config.get('enable_xhtml_im', True)
|
||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
|
||||
tmp_dir = config.get('tmp_image_dir', '') or path.join(CACHE_DIR, 'images')
|
||||
extract_images = config.get('extract_inline_images', True)
|
||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
|
||||
tmp_dir=tmp_dir,
|
||||
extract_images=extract_images)
|
||||
if not body:
|
||||
return
|
||||
|
||||
|
@ -223,7 +228,9 @@ def on_normal_message(self, message):
|
|||
self.events.trigger('conversation_msg', message, conversation)
|
||||
if not message['body']:
|
||||
return
|
||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
|
||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
|
||||
tmp_dir=tmp_dir,
|
||||
extract_images=extract_images)
|
||||
delayed, date = common.find_delayed_tag(message)
|
||||
|
||||
def try_modify():
|
||||
|
@ -441,7 +448,11 @@ def on_groupchat_message(self, message):
|
|||
|
||||
self.events.trigger('muc_msg', message, tab)
|
||||
use_xhtml = config.get('enable_xhtml_im', True)
|
||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
|
||||
tmp_dir = config.get('tmp_image_dir', '') or path.join(CACHE_DIR, 'images')
|
||||
extract_images = config.get('extract_inline_images', True)
|
||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
|
||||
tmp_dir=tmp_dir,
|
||||
extract_images=extract_images)
|
||||
if not body:
|
||||
return
|
||||
|
||||
|
@ -498,7 +509,11 @@ def on_groupchat_private_message(self, message):
|
|||
|
||||
room_from = jid.bare
|
||||
use_xhtml = config.get('enable_xhtml_im', True)
|
||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
|
||||
tmp_dir = config.get('tmp_image_dir', '') or path.join(CACHE_DIR, 'images')
|
||||
extract_images = config.get('extract_inline_images', True)
|
||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
|
||||
tmp_dir=tmp_dir,
|
||||
extract_images=extract_images)
|
||||
tab = self.get_tab_by_name(jid.full, tabs.PrivateTab) # get the tab with the private conversation
|
||||
ignore = config.get_by_tabname('ignore_private', False, room_from)
|
||||
if not tab: # It's the first message we receive: create the tab
|
||||
|
@ -511,7 +526,9 @@ def on_groupchat_private_message(self, message):
|
|||
self.xmpp.send_message(mto=jid.full, mbody=msg, mtype='chat')
|
||||
return
|
||||
self.events.trigger('private_msg', message, tab)
|
||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
|
||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
|
||||
tmp_dir=tmp_dir,
|
||||
extract_images=extract_images)
|
||||
if not body or not tab:
|
||||
return
|
||||
replaced_id = message['replace']['id']
|
||||
|
|
|
@ -30,6 +30,7 @@ def main():
|
|||
config.run_cmdline_args(config_path)
|
||||
config.create_global_config()
|
||||
config.check_create_log_dir()
|
||||
config.check_create_cache_dir()
|
||||
config.setup_logging()
|
||||
config.post_logging_setup()
|
||||
|
||||
|
|
40
src/xhtml.py
40
src/xhtml.py
|
@ -12,9 +12,13 @@ xhtml code to shell colors,
|
|||
poezio colors to xhtml code
|
||||
"""
|
||||
|
||||
import re
|
||||
import base64
|
||||
import curses
|
||||
import hashlib
|
||||
import re
|
||||
from os import path
|
||||
from sleekxmpp.xmlstream import ET
|
||||
from urllib.parse import unquote
|
||||
|
||||
from io import BytesIO
|
||||
from xml import sax
|
||||
|
@ -178,10 +182,12 @@ colors = {
|
|||
whitespace_re = re.compile(r'\s+')
|
||||
|
||||
xhtml_attr_re = re.compile(r'\x19-?\d[^}]*}|\x19[buaio]')
|
||||
xhtml_data_re = re.compile(r'data:image/([a-z]+);base64,(.+)')
|
||||
|
||||
xhtml_simple_attr_re = re.compile(r'\x19\d')
|
||||
|
||||
def get_body_from_message_stanza(message, use_xhtml=False):
|
||||
def get_body_from_message_stanza(message, use_xhtml=False,
|
||||
tmp_dir=None, extract_images=False):
|
||||
"""
|
||||
Returns a string with xhtml markups converted to
|
||||
poezio colors if there's an xhtml_im element, or
|
||||
|
@ -191,7 +197,8 @@ def get_body_from_message_stanza(message, use_xhtml=False):
|
|||
xhtml = message['html'].xml
|
||||
xhtml_body = xhtml.find('{http://www.w3.org/1999/xhtml}body')
|
||||
if xhtml_body:
|
||||
content = xhtml_to_poezio_colors(xhtml_body)
|
||||
content = xhtml_to_poezio_colors(xhtml_body, tmp_dir=tmp_dir,
|
||||
extract_images=extract_images)
|
||||
content = content if content else message['body']
|
||||
return content or " "
|
||||
return message['body']
|
||||
|
@ -281,7 +288,7 @@ def trim(string):
|
|||
return re.sub(whitespace_re, ' ', string)
|
||||
|
||||
class XHTMLHandler(sax.ContentHandler):
|
||||
def __init__(self, force_ns=False):
|
||||
def __init__(self, force_ns=False, tmp_dir=None, extract_images=False):
|
||||
self.builder = []
|
||||
self.formatting = []
|
||||
self.attrs = []
|
||||
|
@ -291,6 +298,9 @@ class XHTMLHandler(sax.ContentHandler):
|
|||
# do not care about xhtml-in namespace
|
||||
self.force_ns = force_ns
|
||||
|
||||
self.tmp_dir = tmp_dir
|
||||
self.extract_images = extract_images
|
||||
|
||||
@property
|
||||
def result(self):
|
||||
return ''.join(self.builder).strip()
|
||||
|
@ -331,7 +341,22 @@ class XHTMLHandler(sax.ContentHandler):
|
|||
elif name == 'em':
|
||||
self.append_formatting('\x19i')
|
||||
elif name == 'img':
|
||||
builder.append(trim(attrs['src']))
|
||||
if re.match(xhtml_data_re, attrs['src']) and self.extract_images:
|
||||
type_, data = [i for i in re.split(xhtml_data_re, attrs['src']) if i]
|
||||
bin_data = base64.b64decode(unquote(data))
|
||||
filename = hashlib.sha1(bin_data).hexdigest() + '.' + type_
|
||||
filepath = path.join(self.tmp_dir, filename)
|
||||
if not path.exists(filepath):
|
||||
try:
|
||||
with open(filepath, 'wb') as fd:
|
||||
fd.write(bin_data)
|
||||
builder.append('file://%s' % filepath)
|
||||
except Exception as e:
|
||||
builder.append('[Error while saving image: %s]' % e)
|
||||
else:
|
||||
builder.append('file://%s' % filepath)
|
||||
else:
|
||||
builder.append(trim(attrs['src']))
|
||||
if 'alt' in attrs:
|
||||
builder.append(' (%s)' % trim(attrs['alt']))
|
||||
elif name == 'ul':
|
||||
|
@ -389,13 +414,14 @@ class XHTMLHandler(sax.ContentHandler):
|
|||
if 'title' in attrs:
|
||||
builder.append(' [' + attrs['title'] + ']')
|
||||
|
||||
def xhtml_to_poezio_colors(xml, force=False):
|
||||
def xhtml_to_poezio_colors(xml, force=False, tmp_dir=None, extract_images=None):
|
||||
if isinstance(xml, str):
|
||||
xml = xml.encode('utf8')
|
||||
elif not isinstance(xml, bytes):
|
||||
xml = ET.tostring(xml)
|
||||
|
||||
handler = XHTMLHandler(force_ns=force)
|
||||
handler = XHTMLHandler(force_ns=force, tmp_dir=tmp_dir,
|
||||
extract_images=extract_images)
|
||||
parser = sax.make_parser()
|
||||
parser.setFeature(sax.handler.feature_namespaces, True)
|
||||
parser.setContentHandler(handler)
|
||||
|
|
Loading…
Reference in a new issue