Extract XHTML-IM inline images by default
- Add two new options: tmp_image_dir and extract_inline_images - tmp_image_dir is $XDG_CACHE_HOME(usually ~/.cache)/poezio/images if unset - Name the images from a SHA-1 of their data and their mimetype - Output file:// links inside the message
This commit is contained in:
parent
d4590949f7
commit
a9f642f743
6 changed files with 104 additions and 14 deletions
|
@ -379,6 +379,14 @@ ack_message_receipts = true
|
||||||
# Ask for message delivery receipts (XEP-0184)
|
# Ask for message delivery receipts (XEP-0184)
|
||||||
request_message_receipts = true
|
request_message_receipts = true
|
||||||
|
|
||||||
|
# Extract base64 images received in XHTML-IM messages
|
||||||
|
# if true.
|
||||||
|
extract_inline_images = true
|
||||||
|
|
||||||
|
# The directory where the images will be saved; if unset,
|
||||||
|
# defaults to $XDG_CACHE_HOME/poezio/images.
|
||||||
|
tmp_image_dir =
|
||||||
|
|
||||||
# Receive the tune notifications or not (in order to display informations
|
# Receive the tune notifications or not (in order to display informations
|
||||||
# in the roster).
|
# in the roster).
|
||||||
# If this is set to false, then the display_tune_notifications
|
# If this is set to false, then the display_tune_notifications
|
||||||
|
|
|
@ -851,6 +851,25 @@ Other
|
||||||
|
|
||||||
The lang some automated entities will use when replying to you.
|
The lang some automated entities will use when replying to you.
|
||||||
|
|
||||||
|
extract_inline_images
|
||||||
|
|
||||||
|
**Default value:** ``true``
|
||||||
|
|
||||||
|
Some clients send inline images in base64 inside some messages, which results in
|
||||||
|
a useless wall of text. If this option is ``true``, then that base64 text will
|
||||||
|
be replaced with a :file:`file://` link to the image file extracted in
|
||||||
|
:term:`tmp_image_dir` or :file:`$XDG_CACHE_HOME/poezio/images` by default, which
|
||||||
|
is usually :file:`~/.cache/poezio/images`.
|
||||||
|
|
||||||
|
tmp_image_dir
|
||||||
|
|
||||||
|
**Default value:** ``[empty]``
|
||||||
|
|
||||||
|
The directory where poezio will save the images received, if
|
||||||
|
:term:`extract_inline_images` is set to true. If unset, poezio
|
||||||
|
will default to :file:`$XDG_CACHE_HOME/poezio/images` which is
|
||||||
|
usually :file:`~/.cache/poezio/images`.
|
||||||
|
|
||||||
muc_history_length
|
muc_history_length
|
||||||
|
|
||||||
**Default value:** ``50``
|
**Default value:** ``50``
|
||||||
|
|
|
@ -361,7 +361,6 @@ def file_ok(filepath):
|
||||||
def check_create_config_dir():
|
def check_create_config_dir():
|
||||||
"""
|
"""
|
||||||
create the configuration directory if it doesn't exist
|
create the configuration directory if it doesn't exist
|
||||||
and copy the default config in it
|
|
||||||
"""
|
"""
|
||||||
CONFIG_HOME = environ.get("XDG_CONFIG_HOME")
|
CONFIG_HOME = environ.get("XDG_CONFIG_HOME")
|
||||||
if not CONFIG_HOME:
|
if not CONFIG_HOME:
|
||||||
|
@ -374,6 +373,23 @@ def check_create_config_dir():
|
||||||
pass
|
pass
|
||||||
return CONFIG_PATH
|
return CONFIG_PATH
|
||||||
|
|
||||||
|
def check_create_cache_dir():
    """
    Create the cache directory if it doesn't exist,
    along with its 'images' subdirectory.

    The base directory is $XDG_CACHE_HOME when that variable is set and
    non-empty, otherwise '.cache' inside the user's home directory. The
    resulting '<base>/poezio' path is stored in the module-level CACHE_DIR.

    Directory creation is best-effort: an OSError (e.g. permission denied)
    is ignored, so callers must be prepared for the directory to be missing.
    """
    global CACHE_DIR
    cache_home = environ.get("XDG_CACHE_HOME")
    if not cache_home:
        # expanduser() still resolves a home directory when $HOME is unset,
        # whereas path.join(None, ...) would raise a TypeError.
        cache_home = path.join(path.expanduser('~'), '.cache')
    CACHE_DIR = path.join(cache_home, 'poezio')

    try:
        # A single recursive call creates CACHE_DIR and 'images' together.
        # Creating them in two steps inside one try block skipped 'images'
        # whenever CACHE_DIR already existed, because the first makedirs()
        # raised before the second one ran.
        makedirs(path.join(CACHE_DIR, 'images'), exist_ok=True)
    except OSError:
        pass
|
||||||
|
|
||||||
def run_cmdline_args(CONFIG_PATH):
|
def run_cmdline_args(CONFIG_PATH):
|
||||||
"Parse the command line arguments"
|
"Parse the command line arguments"
|
||||||
global options
|
global options
|
||||||
|
@ -495,3 +511,6 @@ safeJID = None
|
||||||
|
|
||||||
# the global log dir
|
# the global log dir
|
||||||
LOG_DIR = ''
|
LOG_DIR = ''
|
||||||
|
|
||||||
|
# the global cache dir
|
||||||
|
CACHE_DIR = ''
|
||||||
|
|
|
@ -10,6 +10,7 @@ import ssl
|
||||||
import time
|
import time
|
||||||
from hashlib import sha1, sha512
|
from hashlib import sha1, sha512
|
||||||
from gettext import gettext as _
|
from gettext import gettext as _
|
||||||
|
from os import path
|
||||||
|
|
||||||
from sleekxmpp import InvalidJID
|
from sleekxmpp import InvalidJID
|
||||||
from sleekxmpp.stanza import Message
|
from sleekxmpp.stanza import Message
|
||||||
|
@ -24,7 +25,7 @@ import windows
|
||||||
import xhtml
|
import xhtml
|
||||||
import multiuserchat as muc
|
import multiuserchat as muc
|
||||||
from common import safeJID
|
from common import safeJID
|
||||||
from config import config
|
from config import config, CACHE_DIR
|
||||||
from contact import Resource
|
from contact import Resource
|
||||||
from logger import logger
|
from logger import logger
|
||||||
from roster import roster
|
from roster import roster
|
||||||
|
@ -178,7 +179,11 @@ def on_normal_message(self, message):
|
||||||
return self.information('%s says: %s' % (message['from'], message['body']), 'Headline')
|
return self.information('%s says: %s' % (message['from'], message['body']), 'Headline')
|
||||||
|
|
||||||
use_xhtml = config.get('enable_xhtml_im', True)
|
use_xhtml = config.get('enable_xhtml_im', True)
|
||||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
|
tmp_dir = config.get('tmp_image_dir', '') or path.join(CACHE_DIR, 'images')
|
||||||
|
extract_images = config.get('extract_inline_images', True)
|
||||||
|
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
|
||||||
|
tmp_dir=tmp_dir,
|
||||||
|
extract_images=extract_images)
|
||||||
if not body:
|
if not body:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -223,7 +228,9 @@ def on_normal_message(self, message):
|
||||||
self.events.trigger('conversation_msg', message, conversation)
|
self.events.trigger('conversation_msg', message, conversation)
|
||||||
if not message['body']:
|
if not message['body']:
|
||||||
return
|
return
|
||||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
|
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
|
||||||
|
tmp_dir=tmp_dir,
|
||||||
|
extract_images=extract_images)
|
||||||
delayed, date = common.find_delayed_tag(message)
|
delayed, date = common.find_delayed_tag(message)
|
||||||
|
|
||||||
def try_modify():
|
def try_modify():
|
||||||
|
@ -441,7 +448,11 @@ def on_groupchat_message(self, message):
|
||||||
|
|
||||||
self.events.trigger('muc_msg', message, tab)
|
self.events.trigger('muc_msg', message, tab)
|
||||||
use_xhtml = config.get('enable_xhtml_im', True)
|
use_xhtml = config.get('enable_xhtml_im', True)
|
||||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
|
tmp_dir = config.get('tmp_image_dir', '') or path.join(CACHE_DIR, 'images')
|
||||||
|
extract_images = config.get('extract_inline_images', True)
|
||||||
|
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
|
||||||
|
tmp_dir=tmp_dir,
|
||||||
|
extract_images=extract_images)
|
||||||
if not body:
|
if not body:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -498,7 +509,11 @@ def on_groupchat_private_message(self, message):
|
||||||
|
|
||||||
room_from = jid.bare
|
room_from = jid.bare
|
||||||
use_xhtml = config.get('enable_xhtml_im', True)
|
use_xhtml = config.get('enable_xhtml_im', True)
|
||||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
|
tmp_dir = config.get('tmp_image_dir', '') or path.join(CACHE_DIR, 'images')
|
||||||
|
extract_images = config.get('extract_inline_images', True)
|
||||||
|
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
|
||||||
|
tmp_dir=tmp_dir,
|
||||||
|
extract_images=extract_images)
|
||||||
tab = self.get_tab_by_name(jid.full, tabs.PrivateTab) # get the tab with the private conversation
|
tab = self.get_tab_by_name(jid.full, tabs.PrivateTab) # get the tab with the private conversation
|
||||||
ignore = config.get_by_tabname('ignore_private', False, room_from)
|
ignore = config.get_by_tabname('ignore_private', False, room_from)
|
||||||
if not tab: # It's the first message we receive: create the tab
|
if not tab: # It's the first message we receive: create the tab
|
||||||
|
@ -511,7 +526,9 @@ def on_groupchat_private_message(self, message):
|
||||||
self.xmpp.send_message(mto=jid.full, mbody=msg, mtype='chat')
|
self.xmpp.send_message(mto=jid.full, mbody=msg, mtype='chat')
|
||||||
return
|
return
|
||||||
self.events.trigger('private_msg', message, tab)
|
self.events.trigger('private_msg', message, tab)
|
||||||
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml)
|
body = xhtml.get_body_from_message_stanza(message, use_xhtml=use_xhtml,
|
||||||
|
tmp_dir=tmp_dir,
|
||||||
|
extract_images=extract_images)
|
||||||
if not body or not tab:
|
if not body or not tab:
|
||||||
return
|
return
|
||||||
replaced_id = message['replace']['id']
|
replaced_id = message['replace']['id']
|
||||||
|
|
|
@ -30,6 +30,7 @@ def main():
|
||||||
config.run_cmdline_args(config_path)
|
config.run_cmdline_args(config_path)
|
||||||
config.create_global_config()
|
config.create_global_config()
|
||||||
config.check_create_log_dir()
|
config.check_create_log_dir()
|
||||||
|
config.check_create_cache_dir()
|
||||||
config.setup_logging()
|
config.setup_logging()
|
||||||
config.post_logging_setup()
|
config.post_logging_setup()
|
||||||
|
|
||||||
|
|
40
src/xhtml.py
40
src/xhtml.py
|
@ -12,9 +12,13 @@ xhtml code to shell colors,
|
||||||
poezio colors to xhtml code
|
poezio colors to xhtml code
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import base64
|
||||||
import curses
|
import curses
|
||||||
|
import hashlib
|
||||||
|
import re
|
||||||
|
from os import path
|
||||||
from sleekxmpp.xmlstream import ET
|
from sleekxmpp.xmlstream import ET
|
||||||
|
from urllib.parse import unquote
|
||||||
|
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from xml import sax
|
from xml import sax
|
||||||
|
@ -178,10 +182,12 @@ colors = {
|
||||||
whitespace_re = re.compile(r'\s+')
|
whitespace_re = re.compile(r'\s+')
|
||||||
|
|
||||||
xhtml_attr_re = re.compile(r'\x19-?\d[^}]*}|\x19[buaio]')
|
xhtml_attr_re = re.compile(r'\x19-?\d[^}]*}|\x19[buaio]')
|
||||||
|
xhtml_data_re = re.compile(r'data:image/([a-z]+);base64,(.+)')
|
||||||
|
|
||||||
xhtml_simple_attr_re = re.compile(r'\x19\d')
|
xhtml_simple_attr_re = re.compile(r'\x19\d')
|
||||||
|
|
||||||
def get_body_from_message_stanza(message, use_xhtml=False):
|
def get_body_from_message_stanza(message, use_xhtml=False,
|
||||||
|
tmp_dir=None, extract_images=False):
|
||||||
"""
|
"""
|
||||||
Returns a string with xhtml markups converted to
|
Returns a string with xhtml markups converted to
|
||||||
poezio colors if there's an xhtml_im element, or
|
poezio colors if there's an xhtml_im element, or
|
||||||
|
@ -191,7 +197,8 @@ def get_body_from_message_stanza(message, use_xhtml=False):
|
||||||
xhtml = message['html'].xml
|
xhtml = message['html'].xml
|
||||||
xhtml_body = xhtml.find('{http://www.w3.org/1999/xhtml}body')
|
xhtml_body = xhtml.find('{http://www.w3.org/1999/xhtml}body')
|
||||||
if xhtml_body:
|
if xhtml_body:
|
||||||
content = xhtml_to_poezio_colors(xhtml_body)
|
content = xhtml_to_poezio_colors(xhtml_body, tmp_dir=tmp_dir,
|
||||||
|
extract_images=extract_images)
|
||||||
content = content if content else message['body']
|
content = content if content else message['body']
|
||||||
return content or " "
|
return content or " "
|
||||||
return message['body']
|
return message['body']
|
||||||
|
@ -281,7 +288,7 @@ def trim(string):
|
||||||
return re.sub(whitespace_re, ' ', string)
|
return re.sub(whitespace_re, ' ', string)
|
||||||
|
|
||||||
class XHTMLHandler(sax.ContentHandler):
|
class XHTMLHandler(sax.ContentHandler):
|
||||||
def __init__(self, force_ns=False):
|
def __init__(self, force_ns=False, tmp_dir=None, extract_images=False):
|
||||||
self.builder = []
|
self.builder = []
|
||||||
self.formatting = []
|
self.formatting = []
|
||||||
self.attrs = []
|
self.attrs = []
|
||||||
|
@ -291,6 +298,9 @@ class XHTMLHandler(sax.ContentHandler):
|
||||||
# do not care about xhtml-in namespace
|
# do not care about xhtml-in namespace
|
||||||
self.force_ns = force_ns
|
self.force_ns = force_ns
|
||||||
|
|
||||||
|
self.tmp_dir = tmp_dir
|
||||||
|
self.extract_images = extract_images
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def result(self):
|
def result(self):
|
||||||
return ''.join(self.builder).strip()
|
return ''.join(self.builder).strip()
|
||||||
|
@ -331,7 +341,22 @@ class XHTMLHandler(sax.ContentHandler):
|
||||||
elif name == 'em':
|
elif name == 'em':
|
||||||
self.append_formatting('\x19i')
|
self.append_formatting('\x19i')
|
||||||
elif name == 'img':
|
elif name == 'img':
|
||||||
builder.append(trim(attrs['src']))
|
if re.match(xhtml_data_re, attrs['src']) and self.extract_images:
|
||||||
|
type_, data = [i for i in re.split(xhtml_data_re, attrs['src']) if i]
|
||||||
|
bin_data = base64.b64decode(unquote(data))
|
||||||
|
filename = hashlib.sha1(bin_data).hexdigest() + '.' + type_
|
||||||
|
filepath = path.join(self.tmp_dir, filename)
|
||||||
|
if not path.exists(filepath):
|
||||||
|
try:
|
||||||
|
with open(filepath, 'wb') as fd:
|
||||||
|
fd.write(bin_data)
|
||||||
|
builder.append('file://%s' % filepath)
|
||||||
|
except Exception as e:
|
||||||
|
builder.append('[Error while saving image: %s]' % e)
|
||||||
|
else:
|
||||||
|
builder.append('file://%s' % filepath)
|
||||||
|
else:
|
||||||
|
builder.append(trim(attrs['src']))
|
||||||
if 'alt' in attrs:
|
if 'alt' in attrs:
|
||||||
builder.append(' (%s)' % trim(attrs['alt']))
|
builder.append(' (%s)' % trim(attrs['alt']))
|
||||||
elif name == 'ul':
|
elif name == 'ul':
|
||||||
|
@ -389,13 +414,14 @@ class XHTMLHandler(sax.ContentHandler):
|
||||||
if 'title' in attrs:
|
if 'title' in attrs:
|
||||||
builder.append(' [' + attrs['title'] + ']')
|
builder.append(' [' + attrs['title'] + ']')
|
||||||
|
|
||||||
def xhtml_to_poezio_colors(xml, force=False):
|
def xhtml_to_poezio_colors(xml, force=False, tmp_dir=None, extract_images=None):
|
||||||
if isinstance(xml, str):
|
if isinstance(xml, str):
|
||||||
xml = xml.encode('utf8')
|
xml = xml.encode('utf8')
|
||||||
elif not isinstance(xml, bytes):
|
elif not isinstance(xml, bytes):
|
||||||
xml = ET.tostring(xml)
|
xml = ET.tostring(xml)
|
||||||
|
|
||||||
handler = XHTMLHandler(force_ns=force)
|
handler = XHTMLHandler(force_ns=force, tmp_dir=tmp_dir,
|
||||||
|
extract_images=extract_images)
|
||||||
parser = sax.make_parser()
|
parser = sax.make_parser()
|
||||||
parser.setFeature(sax.handler.feature_namespaces, True)
|
parser.setFeature(sax.handler.feature_namespaces, True)
|
||||||
parser.setContentHandler(handler)
|
parser.setContentHandler(handler)
|
||||||
|
|
Loading…
Reference in a new issue