plugins/untrackme: new plugin, based on remove_get_trackers

Thanks Perdu for the original work!

Signed-off-by: Maxime “pep” Buquet <pep@bouah.net>
This commit is contained in:
Maxime “pep” Buquet 2020-08-14 01:05:25 +02:00
parent a72a484509
commit a1f69a53d7

133
plugins/untrackme.py Normal file
View file

@ -0,0 +1,133 @@
"""
UntrackMe wannabe plugin
"""
from typing import Callable, Dict, List, Tuple, Union
import re
import logging
from slixmpp import Message
from poezio import tabs
from poezio.plugin import BasePlugin
from urllib.parse import quote as urlquote
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Tab types accepted by the plugin's message handler.
ChatTabs = Union[
    tabs.MucTab,
    tabs.DynamicConversationTab,
    tabs.StaticConversationTab,
    tabs.PrivateTab,
]
# Matches an http(s) URL.
#   host: everything after the scheme up to the first '/' or whitespace.
#   rest: the remaining non-whitespace characters (path, query, fragment).
# Whitespace is excluded from both groups so that a URL without a path
# ("https://example.com other text/with/slash") cannot swallow the text, or
# another URL, that follows it, and so that a match never spans lines.
RE_URL: re.Pattern = re.compile(r'https?://(?P<host>[^/\s]+)(?P<rest>\S*)')
# Maps a tracked host to ``(service, proxy)``:
#   service: key looked up in the plugin's 'services' config section.
#   proxy:   True when the whole original URL must be handed, percent-quoted,
#            to the service instead of only its path/query.
# Hosts are grouped per service; the resulting dict keeps the listed order.
SERVICES: Dict[str, Tuple[str, bool]] = {
    host: (service, proxied)
    for service, proxied, hosts in (
        ('invidious', False, (
            'm.youtube.com',
            'www.youtube.com',
            'youtube.com',
            'youtu.be',
            'youtube-nocookie.com',
        )),
        ('nitter', False, (
            'mobile.twitter.com',
            'www.twitter.com',
            'twitter.com',
        )),
        ('nitter_img', True, (
            'pic.twitter.com',
            'pbs.twimg.com',
        )),
        ('bibliogram', False, (
            'instagram.com',
            'www.instagram.com',
            'm.instagram.com',
        )),
    )
    for host in hosts
}
def proxy(service: str) -> Callable[[str], str]:
    """Return a function that wraps a URL for a service needing the original.

    The returned callable percent-quotes the URL it is given and appends it
    to ``service``.
    """
    def wrap(origin: str) -> str:
        quoted = urlquote(origin)
        return service + quoted

    return wrap
class Plugin(BasePlugin):
    """UntrackMe: strip tracking parameters from links and redirect them.

    The plugin handles the ``*_say`` and ``*_msg`` events of MUC,
    conversation and private tabs. Depending on the ``default`` config
    section, the body of each message has tracking query parameters removed
    (``cleanup``), links to known hosts rewritten to the configured
    alternative services (``redirect``), and the change reported to the user
    (``display_corrections``).
    """

    default_config: Dict[str, Dict[str, Union[str, bool]]] = {
        'default': {
            'cleanup': True,
            'redirect': True,
            'display_corrections': True,
        },
        'services': {
            'invidious': 'https://invidio.us',
            'nitter': 'https://nitter.net',
            'bibliogram': 'https://bibliogram.art',
        },
    }

    # A link, delimited by whitespace.
    _LINK_RE = re.compile(r'https?://\S+')

    # A '?' or '&' separator followed by one or more consecutive tracking
    # parameters (each with its value and optional trailing '&').
    # fbclid: used globally (Facebook)
    # utm_*: used globally https://en.wikipedia.org/wiki/UTM_parameters
    # dclid, ncid: DoubleClick (Google)
    # ref_src, ref_url: twitter
    # Others exist but are excluded because they are not common.
    _TRACKER_RUN_RE = re.compile(
        r'(?P<sep>[?&])'
        r'(?P<run>(?:'
        r'(?:fbclid|dclid|ncid|utm_source|utm_medium|utm_campaign|utm_term'
        r'|utm_content|ref_src|ref_url)'
        r'=[^\s&#]*&?'
        r')+)'
    )

    def init(self):
        """Derive the ``nitter_img`` service and register message handlers."""
        # Hosts mapped to 'nitter_img' are served from the '/pic/' path of
        # the configured nitter instance.
        nitter_img = self.config.get('nitter', section='services') + '/pic/'
        self.config.set('nitter_img', nitter_img, section='services')

        for event in (
            'muc_say',
            'conversation_say',
            'private_say',
            'muc_msg',
            'conversation_msg',
            'private_msg',
        ):
            self.api.add_event_handler(event, self.handle_msg)

    def map_services(self, match: re.Match) -> str:
        """Return the replacement for one URL matched by ``RE_URL``.

        URLs whose host is not listed in ``SERVICES`` are returned unchanged.
        Otherwise the host is swapped for the configured service; for proxy
        services the whole original URL is percent-quoted and appended.
        """
        # Hostnames are case-insensitive; SERVICES keys are lower-case.
        host = match.group('host').lower()

        dest = SERVICES.get(host)
        if dest is None:
            return match.group(0)

        destname, is_proxy = dest
        replaced = self.config.get(destname, section='services')
        if is_proxy:
            result = replaced + urlquote(match.group(0))
        else:
            result = replaced + match.group('rest')

        # TODO: count parenthesis?
        # Removes comma at the end of a link: a trailing ',' that got
        # percent-quoted above is punctuation, not part of the URL.
        if result.endswith('%2C'):
            result = result[:-3] + ','

        return result

    def handle_msg(self, msg: Message, tab: ChatTabs) -> None:
        """Rewrite ``msg['body']`` in place according to the configuration.

        The original and modified bodies are always logged at debug level;
        when ``display_corrections`` is enabled and the body changed, they
        are also shown in an information message.
        """
        orig = msg['body']

        if self.config.get('cleanup', section='default'):
            msg['body'] = self.cleanup_url(msg['body'])

        if self.config.get('redirect', section='default'):
            msg['body'] = self.redirect_url(msg['body'])

        log.debug(
            'UntrackMe in tab \'%s\':\nOriginal: %s\nModified: %s',
            tab.name, orig, msg['body'],
        )

        if self.config.get('display_corrections', section='default') and \
                msg['body'] != orig:
            self.api.information(
                'UntrackMe in tab \'{}\':\nOriginal: {}\nModified: {}'.format(
                    tab.name, orig, msg['body']
                ),
                'Info',
            )

    def cleanup_url(self, txt: str) -> str:
        """Return ``txt`` with tracking parameters removed from every link.

        All tracking parameters of a link are removed, not only the last
        one. The '?' or '&' that introduced a removed run is kept only when
        another parameter follows it, so no dangling separator is left.
        Text outside of http(s) links is never modified.
        """
        def drop_run(run: re.Match) -> str:
            # A run ending in '&' is followed by more of the query string,
            # which still needs the separator that preceded the run.
            return run.group('sep') if run.group('run').endswith('&') else ''

        def clean_link(link: re.Match) -> str:
            return self._TRACKER_RUN_RE.sub(drop_run, link.group(0))

        return self._LINK_RE.sub(clean_link, txt)

    def redirect_url(self, txt: str) -> str:
        """Return ``txt`` with every known link redirected via ``map_services``."""
        return RE_URL.sub(self.map_services, txt)