Stop using SHA-1 for generated filenames.

This commit is contained in:
Emmanuel Gil Peyrot 2017-02-24 17:05:29 +00:00
parent 5ef524d3b1
commit 23927711c1

View file

@ -12,10 +12,10 @@ xhtml code to shell colors,
poezio colors to xhtml code poezio colors to xhtml code
""" """
import base64
import curses import curses
import hashlib import hashlib
import re import re
from base64 import b64encode, b64decode
from os import path from os import path
from slixmpp.xmlstream import ET from slixmpp.xmlstream import ET
from urllib.parse import unquote from urllib.parse import unquote
@ -292,6 +292,11 @@ def parse_css(css):
def trim(string): def trim(string):
return re.sub(whitespace_re, ' ', string) return re.sub(whitespace_re, ' ', string)
def get_hash(data: bytes) -> str:
    """Return a short, filename-safe textual hash of *data*.

    The digest is currently SHA-256 (this might change in the future),
    encoded as unpadded base64 because that is shorter than hex.

    The URL-safe alphabet ('-' and '_' in place of '+' and '/') is used so
    the result never contains a path separator, and the value is returned
    as ``str`` so it can be concatenated directly with a file extension.
    """
    digest = hashlib.sha256(data).digest()
    # altchars swaps '+' and '/' for '-' and '_': a '/' in the hash would be
    # treated as a directory separator once the name is joined to a path.
    encoded = b64encode(digest, altchars=b'-_').rstrip(b'=')
    return encoded.decode('ascii')
class XHTMLHandler(sax.ContentHandler): class XHTMLHandler(sax.ContentHandler):
def __init__(self, force_ns=False, tmp_dir=None, extract_images=False): def __init__(self, force_ns=False, tmp_dir=None, extract_images=False):
self.builder = [] self.builder = []
@ -349,8 +354,8 @@ class XHTMLHandler(sax.ContentHandler):
elif name == 'img': elif name == 'img':
if re.match(xhtml_data_re, attrs['src']) and self.extract_images: if re.match(xhtml_data_re, attrs['src']) and self.extract_images:
type_, data = [i for i in re.split(xhtml_data_re, attrs['src']) if i] type_, data = [i for i in re.split(xhtml_data_re, attrs['src']) if i]
bin_data = base64.b64decode(unquote(data)) bin_data = b64decode(unquote(data))
filename = hashlib.sha1(bin_data).hexdigest() + '.' + type_ filename = get_hash(bin_data) + '.' + type_
filepath = path.join(self.tmp_dir, filename) filepath = path.join(self.tmp_dir, filename)
if not path.exists(filepath): if not path.exists(filepath):
try: try: