# Source code for publiforge.lib.rsync
"""Provides a high-level interface to some librsync functions.
This is a python wrapper around the lower-level _librsync module,
which is written in C.
"""
# pylint: disable = c-extension-no-member
import types
import array
from os.path import getsize
from ...lib.rsync import _librsync
# Default block size, taken from the C extension's RS_JOB_BLOCKSIZE constant.
# It is the chunk size LikeFile requests from its input file on each read and
# the default ``blocksize`` argument of SigFile.
BLOCKSIZE = _librsync.RS_JOB_BLOCKSIZE
# =============================================================================
class LikeFile(object):
    """File-like object used by SigFile, DeltaFile, and PatchedFile.

    Data is pulled from ``infile`` in chunks, pushed through
    ``self.maker.cycle()`` and the produced output is buffered until it is
    consumed by :meth:`read`. Subclasses only have to provide ``maker``.
    """
    # This will be replaced in subclasses by an object with an appropriate
    # cycle() method. cycle(inbuf) must return a 3-tuple
    # (eof, number of bytes of inbuf consumed, output bytes).
    maker = None
    mode = 'rb'

    # -------------------------------------------------------------------------
    def __init__(self, infile, need_seek=None):
        """LikeFile initializer - zero buffers, set eofs off.

        :param infile: object with ``read()`` and ``close()`` methods (and
            ``seek()`` when ``need_seek`` is true).
        :param need_seek: if true, ``infile`` must also provide ``seek()``.
        :raises TypeError: if ``infile`` lacks a required method.
        """
        self.check_file(infile, need_seek)
        self.infile = infile
        self.closed = self.infile_closed = None
        # Byte buffers: ``bytes``/``bytearray`` behave like the former
        # ``str``/``array('c')`` pair on Python 2 and also exist on Python 3.
        self.inbuf = b""
        self.outbuf = bytearray()
        self.eof = self.infile_eof = None

    # -------------------------------------------------------------------------
    @classmethod
    def check_file(cls, likefile, need_seek=None):
        """Raise type error if ``likefile`` doesn't have necessary attributes.

        :param likefile: object to inspect.
        :param need_seek: if true, a ``seek`` attribute is required too.
        :raises TypeError: if ``read``, ``close`` or (optionally) ``seek`` is
            missing.
        """
        if not hasattr(likefile, 'read'):
            raise TypeError('Basis file must have a read() method')
        if not hasattr(likefile, 'close'):
            raise TypeError('Basis file must have a close() method')
        if need_seek and not hasattr(likefile, 'seek'):
            raise TypeError('Basis file must have a seek() method')

    # -------------------------------------------------------------------------
    def read(self, length=-1):
        """Build up self.outbuf, return first ``length`` bytes.

        :param length: maximum number of bytes to return; ``None`` or any
            negative value means "everything up to the end of the output".
        :return: byte string, shorter than ``length`` only when the end of
            the output has been reached.
        """
        if length is None or length < 0:
            while not self.eof:
                self._add_to_outbuf_once()
            real_len = len(self.outbuf)
        else:
            while not self.eof and len(self.outbuf) < length:
                self._add_to_outbuf_once()
            real_len = min(length, len(self.outbuf))
        return_val = bytes(self.outbuf[:real_len])
        del self.outbuf[:real_len]
        return return_val

    # -------------------------------------------------------------------------
    def _add_to_outbuf_once(self):
        """Add one cycle's worth of output to self.outbuf.

        :raises LibRSyncError: if the maker reports a librsync error.
        """
        if not self.infile_eof:
            self._add_to_inbuf()
        try:
            self.eof, len_inbuf_read, cycle_out = self.maker.cycle(self.inbuf)
        except _librsync.librsyncError as err:
            raise LibRSyncError(str(err))
        # Keep only the part of the input the maker has not consumed yet.
        self.inbuf = self.inbuf[len_inbuf_read:]
        self.outbuf += cycle_out

    # -------------------------------------------------------------------------
    def _add_to_inbuf(self):
        """Make sure len(self.inbuf) >= BLOCKSIZE (unless infile runs out).

        When ``infile`` is exhausted it is closed and flagged as such.
        """
        assert not self.infile_eof
        while len(self.inbuf) < BLOCKSIZE:
            new_in = self.infile.read(BLOCKSIZE)
            if not new_in:
                self.infile_eof = 1
                self._close_infile()
                break
            self.inbuf += new_in

    # -------------------------------------------------------------------------
    def _close_infile(self):
        """Close ``infile`` and remember that it has been closed.

        The call to ``close()`` is kept outside of the ``assert`` statement:
        asserts are stripped under ``python -O`` and the file would then
        never be closed.
        """
        status = self.infile.close()
        self.infile_closed = 1
        assert not status, 'infile.close() returned %r' % (status,)

    # -------------------------------------------------------------------------
    def close(self):
        """Close infile (at most once) and mark this object as closed."""
        if not self.infile_closed:
            self._close_infile()
        self.closed = 1
# =============================================================================
class SigFile(LikeFile):
    """File-like object which incrementally generates a librsync signature"""
    # pylint: disable = too-few-public-methods

    # -------------------------------------------------------------------------
    def __init__(self, infile, blocksize=BLOCKSIZE):
        """Set up signature generation for a basis file.

        :param infile: basis file; it only needs ``read()`` and ``close()``
            methods and is closed once its end has been reached.
        :param blocksize: block size handed to the signature maker.
        :raises LibRSyncError: if the signature maker cannot be created.
        """
        super(SigFile, self).__init__(infile)
        try:
            sigmaker = _librsync.new_sigmaker(blocksize)
        except _librsync.librsyncError as exc:
            raise LibRSyncError(str(exc))
        self.maker = sigmaker
# =============================================================================
class DeltaFile(LikeFile):
    """File-like object which incrementally generates a librsync delta."""
    # pylint: disable = too-few-public-methods

    # -------------------------------------------------------------------------
    def __init__(self, signature, new_file):
        """DeltaFile initializer - call with signature and new file.

        :param signature: either a byte string holding the signature or a
            file-like object with ``read()`` and ``close()`` methods; in the
            latter case it is read completely and closed here.
        :param new_file: file-like object with ``read()`` and ``close()``
            methods. It will be closed when self is closed.
        :raises TypeError: if a file argument lacks a required method.
        :raises LibRSyncError: if the delta maker cannot be created.
        """
        LikeFile.__init__(self, new_file)
        # ``bytes`` is ``str`` on Python 2, i.e. the former types.StringType.
        if isinstance(signature, bytes):
            sig_string = signature
        else:
            self.check_file(signature)
            try:
                sig_string = signature.read()
            finally:
                # close() must not live inside an ``assert``: asserts are
                # stripped under ``python -O`` and the signature file would
                # then stay open.
                close_status = signature.close()
            assert not close_status, \
                'signature.close() returned %r' % (close_status,)
        try:
            self.maker = _librsync.new_deltamaker(sig_string)
        except _librsync.librsyncError as err:
            raise LibRSyncError(str(err))
# =============================================================================
class PatchedFile(LikeFile):
    """File-like object which applies a librsync delta incrementally."""
    # pylint: disable = too-few-public-methods

    # -------------------------------------------------------------------------
    def __init__(self, basis_file, delta_file):
        """Set up patching of a basis file with a delta.

        :param basis_file: must be a true Python file object, because the C
            code may need to seek() around in it a lot.
        :param delta_file: file-like object; only ``read()`` and ``close()``
            are required.
        :raises TypeError: if ``delta_file`` lacks a required method or
            ``basis_file`` is not a true file.
        :raises LibRSyncError: if the patch maker cannot be created.
        """
        super(PatchedFile, self).__init__(delta_file)
        is_true_file = isinstance(basis_file, types.FileType)
        if not is_true_file:
            raise TypeError('basis_file must be a true file')
        try:
            patchmaker = _librsync.new_patchmaker(basis_file)
        except _librsync.librsyncError as exc:
            raise LibRSyncError(str(exc))
        self.maker = patchmaker
# =============================================================================
class LibRSyncError(Exception):
    """Error raised for failures in internal librsync processing.

    Typical cause: a bad signature. Errors of type
    ``_librsync.librsyncError`` are re-raised as this class because the
    exceptions created on the C side are, by default, not picklable.
    Fixing that inside ``_librsync`` is probably possible, but this scheme
    was easier.
    """
# =============================================================================
def get_block_size(filename):
    """Return a reasonable block size to use for the file ``filename``.

    If the block size is too big, deltas will be bigger than is
    necessary. If the block size is too small, making deltas and
    patching can take a really long time.

    :param filename: path of the file whose size drives the choice.
    :return: block size in bytes, a multiple of 5120 between 5120 and 10240.
    :raises OSError: propagated from ``os.path.getsize`` when the size of
        the file cannot be obtained.
    """
    min_blocksize = 5120
    max_blocksize = 10240
    file_len = getsize(filename)
    if file_len < 2048000:
        return min_blocksize  # set minimum of 5120 bytes
    # Split file into about 2000 pieces, rounding down to a multiple of 5120.
    # Explicit floor division keeps the rounding independent of the
    # interpreter's ``/`` semantics.
    file_blocksize = (file_len // (2000 * min_blocksize)) * min_blocksize
    return max(min(file_blocksize, max_blocksize), min_blocksize)