[or-cvs] r17629: {updater} Implement lengths in thandy objects, mostly: Accept them, an (in updater/trunk: . lib/thandy specs)
nickm at seul.org
nickm at seul.org
Mon Dec 15 21:18:19 UTC 2008
Author: nickm
Date: 2008-12-15 16:18:19 -0500 (Mon, 15 Dec 2008)
New Revision: 17629
Modified:
updater/trunk/TODO
updater/trunk/lib/thandy/SignerCLI.py
updater/trunk/lib/thandy/download.py
updater/trunk/lib/thandy/formats.py
updater/trunk/lib/thandy/repository.py
updater/trunk/specs/thandy-spec.txt
Log:
Implement lengths in thandy objects, mostly:
Accept them, and when they're present, don't fetch more bytes than
specified, since that would be dangerous. Include lengths in every
generated object type except for the timestamp, since that would break
exising code.
Modified: updater/trunk/TODO
===================================================================
--- updater/trunk/TODO 2008-12-15 21:17:53 UTC (rev 17628)
+++ updater/trunk/TODO 2008-12-15 21:18:19 UTC (rev 17629)
@@ -28,7 +28,18 @@
any cached yet.
- Security stuff that we should do that needs format changes.
- 2 Whenever we list a hash in a metafile, also list a file length.
+ . Whenever we list a hash in a metafile, also list a file length.
+ o Implement parsing; use length, when present, as a maximum
+ believable value to make sure we don't download too much
+ o Include lengths in generated packages and bundles
+ . Specify use of length field.
+ - Once everybody has been wanted to update their clients, include
+ lengths in timestamp files.
+ - Make lengths mandatory
+ - Maybe make lengths enforced for purposes other than a maximum
+ during fetch.
+ - Maybe stop early if Content-Length is greater than the expected
+ length.
- Think more about issues 4, 7(A,B,C)
Modified: updater/trunk/lib/thandy/SignerCLI.py
===================================================================
--- updater/trunk/lib/thandy/SignerCLI.py 2008-12-15 21:17:53 UTC (rev 17628)
+++ updater/trunk/lib/thandy/SignerCLI.py 2008-12-15 21:18:19 UTC (rev 17629)
@@ -92,17 +92,20 @@
configFile = args[0]
packages = {}
+ packageLen = {}
for pkgFile in args[1:]:
print "Loading", pkgFile
f = open(pkgFile, 'r')
p = json.load(f)
f.close()
+ packageLen = os.stat(pkgFile).st_size
_, r, _ = thandy.formats.checkSignedObj(p)
if r != 'package':
print pkgFile, "was not a package"
packages[p['signed']['name']] = p['signed']
- bundleObj = thandy.formats.makeBundleObj(configFile, packages.__getitem__)
+ bundleObj = thandy.formats.makeBundleObj(configFile, packages.__getitem__,
+ packageLen.__getitem__)
signable = thandy.formats.makeSignable(bundleObj)
ks = getKeyStore()
Modified: updater/trunk/lib/thandy/download.py
===================================================================
--- updater/trunk/lib/thandy/download.py 2008-12-15 21:17:53 UTC (rev 17628)
+++ updater/trunk/lib/thandy/download.py 2008-12-15 21:18:19 UTC (rev 17629)
@@ -345,8 +345,8 @@
class DownloadJob:
"""Abstract base class. Represents a thing to be downloaded, and the
knowledge of how to download it."""
- def __init__(self, targetPath, tmpPath, wantHash=None, repoFile=None,
- useTor=False):
+ def __init__(self, targetPath, tmpPath, wantHash=None,
+ repoFile=None, useTor=False, wantLength=None):
"""Create a new DownloadJob. When it is finally downloaded,
store it in targetPath. Store partial results in tmpPath;
if there is already a file in tmpPath, assume that it is an
@@ -357,6 +357,7 @@
self._destPath = targetPath
self._tmpPath = tmpPath
self._wantHash = wantHash
+ self._wantLength = wantLength
self._repoFile = repoFile
self._useTor = useTor
@@ -470,6 +471,12 @@
have_length = os.stat(self._tmpPath).st_size
logging.info("Have stalled file for %s with %s bytes", url,
have_length)
+ if self._wantLength != None:
+ if self._wantLength >= have_length:
+ logging.warn("Stalled file is too long; removing it")
+ self._removeTmpFile()
+ haveStalled = False
+ have_length = None
else:
have_length = None
@@ -506,7 +513,14 @@
total += len(c)
logging.debug("Got %s/%s bytes from %s",
total, expectLength, url)
+ if self._wantLength != None and total > self._wantLength:
+ logging.warn("Read too many bytes from %s; got %s, but "
+ "wanted %s", url, total, self._wantLength)
+ break
+ if self._wantLength != None and total != self._wantLength:
+ logging.warn("Length wrong on file %s", url)
+
finally:
if f_in is not None:
f_in.close()
Modified: updater/trunk/lib/thandy/formats.py
===================================================================
--- updater/trunk/lib/thandy/formats.py 2008-12-15 21:17:53 UTC (rev 17628)
+++ updater/trunk/lib/thandy/formats.py 2008-12-15 21:18:19 UTC (rev 17629)
@@ -375,6 +375,7 @@
RELPATH_SCHEMA = PATH_PATTERN_SCHEMA = S.AnyStr()
URL_SCHEMA = S.AnyStr()
VERSION_SCHEMA = S.ListOf(S.Any()) #XXXX WRONG
+LENGTH_SCHEMA = S.Int(lo=0)
# A single signature of an object. Indicates the signature, the id of the
# signing key, and the signing method.
@@ -392,7 +393,7 @@
# A role: indicates that a key is allowed to certify a kind of
# document at a certain place in the repo.
-ROLE_SCHEMA = S.Struct([ROLENAME_SCHEMA, PATH_PATTERN_SCHEMA])
+ROLE_SCHEMA = S.Struct([ROLENAME_SCHEMA, PATH_PATTERN_SCHEMA], allowMore=True)
# A Keylist: indicates a list of live keys and their roles.
KEYLIST_SCHEMA = S.Obj(
@@ -415,12 +416,12 @@
TIMESTAMP_SCHEMA = S.Obj(
_type = S.Str("Timestamp"),
at = TIME_SCHEMA,
- m = S.Struct([TIME_SCHEMA, HASH_SCHEMA]),
- k = S.Struct([TIME_SCHEMA, HASH_SCHEMA]),
+ m = S.Struct([TIME_SCHEMA, HASH_SCHEMA], [LENGTH_SCHEMA], allowMore=True),
+ k = S.Struct([TIME_SCHEMA, HASH_SCHEMA], [LENGTH_SCHEMA], allowMore=True),
b = S.DictOf(keySchema=S.AnyStr(),
valSchema=
- S.Struct([ VERSION_SCHEMA, RELPATH_SCHEMA, TIME_SCHEMA, HASH_SCHEMA ]))
- )
+ S.Struct([ VERSION_SCHEMA, RELPATH_SCHEMA, TIME_SCHEMA, HASH_SCHEMA ], [LENGTH_SCHEMA], allowMore=True))
+ )
# A Bundle: lists a bunch of packages that should be updated in tandem
BUNDLE_SCHEMA = S.Obj(
@@ -436,6 +437,7 @@
version=VERSION_SCHEMA,
path=RELPATH_SCHEMA,
hash=HASH_SCHEMA,
+ length=S.Opt(LENGTH_SCHEMA),
order=S.Struct([S.Int(), S.Int(), S.Int()]),
optional=S.Opt(S.Bool()),
gloss=S.DictOf(S.AnyStr(), S.AnyStr()),
@@ -479,7 +481,8 @@
ITEM_INFO_SCHEMA = S.AllOf([CHECK_ITEM_SCHEMA, INSTALL_ITEM_SCHEMA])
-ITEM_SCHEMA = S.Struct([RELPATH_SCHEMA, HASH_SCHEMA], [ITEM_INFO_SCHEMA],
+ITEM_SCHEMA = S.Struct([RELPATH_SCHEMA, HASH_SCHEMA],
+ [ITEM_INFO_SCHEMA, LENGTH_SCHEMA],
allowMore=True)
def checkPackageFormatConsistency(obj):
@@ -578,17 +581,18 @@
self.addKey(key)
class StampedInfo:
- def __init__(self, ts, hash, version=None, relpath=None):
+ def __init__(self, ts, hash, version=None, relpath=None, length=None):
self._ts = ts
self._hash = hash
self._version = version
self._relpath = relpath
+ self._length = length
@staticmethod
- def fromJSonFields(timeStr, hashStr):
+ def fromJSonFields(timeStr, hashStr, length=None):
t = parseTime(timeStr)
h = parseHash(hashStr)
- return StampedInfo(t, h)
+ return StampedInfo(t, h, length=length)
def getHash(self):
return self._hash
@@ -596,6 +600,9 @@
def getRelativePath(self):
return self._relpath
+ def getLength(self):
+ return self._length
+
class TimestampFile:
def __init__(self, at, mirrorlistinfo, keylistinfo, bundleinfo):
self._time = at
@@ -607,15 +614,18 @@
def fromJSon(obj):
# must be validated.
at = parseTime(obj['at'])
- m = StampedInfo.fromJSonFields(*obj['m'][:2])
- k = StampedInfo.fromJSonFields(*obj['k'][:2])
+ m = StampedInfo.fromJSonFields(*obj['m'][:3])
+ k = StampedInfo.fromJSonFields(*obj['k'][:3])
b = {}
for name, bundle in obj['b'].iteritems():
v = bundle[0]
rp = bundle[1]
t = parseTime(bundle[2])
h = parseHash(bundle[3])
- b[name] = StampedInfo(t, h, v, rp)
+ ln = None
+ if len(bundle) > 4:
+ ln = bundle[4]
+ b[name] = StampedInfo(t, h, v, rp, ln)
return TimestampFile(at, m, k, b)
@@ -672,6 +682,8 @@
f = open(package_fname, 'rb')
digest = getFileDigest(f)
+ f.close()
+ f_len = os.stat(package_fname).st_size
# Check fields!
extra = {}
@@ -681,7 +693,7 @@
'location' : r['location'], #DOCDOC
'version' : r['version'],
'format' : r['format'],
- 'files' : [ [ r['relpath'], formatHash(digest), extra ] ],
+ 'files' : [ [ r['relpath'], formatHash(digest), extra, f_len ] ],
'shortdesc' : shortDescs,
'longdesc' : longDescs
}
@@ -725,7 +737,7 @@
return result
-def makeBundleObj(config_fname, getPackage):
+def makeBundleObj(config_fname, getPackage, getPackageLength):
packages = []
def ShortGloss(lang, val): packages[-1]['gloss'][lang] = val
def LongGloss(lang, val): packages[-1]['longgloss'][lang] = val
@@ -763,6 +775,9 @@
raise thandy.FormatException("No such package as %s"%p['name'])
p['hash'] = formatHash(getDigest(pkginfo))
+ length = getPackageLength(p['name'])
+ if length != None:
+ p['length'] = length
if p['path'] == None:
p['path'] = pkginfo['location']
if p['version'] == None:
Modified: updater/trunk/lib/thandy/repository.py
===================================================================
--- updater/trunk/lib/thandy/repository.py 2008-12-15 21:17:53 UTC (rev 17628)
+++ updater/trunk/lib/thandy/repository.py 2008-12-15 21:18:19 UTC (rev 17629)
@@ -33,6 +33,9 @@
self._signedFormat = signedFormat
self._needSigs = needSigs
+ # The length of the file as stored on disk.
+ self._length = None
+
# The contents of the file, parsed. None if we haven't loaded
# the file.
self._main_obj = None
@@ -70,6 +73,7 @@
f = None
fd = os.open(fname, os.O_RDONLY)
try:
+ self._length = os.fstat(fd).st_size
f = os.fdopen(fd, 'r')
except:
os.close(fd)
@@ -295,7 +299,8 @@
return None
def getFilesToUpdate(self, now=None, trackingBundles=(), hashDict=None,
- usePackageSystem=True, installableDict=None):
+ lengthDict=None, usePackageSystem=True,
+ installableDict=None):
"""Return a set of relative paths for all files that we need
to fetch. Assumes that we care about the bundles
'trackingBundles'.
@@ -312,6 +317,9 @@
if installableDict == None:
installableDict = {}
+ if lengthDict == None:
+ lengthDict = {}
+
pkgItems = None
need = set()
@@ -370,6 +378,10 @@
ts.getKeylistInfo().getHash()
hashDict[self._mirrorlistFile.getRelativePath()] = \
ts.getMirrorlistInfo().getHash()
+ lengthDict[self._keylistFile.getRelativePath()] = \
+ ts.getKeylistInfo().getLength()
+ lengthDict[self._mirrorlistFile.getRelativePath()] = \
+ ts.getMirrorlistInfo().getLength()
h_kf = thandy.formats.getDigest(self._keylistFile.get())
h_expected = ts.getKeylistInfo().getHash()
@@ -407,6 +419,7 @@
rp = binfo.getRelativePath()
h_expected = binfo.getHash()
hashDict[rp] = h_expected
+ lengthDict[rp] = binfo.getLength()
bfile = self.getBundleFile(rp)
try:
bfile.load()
@@ -440,6 +453,7 @@
pfile = self.getPackageFile(rp)
h_expected = thandy.formats.parseHash(pkginfo['hash'])
hashDict[rp] = h_expected
+ lengthDict[rp] = pkginfo.get('length')
try:
pfile.load()
except OSError:
@@ -498,6 +512,8 @@
h_expected = thandy.formats.parseHash(h)
hashDict[rp] = h_expected
+ if len(f) > 3:
+ lengthDict[rp] = h[3]
fn = self.getFilename(rp)
try:
h_got = thandy.formats.getFileDigest(fn)
Modified: updater/trunk/specs/thandy-spec.txt
===================================================================
--- updater/trunk/specs/thandy-spec.txt 2008-12-15 21:17:53 UTC (rev 17628)
+++ updater/trunk/specs/thandy-spec.txt 2008-12-15 21:18:19 UTC (rev 17629)
@@ -397,10 +397,10 @@
{ "_type" : Timestamp,
"at" : TIME,
- "m" : [ TIME, HASH ],
- "k" : [ TIME, HASH ],
+ "m" : [ TIME, HASH, LENGTH ],
+ "k" : [ TIME, HASH, LENGTH ],
"b" : { NAME :
- [ [ Version, Path, Time, Hash ] ] }
+ [ [ Version, Path, Time, Hash, (Length) ] ] }
}
TIME is when the timestamp was signed. MIRRORLISTHASH is the digest
@@ -409,6 +409,8 @@
bundles and their locations and hashes. The "name" of a bundle (in
this context) is the directory component of the bundle's path.
+ The LENGTH field may be absent on very old timestamp files.
+
3.6. File formats: bundle files
{ "_type" : "Bundle",
@@ -423,6 +425,7 @@
"version" : VERSION,
"path" : PATH,
"hash" : HASH,
+ ("length" : LENGTH),
"order" : [ INST, UPDATE, REMOVE ],
("optional : BOOL, )
"gloss" : { LANG : TEXT },
@@ -446,6 +449,9 @@
language. The UI should display the must appropriate language to the
user.
+ The LENGTH field is required on all new bundles, but may be absent
+ on very old ones.
+
3.7. File formats: package files
{ "_type" : "Package",
@@ -454,13 +460,14 @@
"version" : VERSION,
"format" : FMT,
"ts" : TIME,
- "files" : [ [ PATH, HASH, INFO ], ... ],
+ "files" : [ [ PATH, HASH, INFO, (LENGTH) ], ... ],
"shortdesc" : { LANG : DESC, ... },
"longdesc" : { LANG : DESC, ... },
}
Most elements are self-explanatory. To interpret the 'INFO' entry
- for each installable file, see section 6.
+ for each installable file, see section 6. The LENGTH field is
+ required on all new packages, but may be absent on very old ones.
No two package files in the same repository should have the same
name and version. If a package needs to be changed, the version
@@ -508,6 +515,10 @@
Clients SHOULD cache at least the latest versions they have received
of all files.
+ When dowloading a file, if the client knows what that file's length
+ should be, it SHOULD NOT accept a longer file, and SHOULD NOT
+ continue the download past the file length.
+
4.1.1. Download preferences
Users should be able to specify that packages must be only
More information about the tor-commits
mailing list