[tor-commits] [stem/master] Utils for human readable time and size metrics

Sat Oct 20 23:02:10 UTC 2012

commit 70227efa08d77f65b1eea069ae93bd0ba0bf1993
Author: Damian Johnson <atagar at torproject.org>
Date:   Fri Oct 19 09:41:34 2012 -0700

    Utils for human readable time and size metrics
    
    Snagging the utilities from arm's src/util/uiTools.py for making human readable
    labels of time and size metrics. They'll be handy not only for stem's users,
    but also for our exception messages, and I want to use them in our tutorials.
    
    I've simplified these functions a bit and added unit tests for the pydoc
    examples. More tests are needed, though...
---
 run_tests.py                |    2 +
 stem/util/enum.py           |   24 +----
 stem/util/str_tools.py      |  244 +++++++++++++++++++++++++++++++++++++++++++
 stem/util/term.py           |    3 +-
 test/unit/util/__init__.py  |    1 +
 test/unit/util/enum.py      |   16 ---
 test/unit/util/str_tools.py |   53 +++++++++
 7 files changed, 304 insertions(+), 39 deletions(-)

diff --git a/run_tests.py b/run_tests.py
index c891bf6..8cd393c 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -37,6 +37,7 @@ import test.unit.util.conf
 import test.unit.util.connection
 import test.unit.util.enum
 import test.unit.util.proc
+import test.unit.util.str_tools
 import test.unit.util.system
 import test.unit.util.tor_tools
 import test.unit.exit_policy.policy
@@ -114,6 +115,7 @@ UNIT_TESTS = (
   test.unit.util.connection.TestConnection,
   test.unit.util.conf.TestConf,
   test.unit.util.proc.TestProc,
+  test.unit.util.str_tools.TestStrTools,
   test.unit.util.system.TestSystem,
   test.unit.util.tor_tools.TestTorTools,
   test.unit.descriptor.export.TestExport,
diff --git a/stem/util/enum.py b/stem/util/enum.py
index 50f6728..72c4443 100644
--- a/stem/util/enum.py
+++ b/stem/util/enum.py
@@ -24,7 +24,6 @@ with overwritten string counterparts:
 
 ::
 
-  to_camel_case - converts a string to camel case
   UppercaseEnum - Provides an enum instance with capitalized values.
   Enum - Provides a basic, ordered  enumeration.
     |- keys - string representation of our enum keys
@@ -35,26 +34,7 @@ with overwritten string counterparts:
     +- __iter__ - iterator over our enum keys
 """
 
-def to_camel_case(label, word_divider = " "):
-  """
-  Converts the given string to camel case, ie:
-  
-  ::
-  
-    >>> to_camel_case("I_LIKE_PEPPERJACK!")
-    'I Like Pepperjack!'
-  
-  :param str label: input string to be converted
-  :param str word_divider: string used to replace underscores
-  """
-  
-  words = []
-  for entry in label.split("_"):
-    if len(entry) == 0: words.append("")
-    elif len(entry) == 1: words.append(entry.upper())
-    else: words.append(entry[0].upper() + entry[1:].lower())
-  
-  return word_divider.join(words)
+import stem.util.str_tools
 
 def UppercaseEnum(*args):
   """
@@ -86,7 +66,7 @@ class Enum(object):
     
     for entry in args:
       if isinstance(entry, str):
-        key, val = entry, to_camel_case(entry)
+        key, val = entry, stem.util.str_tools.to_camel_case(entry)
       elif isinstance(entry, tuple) and len(entry) == 2:
         key, val = entry
       else: raise ValueError("Unrecognized input: %s" % args)
diff --git a/stem/util/str_tools.py b/stem/util/str_tools.py
new file mode 100644
index 0000000..b874425
--- /dev/null
+++ b/stem/util/str_tools.py
@@ -0,0 +1,244 @@
+"""
+Toolkit for various string activity.
+
+**Module Overview:**
+
+::
+
+  to_camel_case - converts a string to camel case
+  get_size_label - human readable label for a number of bytes
+  get_time_label - human readable label for a number of seconds
+  get_time_labels - human readable labels for each time unit
+  get_short_time_label - condensed time label output
+  parse_short_time_label - seconds represented by a short time label
+"""
+
+# label conversion tuples of the form...
+# (bits / bytes / seconds, short label, long label)
+SIZE_UNITS_BITS = (
+  (140737488355328.0, " Pb", " Petabit"),
+  (137438953472.0, " Tb", " Terabit"),
+  (134217728.0, " Gb", " Gigabit"),
+  (131072.0, " Mb", " Megabit"),
+  (128.0, " Kb", " Kilobit"),
+  (0.125, " b", " Bit"),
+)
+
+SIZE_UNITS_BYTES = (
+  (1125899906842624.0, " PB", " Petabyte"),
+  (1099511627776.0, " TB", " Terabyte"),
+  (1073741824.0, " GB", " Gigabyte"),
+  (1048576.0, " MB", " Megabyte"),
+  (1024.0, " KB", " Kilobyte"),
+  (1.0, " B", " Byte"),
+)
+
+TIME_UNITS = (
+  (86400.0, "d", " day"),
+  (3600.0, "h", " hour"),
+  (60.0, "m", " minute"),
+  (1.0, "s", " second"),
+)
+
+def to_camel_case(label, word_divider = " "):
+  """
+  Converts the given string to camel case, ie:
+  
+  ::
+  
+    >>> to_camel_case("I_LIKE_PEPPERJACK!")
+    'I Like Pepperjack!'
+  
+  :param str label: input string to be converted
+  :param str word_divider: string used to replace underscores
+  """
+  
+  words = []
+  for entry in label.split("_"):
+    if len(entry) == 0: words.append("")
+    elif len(entry) == 1: words.append(entry.upper())
+    else: words.append(entry[0].upper() + entry[1:].lower())
+  
+  return word_divider.join(words)
+
+def get_size_label(byte_count, decimal = 0, is_long = False, is_bytes = True):
+  """
+  Converts a number of bytes into a human readable label in its most
+  significant units. For instance, 7500 bytes would return "7 KB". If the
+  is_long option is used this expands unit labels to be the properly pluralized
+  full word (for instance 'Kilobytes' rather than 'KB'). Units go up through
+  petabytes.
+  
+  ::
+  
+    >>> get_size_label(2000000)
+    '1 MB'
+    
+    >>> get_size_label(1050, 2)
+    '1.02 KB'
+    
+    >>> get_size_label(1050, 3, True)
+    '1.025 Kilobytes'
+  
+  :param int byte_count: number of bytes to be converted
+  :param int decimal: number of decimal digits to be included
+  :param bool is_long: expands units label
+  :param bool is_bytes: provides units in bytes if true, bits otherwise
+  """
+  
+  if is_bytes: return _get_label(SIZE_UNITS_BYTES, byte_count, decimal, is_long)
+  else: return _get_label(SIZE_UNITS_BITS, byte_count, decimal, is_long)
+
+def get_time_label(seconds, decimal = 0, is_long = False):
+  """
+  Converts seconds into a time label truncated to its most significant units.
+  For instance, 7500 seconds would return "2h". Units go up through days.
+  
+  This defaults to presenting single character labels, but if the is_long
+  option is used this expands labels to be the full word (space included and
+  properly pluralized). For instance, "4h" would be "4 hours" and "1m" would
+  become "1 minute".
+  
+  ::
+  
+    >>> get_time_label(10000)
+    '2h'
+    
+    >>> get_time_label(61, 1, True)
+    '1.0 minute'
+    
+    >>> get_time_label(61, 2, True)
+    '1.01 minutes'
+  
+  :param int seconds: number of seconds to be converted
+  :param int decimal: number of decimal digits to be included
+  :param bool is_long: expands units label
+  """
+  
+  return _get_label(TIME_UNITS, seconds, decimal, is_long)
+
+def get_time_labels(seconds, is_long = False):
+  """
+  Provides a list of label conversions for each time unit, starting with its
+  most significant units on down. Any counts that evaluate to zero are omitted.
+  For example...
+  
+  ::
+  
+    >>> get_time_labels(400)
+    ['6m', '40s']
+    
+    >>> get_time_labels(3640, True)
+    ['1 hour', '40 seconds']
+  
+  :param int seconds: number of seconds to be converted
+  :param bool is_long: expands units label
+  """
+  
+  time_labels = []
+  
+  for count_per_unit, _, _ in TIME_UNITS:
+    if seconds >= count_per_unit:
+      time_labels.append(_get_label(TIME_UNITS, seconds, 0, is_long))
+      seconds %= count_per_unit
+  
+  return time_labels
+
+def get_short_time_label(seconds):
+  """
+  Provides a time in the following format:
+  [[dd-]hh:]mm:ss
+  
+  :param int seconds: number of seconds to be converted
+  """
+  
+  time_comp = {}
+  
+  for amount, _, label in TIME_UNITS:
+    count = int(seconds / amount)
+    seconds %= amount
+    time_comp[label.strip()] = count
+  
+  label = "%02i:%02i" % (time_comp["minute"], time_comp["second"])
+  
+  if time_comp["day"]:
+    label = "%i-%02i:%s" % (time_comp["day"], time_comp["hour"], label)
+  elif time_comp["hour"]:
+    label = "%02i:%s" % (time_comp["hour"], label)
+  
+  return label
+
+def parse_short_time_label(label):
+  """
+  Provides the number of seconds corresponding to the formatting used for the
+  cputime and etime fields of ps:
+  [[dd-]hh:]mm:ss or mm:ss.ss
+  
+  :param str label: time entry to be parsed
+  
+  :raises: ValueError if input is malformed
+  """
+  
+  days, hours, minutes, seconds = '0', '0', '0', '0'
+  
+  if '-' in label:
+    days, label = label.split('-', 1)
+  
+  time_comp = label.split(":")
+  
+  if len(time_comp) == 3:
+    hours, minutes, seconds = time_comp
+  elif len(time_comp) == 2:
+    minutes, seconds = time_comp
+  else:
+    raise ValueError("Invalid time format, we expected '[[dd-]hh:]mm:ss' or 'mm:ss.ss': %s" % label)
+  
+  try:
+    time_sum = int(float(seconds))
+    time_sum += int(minutes) * 60
+    time_sum += int(hours) * 3600
+    time_sum += int(days) * 86400
+    return time_sum
+  except ValueError:
+    raise ValueError("Non-numeric value in time entry: %s" % label)
+
+def _get_label(units, count, decimal, is_long):
+  """
+  Provides label corresponding to units of the highest significance in the
+  provided set. This rounds down (ie, integer truncation after visible units).
+  
+  :param tuple units: type of units to be used for conversion, containing (count_per_unit, short_label, long_label)
+  :param int count: number of base units being converted
+  :param int decimal: decimal precision of label
+  :param bool is_long: uses the long label if true, short label otherwise
+  """
+  
+  # formatted string for the requested number of digits
+  label_format = "%%.%if" % decimal
+  
+  # for values below one (zero, negative, or fractional) use the smallest units
+  if count < 1:
+    units_label = units[-1][2] + "s" if is_long else units[-1][1]
+    return "%s%s" % (label_format % count, units_label)
+  
+  for count_per_unit, short_label, long_label in units:
+    if count >= count_per_unit:
+      # Rounding down with a '%f' is a little clunky. Reducing the count so
+      # it'll divide evenly as the rounded down value.
+      
+      count -= count % (count_per_unit / (10 ** decimal))
+      count_label = label_format % (count / count_per_unit)
+      
+      if is_long:
+        # Pluralize if any of the visible units make it greater than one. For
+        # instance 1.0003 is plural but 1.000 isn't.
+        
+        if decimal > 0:
+          is_plural = count > count_per_unit
+        else:
+          is_plural = count >= count_per_unit * 2
+        
+        return count_label + long_label + ("s" if is_plural else "")
+      else:
+        return count_label + short_label
+
diff --git a/stem/util/term.py b/stem/util/term.py
index fd4c22e..556b481 100644
--- a/stem/util/term.py
+++ b/stem/util/term.py
@@ -3,6 +3,7 @@ Utilities for working with the terminal.
 """
 
 import stem.util.enum
+import stem.util.str_tools
 
 TERM_COLORS = ("BLACK", "RED", "GREEN", "YELLOW", "BLUE", "MAGENTA", "CYAN", "WHITE")
 
@@ -41,7 +42,7 @@ def format(msg, *attr):
   
   encodings = []
   for text_attr in attr:
-    text_attr, encoding = stem.util.enum.to_camel_case(text_attr), None
+    text_attr, encoding = stem.util.str_tools.to_camel_case(text_attr), None
     encoding = FG_ENCODING.get(text_attr, encoding)
     encoding = BG_ENCODING.get(text_attr, encoding)
     encoding = ATTR_ENCODING.get(text_attr, encoding)
diff --git a/test/unit/util/__init__.py b/test/unit/util/__init__.py
index d0eeda3..33b81d3 100644
--- a/test/unit/util/__init__.py
+++ b/test/unit/util/__init__.py
@@ -7,6 +7,7 @@ __all__ = [
   "connection",
   "enum",
   "proc",
+  "str_tools",
   "system",
   "tor_tools",
 ]
diff --git a/test/unit/util/enum.py b/test/unit/util/enum.py
index 5a28f62..88469dd 100644
--- a/test/unit/util/enum.py
+++ b/test/unit/util/enum.py
@@ -6,22 +6,6 @@ import unittest
 import stem.util.enum
 
 class TestEnum(unittest.TestCase):
-  def test_to_camel_case(self):
-    """
-    Checks the stem.util.enum.to_camel_case function.
-    """
-    
-    # test the pydoc example
-    self.assertEquals("I Like Pepperjack!", stem.util.enum.to_camel_case("I_LIKE_PEPPERJACK!"))
-    
-    # check a few edge cases
-    self.assertEquals("", stem.util.enum.to_camel_case(""))
-    self.assertEquals("Hello", stem.util.enum.to_camel_case("hello"))
-    self.assertEquals("Hello", stem.util.enum.to_camel_case("HELLO"))
-    self.assertEquals("Hello  World", stem.util.enum.to_camel_case("hello__world"))
-    self.assertEquals("Hello\tworld", stem.util.enum.to_camel_case("hello\tWORLD"))
-    self.assertEquals("Hello\t\tWorld", stem.util.enum.to_camel_case("hello__world", "\t"))
-  
   def test_enum_examples(self):
     """
     Checks that the pydoc examples are accurate.
diff --git a/test/unit/util/str_tools.py b/test/unit/util/str_tools.py
new file mode 100644
index 0000000..ce5539f
--- /dev/null
+++ b/test/unit/util/str_tools.py
@@ -0,0 +1,53 @@
+"""
+Unit tests for the stem.util.str_tools functions.
+"""
+
+import unittest
+from stem.util import str_tools
+
+class TestStrTools(unittest.TestCase):
+  def test_to_camel_case(self):
+    """
+    Checks the to_camel_case() function.
+    """
+    
+    # test the pydoc example
+    self.assertEquals("I Like Pepperjack!", str_tools.to_camel_case("I_LIKE_PEPPERJACK!"))
+    
+    # check a few edge cases
+    self.assertEquals("", str_tools.to_camel_case(""))
+    self.assertEquals("Hello", str_tools.to_camel_case("hello"))
+    self.assertEquals("Hello", str_tools.to_camel_case("HELLO"))
+    self.assertEquals("Hello  World", str_tools.to_camel_case("hello__world"))
+    self.assertEquals("Hello\tworld", str_tools.to_camel_case("hello\tWORLD"))
+    self.assertEquals("Hello\t\tWorld", str_tools.to_camel_case("hello__world", "\t"))
+  
+  def test_get_size_label(self):
+    """
+    Checks the get_size_label() function.
+    """
+    
+    # test the pydoc examples
+    self.assertEquals('1 MB', str_tools.get_size_label(2000000))
+    self.assertEquals('1.02 KB', str_tools.get_size_label(1050, 2))
+    self.assertEquals('1.025 Kilobytes', str_tools.get_size_label(1050, 3, True))
+  
+  def test_get_time_label(self):
+    """
+    Checks the get_time_label() function.
+    """
+    
+    # test the pydoc examples
+    self.assertEquals('2h', str_tools.get_time_label(10000))
+    self.assertEquals('1.0 minute', str_tools.get_time_label(61, 1, True))
+    self.assertEquals('1.01 minutes', str_tools.get_time_label(61, 2, True))
+  
+  def test_get_time_labels(self):
+    """
+    Checks the get_time_labels() function.
+    """
+    
+    # test the pydoc examples
+    self.assertEquals(['6m', '40s'], str_tools.get_time_labels(400))
+    self.assertEquals(['1 hour', '40 seconds'], str_tools.get_time_labels(3640, True))
+