[tor-commits] [stegotorus/master] changed pdfwrap and pdfunwrap to use zlib to encode data in stream objects

Fri Jul 20 23:17:08 UTC 2012

commit e1024c2d7e14fad7f832454326a4a9ea94e1e736
Author: Steven Cheung <steven.cheung at sri.com>
Date:   Sun Jun 10 06:49:45 2012 -0700

    changed pdfwrap and pdfunwrap to use zlib to encode data in stream objects
---
 src/steg/pdfSteg.cc |  180 ++++++++++++++++++++++++++++++++------------------
 1 files changed, 115 insertions(+), 65 deletions(-)

diff --git a/src/steg/pdfSteg.cc b/src/steg/pdfSteg.cc
index 658c319..2f67051 100644
--- a/src/steg/pdfSteg.cc
+++ b/src/steg/pdfSteg.cc
@@ -7,19 +7,21 @@
 #include "connections.h"
 #include "payloads.h"
 #include <event2/buffer.h>
+#include "compression.h"
 
 /* pdfSteg: A PDF-based steganography module */
 
 #define PDF_DELIMITER    '?'
 #define PDF_DELIMITER2   '.'
 
-#define STREAM_BEGIN       ">>stream"
-#define STREAM_BEGIN_SIZE  8
+#define STREAM_BEGIN       "stream"
+#define STREAM_BEGIN_SIZE  6
 #define STREAM_END         "endstream"
 #define STREAM_END_SIZE    9
 
 #define DEBUG
 
+
 /*
  * pdf_add_delimiter processes the input buffer (inbuf) of length
  * inbuflen, copies it to output buffer (outbuf) of size outbufsize,
@@ -158,6 +160,38 @@ pdf_remove_delimiter(const char *inbuf, size_t inbuflen,
   return cnt;
 }
 
+
+/*
+ * strInBinaryRewind looks for char array pattern of length patternLen
+ * in a char array blob of length blobLen in the *reverse* direction
+ *
+ * returns a pointer to the start of the last occurrence of pattern in blob
+ * (the first match when scanning backward from the end), or NULL if none
+ *
+ */
+char *
+strInBinaryRewind (const char *pattern, unsigned int patternLen,
+             const char *blob, unsigned int blobLen) {
+  int found = 0;
+  char *cp;
+
+  if (patternLen < 1 || blobLen < 1) return 0;
+  cp = (char *) blob + blobLen - 1;
+  while (cp >= blob) {
+    if (cp - (patternLen-1) < blob) break;
+    if (*cp == pattern[patternLen-1]) {
+      if (memcmp(cp-(patternLen-1), pattern, patternLen-1) == 0) {
+        found = 1;
+        break;
+      }
+    }
+    cp--;
+  }
+  if (found) return (cp-(patternLen-1));
+  else return NULL;
+}
+
+
 /*
  * pdf_wrap embeds data of length dlen inside the stream objects of the PDF
  * document (length plen) that appears in the body of a HTTP msg, and
@@ -172,29 +206,30 @@ pdf_wrap(const char *data, size_t dlen,
          const char *pdfTemplate, size_t plen,
          char *outbuf, size_t outbufsize)
 {
-  char data2[dlen*2+2];
-  const char *tp, *dp, *plimit;
-  char *op, *streamStart, *streamEnd;
-  size_t data2len, cnt, size, size2;
-  ssize_t rv;
+  int data2size = 2*dlen+10; 
+  // See RFC 1950 for the zlib format. In addition to the compressed data,
+  // the output carries: 2-byte compression method and flags +
+  // 4-byte dict ID +
+  // 4-byte ADLER32 checksum (hence the extra 10 bytes in data2size)
+  char data2[data2size];
+  const char *tp, *plimit;
+  char *op, *streamStart, *streamEnd, *filterStart;
+  size_t data2len, size;
+  int np;
 
   if (dlen > SIZE_T_CEILING || plen > SIZE_T_CEILING ||
       outbufsize > SIZE_T_CEILING)
     return -1;
 
-  // assumption: pdf_wrap is length-preserving
-  if (outbufsize < plen) return -1;
-
-  rv = pdf_add_delimiter(data, dlen, data2, HTTP_MSG_BUF_SIZE,
-                               PDF_DELIMITER, PDF_DELIMITER2);
-  if (rv < 1)
+  data2len = compress((const uint8_t *)data, dlen, 
+                      (uint8_t *)data2, data2size, c_format_zlib);
+  if ((int)data2len < 0) {
+    log_warn("compress failed and returned %lu", data2len);
     return -1;
-  data2len = rv;
+  }
 
   op = outbuf;       // current pointer for output buffer
   tp = pdfTemplate;  // current pointer for http msg template
-  dp = data2;        // current pointer for data2
-  cnt = 0;           // number of data char encoded
   plimit = pdfTemplate+plen;
 
   while (tp < plimit) {
@@ -205,43 +240,47 @@ pdf_wrap(const char *data, size_t dlen,
       return -1;
     }
 
-    // copy everything between tp and "stream" (inclusive) to outbuf
-    size = streamStart - tp + STREAM_BEGIN_SIZE;
-    memcpy(op, tp, size);
-    op += size;
-    tp = streamStart + STREAM_BEGIN_SIZE;
-
     streamEnd = strInBinary(STREAM_END, STREAM_END_SIZE, tp, plimit-tp);
     if (streamEnd == NULL) {
       log_warn("Cannot find endstream in pdf");
       return -1;
     }
 
-    // count the number of usable char between tp and streamEnd
-    size = streamEnd-tp;
-
-    // encoding data in the stream obj
-    if (size > 0) {
-        size2 = data2len - cnt;
-        if (size < size2) {
-          memcpy(op, dp, size);
-          op += size; tp += size; dp += size;
-          memcpy(op, tp, STREAM_END_SIZE);
-          op += STREAM_END_SIZE; tp += STREAM_END_SIZE;
-          cnt += size;
-        } else { // done encoding data
-          memcpy(op, dp, size2);
-          op += size2; tp += size2; dp += size2;
-          cnt += size2;
-          break;
-        }
-        log_debug("Encoded %lu bytes in pdf", (unsigned long)size);
-    } else { // empty stream
-      memcpy(op, tp, STREAM_END_SIZE);
-      op += STREAM_END_SIZE; tp += STREAM_END_SIZE;
+    filterStart = strInBinaryRewind(" obj", 4, tp, streamStart-tp);
+    if (filterStart == NULL) {
+      log_warn("Cannot find obj\n");
+      return -1;
+    } else {
+      // copy everything from tp up to and including " obj" to outbuf
+      size = filterStart - tp + 4;
+      memcpy(op, tp, size);
+      op[size] = 0;
+      op += size;
+
+      // write meta-data for stream object
+      np = sprintf(op, " <<\n/Length %d\n/Filter /FlateDecode\n>>\nstream\n", (int)data2len);
+      if (np < 0) {
+        log_warn("sprintf failed\n");
+        return -1;
+      }
+      op += np;
+
+      // copy compressed data to outbuf 
+      memcpy(op, data2, data2len);
+      op += data2len;
+
+      // write endstream to outbuf
+      np = sprintf(op, "\nendstream");
+      if (np < 0) {
+        log_warn("sprintf failed\n");
+        return -1;
+      }
+      op += np;
     }
 
-    if (cnt >= data2len) break; // this shouldn't happen ...
+    // done with encoding data
+    tp = streamEnd+STREAM_END_SIZE;
+    break;
   }
 
   // copy the rest of pdfTemplate to outbuf
@@ -261,9 +300,11 @@ pdf_unwrap(const char *data, size_t dlen,
            char *outbuf, size_t outbufsize)
 {
   const char *dp, *dlimit;
-  char *op, *streamStart, *streamEnd, *olimit;
+  char *op, *streamStart, *streamEnd;
   size_t cnt, size, size2;
-  bool endFlag, escape = false;
+
+  int streamObjStartSkip=0;
+  int streamObjEndSkip=0;
 
   if (dlen > SIZE_T_CEILING || outbufsize > SIZE_T_CEILING)
     return -1;
@@ -272,8 +313,8 @@ pdf_unwrap(const char *data, size_t dlen,
   op = outbuf; // current pointer for outbuf
   cnt = 0;     // number of char decoded
   dlimit = data+dlen;
-  olimit = outbuf+outbufsize;
 
+   
   while (dp < dlimit) {
     // find the next stream obj
     streamStart = strInBinary(STREAM_BEGIN, STREAM_BEGIN_SIZE, dp, dlimit-dp);
@@ -283,31 +324,40 @@ pdf_unwrap(const char *data, size_t dlen,
     }
 
     dp = streamStart + STREAM_BEGIN_SIZE;
+
+    // streamObjStartSkip = size of end-of-line (EOL) char(s) after "stream"
+    if ( *dp == '\r' && *(dp+1) == '\n' ) { // Windows-style EOL
+      streamObjStartSkip = 2;
+    } else if ( *dp == '\n' ) { // Unix-style EOL
+      streamObjStartSkip = 1;
+    }
+
+    dp = dp + streamObjStartSkip;
+
     streamEnd = strInBinary(STREAM_END, STREAM_END_SIZE, dp, dlimit-dp);
     if (streamEnd == NULL) {
       log_warn("Cannot find endstream in pdf");
       return -1;
     }
 
-    // count the number of usable char between tp and streamEnd
-    size = streamEnd-dp;
+    // streamObjEndSkip = size of end-of-line (EOL) char(s) at the end of stream obj
+    if (*(streamEnd-2) == '\r' && *(streamEnd-1) == '\n') {
+      streamObjEndSkip = 2;
+    } else if (*(streamEnd-1) == '\n') {
+      streamObjEndSkip = 1;
+    }
 
-    if (size > 0) {
-      ssize_t rv = pdf_remove_delimiter(dp, size, op, olimit-op, PDF_DELIMITER,
-                                        &endFlag, &escape);
-      if (rv < 0)
-        return -1;
+    // compute the size of stream obj payload
+    size = (streamEnd-streamObjEndSkip) - dp;
 
-      size2 = rv;
-      cnt += size2;
-      if (endFlag) { // Done decoding
-        break;
-      } else { // Continue decoding
-        op += size2;
-        dp = streamEnd + STREAM_END_SIZE;
-      }
-    } else { // empty stream obj
-      dp = streamEnd + STREAM_END_SIZE;
+    size2 = decompress((const uint8_t *)dp, size, (uint8_t *)op, outbufsize);
+    if ((int)size2 < 0) {
+      log_warn("decompress failed; size2 = %d\n", (int)size2);
+      return -1;
+    } else {
+      op += size2;
+      cnt = size2;
+      break;  // done decoding
     }
   }