[mlpack-svn] r10091 - in mlpack/trunk/src/mlpack: . core/file

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Oct 31 12:07:01 EDT 2011


Author: jcline3
Date: 2011-10-31 12:07:01 -0400 (Mon, 31 Oct 2011)
New Revision: 10091

Added:
   mlpack/trunk/src/mlpack/core/file/textfile.cpp
   mlpack/trunk/src/mlpack/core/file/textfile.hpp
   mlpack/trunk/src/mlpack/core/file/textfile_test.cpp
Removed:
   mlpack/trunk/src/mlpack/core/file/textfile.cc
   mlpack/trunk/src/mlpack/core/file/textfile.h
   mlpack/trunk/src/mlpack/core/file/textfile_test.cc
Modified:
   mlpack/trunk/src/mlpack/core.h
   mlpack/trunk/src/mlpack/core/file/CMakeLists.txt
Log:
core/file to hpp & cpp


Modified: mlpack/trunk/src/mlpack/core/file/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/core/file/CMakeLists.txt	2011-10-31 15:54:31 UTC (rev 10090)
+++ mlpack/trunk/src/mlpack/core/file/CMakeLists.txt	2011-10-31 16:07:01 UTC (rev 10091)
@@ -3,8 +3,8 @@
 # Define the files that we need to compile.
 # Anything not in this list will not be compiled into MLPACK.
 set(SOURCES
-   textfile.h
-   textfile.cc
+   textfile.hpp
+   textfile.cpp
 )
 
 # add directory name to sources
@@ -18,7 +18,7 @@
 
 # test executable
 add_executable(textfile_test
-  textfile_test.cc
+  textfile_test.cpp
 )
 # dependencies of test executable
 target_link_libraries(textfile_test

Deleted: mlpack/trunk/src/mlpack/core/file/textfile.cc
===================================================================
--- mlpack/trunk/src/mlpack/core/file/textfile.cc	2011-10-31 15:54:31 UTC (rev 10090)
+++ mlpack/trunk/src/mlpack/core/file/textfile.cc	2011-10-31 16:07:01 UTC (rev 10091)
@@ -1,418 +0,0 @@
-/**
- * @file textfile.cc
- *
- * Implementations for the text-based file I/O helper classes.
- *
- * @bug These routines fail when trying to read files linewise that use the Mac
- * eol '\r'.  Both Windows and Unix eol ("\r\n" and '\n') work.  Use the
- * programs 'dos2unix' or 'tr' to convert the '\r's to '\n's.
- *
- */
-
-#include "textfile.h"
-
-#include "../io/cli.hpp"
-#include "../io/log.hpp"
-#include <ctype.h>
-#include <iostream>
-
-/*
-char *TextTokenizer::ReadLine() {
-  char *buf = NULL;
-  size_t size = 0;
-  size_t len = 0;
-  const size_t extra = 64;
-  int c;
-
-  for (;;) {
-    c = getc(f_);
-
-    if (c == '\r') {
-      c = getc(f_);
-      if (c != '\n') {
-        ungetc(c, f_);
-      }
-      break;
-    } else if ((c == '\n') {
-      break;
-    } else if (c == EOF) {
-      if (len == 0) {
-        return NULL;
-      } else {
-        break;
-      }
-    }
-
-    len++;
-
-    if (size <= len) {
-      size = len * 2 + extra;
-      buf = mem::Realloc(buf, size);
-    }
-
-    buf[len-1] = c;
-  }
-
-  if (len == 0) {
-    // special case: empty line
-    buf = mem::Alloc<char>(1);
-  }
-
-  buf[len] = '\0';
-
-  return buf;
-}
-*/
-
-void TextLineReader::Error(const char *format, ...) {
-  va_list vl;
-
-  // TODO: Use a warning propagation system
-  fprintf(stderr, ".| %d: %s\nX|  `-> ", line_num_, line_.c_str());
-
-  va_start(vl, format);
-  vfprintf(stderr, format, vl);
-  va_end(vl);
-
-  fprintf(stderr, "\n");
-}
-
-bool TextLineReader::Open(const char *fname) {
-  f_ = fopen(fname, "r");
-  fname_ = fname;
-  line_num_ = 0;
-  has_line_ = false;
-
-  if (f_ == NULL) {
-    return false;
-  } else {
-    Gobble();
-    return true;
-  }
-}
-
-bool TextLineReader::Gobble() {
-  char *ptr = ReadLine_();
-
-  if (ptr != NULL) {
-    line_ = ptr;
-    has_line_ = true;
-    line_num_++;
-    delete[] ptr;
-    return true;
-  } else {
-    has_line_ = false;
-    delete[] ptr;
-    return false;
-  }
-}
-
-char *TextLineReader::ReadLine_() {
-  char *buf = NULL;
-  size_t size = 1;
-  size_t len = 0;
-#ifdef DEBUG
-  const size_t extra = 10;
-#else
-  const size_t extra = 80;
-#endif
-
-  for (;;) {
-    size = size * 2 + extra;
-
-    // Reallocate
-    char* newbuf = new char[size];
-    if (buf != NULL) {
-      memcpy(newbuf, buf, len * sizeof(char));
-      delete[] buf;
-    }
-    buf = newbuf;
-
-    //! doesn't handle mac eol - OK?
-    char *result = ::fgets(buf + len, size - len, f_);
-    if (len == 0 && result == NULL) {
-      delete[] buf;
-      return NULL;
-    }
-    len += strlen(buf + len);
-    if (buf[len - 1] == '\r') { // is there a following \n we didn't pick up?
-      // we'll peek at the next character and keep it if it's \n, but move the
-      // pointer back a position if it isn't
-      char tmp = fgetc(f_);
-      if(tmp == '\n') { // append to end
-        size++;
-
-        char* newbuf = new char[size * sizeof(char)];
-        memcpy(newbuf, buf, (size - 1) * sizeof(char));
-        delete[] buf;
-        buf = newbuf;
-        buf[len] = tmp;
-      } else {
-        // go back a character
-        fseek(f_, -1, SEEK_CUR);
-      }
-    }
-
-    if (len < size - 1 || buf[len - 1] == '\r' || buf[len - 1] == '\n') {
-      while (len && (buf[len-1] == '\r' || buf[len-1] == '\n')) {
-        len--;
-      }
-      buf[len] = '\0';
-      return buf;
-    }
-  }
-}
-
-bool TextTokenizer::Open(const char *fname,
-    const char *comment_chars_in, const char *ident_extra_in,
-    int features_in) {
-  next_ = "";
-  cur_ = "";
-  next_type_ = END;
-  cur_type_ = END;
-  comment_start_ = comment_chars_in;
-  features_ = features_in;
-  ident_extra_ = ident_extra_in;
-  line_ = 1;
-
-  f_ = fopen(fname, "r");
-
-  if (f_ == NULL) {
-    return false;
-  } else {
-    Gobble();
-    return true;
-  }
-}
-
-char TextTokenizer::NextChar_() {
-  int c = GetChar_();
-
-  if (c != EOF && (strchr(comment_start_, c) != NULL)) {
-    do {
-      c = GetChar_();
-    } while (c != EOF && c != '\r' && c != '\n');
-  }
-
-  if (c == EOF) {
-    c = 0;
-  }
-
-  return c;
-}
-
-char TextTokenizer::NextChar_(std::vector<char>& token) {
-  char c = NextChar_();
-
-  token.push_back(c);
-
-  return c;
-}
-
-char TextTokenizer::Skip_(std::vector<char>& token) {
-  int c;
-
-  while (1) {
-    c = NextChar_();
-    if (!isspace(c)) {
-      break;
-    }
-
-    if (c == '\r' || c == '\n') {
-      if (c == '\r') {
-        c = NextChar_();
-        if (c != '\n') {
-          Unget_(c);
-        }
-      }
-      line_++;
-      if ((features_ & WANT_NEWLINE)) {
-        c = '\n';
-        break;
-      }
-    }
-  }
-
-  token.push_back(char(c));
-
-  return char(c);
-}
-
-void TextTokenizer::UndoNextChar_(std::vector<char>& token) {
-  char c;
-  c = token.back();
-  token.pop_back();
-  if (c != 0) { /* don't put EOF back on the stream */
-    Unget_(c);
-  }
-}
-
-void Sanitize(const std::string& src, std::string& dest) {
-  for (size_t i = 0; i < src.length(); i++) {
-    char c = src[i];
-
-    if (isgraph(c) || c == ' ' || c == '\t') {
-      dest += c;
-    } else if (isspace(c)) {
-      dest += "<whitespace>";
-    } else {
-      dest += "<nonprint>";
-    }
-  }
-}
-
-void TextTokenizer::Error(const char *format, ...) {
-  va_list vl;
-  std::string cur_sanitized;
-  std::string next_sanitized;
-
-  Sanitize(cur_, cur_sanitized);
-  Sanitize(next_, next_sanitized);
-
-  // TODO: Use a warning propagation system
-  fprintf(stderr, ".| %d: %s <-HERE-> %s\nX|  `-> ", line_,
-      cur_sanitized.c_str(), next_sanitized.c_str());
-
-  va_start(vl, format);
-  vfprintf(stderr, format, vl);
-  va_end(vl);
-
-  fprintf(stderr, "\n");
-}
-
-void TextTokenizer::Error_(const char *msg, const std::vector<char>& token) {
-  next_type_ = INVALID;
-
-  printf("size is %zu, token[0] = %d\n", token.size(), token[0]);
-  Error("%s", msg);
-  next_.clear();
-}
-
-void TextTokenizer::ScanNumber_(char c, std::vector<char>& token) {
-  bool dot = false;
-  bool floating = false;
-
-  while (1) {
-    if (c == '.') {
-      /* handle a period */
-      if (dot) {
-        Error_("Multiple decimal points in a float", token);
-        return;
-      }
-      dot = true;
-      floating = true;
-    } else if (isdigit(c)) {
-      /* keep on processing digits */
-    } else if (c == 'e' || c == 'E') {
-      /* exponent - read exponent and finish */
-      c = NextChar_(token);
-      if (c == '+' || c == '-') {
-        c = NextChar_(token);
-      }
-      while (isdigit(c)) {
-        c = NextChar_(token);
-      }
-      floating = true;
-      break;
-    } else {
-      /* non numeric */
-      break;
-    }
-
-    c = NextChar_(token);
-  }
-
-  if (c == 'f' || c == 'F') {
-    // It's labelled a float.  Gobble and go.
-    floating = true;
-  } else if (isspace(c) || ispunct(c)) {
-    UndoNextChar_(token);
-  } else {
-    Error_("Invalid character while parsing number", token);
-  }
-
-  if (floating) {
-    next_type_ = DOUBLE;
-  } else {
-    next_type_ = INTEGER;
-  }
-}
-
-void TextTokenizer::ScanString_(char ending, std::vector<char>& token) {
-  int c;
-
-  while (1) {
-    c = NextChar_(token);
-
-    if (c == 0) {
-      Error_("Unterminated String", token);
-      UndoNextChar_(token);
-      return;
-    }
-
-    if (c == ending) {
-      next_type_ = STRING;
-      return;
-    }
-  }
-}
-
-void TextTokenizer::Scan_(std::vector<char>& token) {
-  char c = Skip_(token);
-
-  if (c == 0) {
-    // Makes token's capacity = 0
-    token.clear();
-    next_type_ = END;
-    return;
-  } else if (c == '.' || isdigit(c)) {
-    ScanNumber_(c, token);
-  } else if (isident_begin_(c)) {
-    while (isident_rest_(NextChar_(token))) {}
-    UndoNextChar_(token);
-    next_type_ = IDENTIFIER;
-  } else if (ispunct(c) || isspace(c)) {
-    if (c == '"' || c == '\'') {
-      ScanString_(c, token);
-    } else if (c == '+' || c == '-') {
-      c = NextChar_(token);
-      if (c == '.' || isdigit(c)) {
-        ScanNumber_(c, token);
-      } else {
-        UndoNextChar_(token);
-      }
-    } else {
-      next_type_ = PUNCT;
-    }
-  } else {
-    Error_("Unknown Character", token);
-  }
-}
-
-void TextTokenizer::Gobble() {
-
-  cur_.assign(next_);
-  next_.clear();
-  cur_type_ = next_type_;
-
-  std::vector<char> token;
-  Scan_(token);
-  token.push_back('\0');
-
-  next_ = &token.front();
-
-  mlpack::Log::Assert(next_.length() == strlen(next_.c_str()));
-}
-
-bool TextWriter::Printf(const char *format, ...) {
-  int rv;
-
-  va_list vl;
-
-  va_start(vl, format);
-  rv = vfprintf(f_, format, vl);
-  va_end(vl);
-
-  return (rv > 0);
-}
-

Copied: mlpack/trunk/src/mlpack/core/file/textfile.cpp (from rev 10083, mlpack/trunk/src/mlpack/core/file/textfile.cc)
===================================================================
--- mlpack/trunk/src/mlpack/core/file/textfile.cpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/file/textfile.cpp	2011-10-31 16:07:01 UTC (rev 10091)
@@ -0,0 +1,420 @@
+/**
+ * @file textfile.cpp
+ *
+ * Implementations for the text-based file I/O helper classes.
+ *
+ * @bug These routines fail when trying to read files linewise that use the Mac
+ * eol '\r'.  Both Windows and Unix eol ("\r\n" and '\n') work.  Use the
+ * programs 'dos2unix' or 'tr' to convert the '\r's to '\n's.
+ *
+ */
+
+#include "textfile.hpp"
+
+#include "../io/cli.hpp"
+#include "../io/log.hpp"
+#include <ctype.h>
+#include <iostream>
+
+using namespace mlpack;
+
+/*
+char *TextTokenizer::ReadLine() {
+  char *buf = NULL;
+  size_t size = 0;
+  size_t len = 0;
+  const size_t extra = 64;
+  int c;
+
+  for (;;) {
+    c = getc(f_);
+
+    if (c == '\r') {
+      c = getc(f_);
+      if (c != '\n') {
+        ungetc(c, f_);
+      }
+      break;
+    } else if ((c == '\n') {
+      break;
+    } else if (c == EOF) {
+      if (len == 0) {
+        return NULL;
+      } else {
+        break;
+      }
+    }
+
+    len++;
+
+    if (size <= len) {
+      size = len * 2 + extra;
+      buf = mem::Realloc(buf, size);
+    }
+
+    buf[len-1] = c;
+  }
+
+  if (len == 0) {
+    // special case: empty line
+    buf = mem::Alloc<char>(1);
+  }
+
+  buf[len] = '\0';
+
+  return buf;
+}
+*/
+
+/**
+ * Prints a formatted error message to stderr, preceded by the current line
+ * number and the text of the current line.
+ *
+ * @param format printf-style format string for the message.
+ * @param ... Arguments consumed by the format string.
+ */
+void TextLineReader::Error(const char *format, ...) {
+  // TODO: Use a warning propagation system
+  // Context header: where we are in the file and what the line looks like.
+  fprintf(stderr, ".| %d: %s\nX|  `-> ", line_num_, line_.c_str());
+
+  // The caller-supplied message itself.
+  va_list args;
+  va_start(args, format);
+  vfprintf(stderr, format, args);
+  va_end(args);
+
+  fputc('\n', stderr);
+}
+
+/**
+ * Opens a file for line-wise reading and loads its first line.
+ *
+ * If this reader already holds an open file, that file is closed first so
+ * that reusing a reader does not leak the earlier FILE handle.
+ *
+ * @param fname Path of the file to open.
+ * @return true if the file could be opened, false otherwise.
+ */
+bool TextLineReader::Open(const char *fname) {
+  if (f_ != NULL) {
+    ::fclose(f_);
+  }
+
+  f_ = fopen(fname, "r");
+  fname_ = fname;
+  line_num_ = 0;
+  has_line_ = false;
+
+  if (f_ == NULL) {
+    return false;
+  }
+
+  // Read ahead one line so that Peek() and MoreLines() are valid right away.
+  Gobble();
+  return true;
+}
+
+/**
+ * Advances to the next line of the file.
+ *
+ * On success the line becomes the current line and the line counter is
+ * incremented; at end of file only the "has line" flag is cleared.
+ *
+ * @return true if a line was read, false if ReadLine_() returned NULL.
+ */
+bool TextLineReader::Gobble() {
+  char *raw = ReadLine_();
+
+  has_line_ = (raw != NULL);
+  if (!has_line_) {
+    return false;
+  }
+
+  line_ = raw;
+  ++line_num_;
+  // ReadLine_() hands over an array allocated with new[].
+  delete[] raw;
+  return true;
+}
+
+/**
+ * Reads one line from the file into a freshly allocated buffer.
+ *
+ * Characters are consumed up to and including the next '\n' (or up to end of
+ * file).  All trailing '\r' and '\n' characters are then stripped, so both
+ * Unix ("\n") and Windows ("\r\n") line endings are handled.  A lone '\r' is
+ * not treated as a line terminator.
+ *
+ * Reading one character at a time avoids the problems of a chunked fgets()
+ * loop: nothing is measured with strlen() after a failed read, no index
+ * before the start of the buffer is touched, and nothing is pushed back
+ * after end of file has been reached.
+ *
+ * @return A NUL-terminated array allocated with new[] which the caller must
+ *     release with delete[], or NULL if no character could be read.
+ */
+char *TextLineReader::ReadLine_() {
+  std::string line;
+  bool read_any = false;
+
+  // getc() returns an int so that EOF stays distinguishable from data.
+  int c;
+  while ((c = ::getc(f_)) != EOF) {
+    read_any = true;
+    if (c == '\n') {
+      break;
+    }
+    line += static_cast<char>(c);
+  }
+
+  if (!read_any) {
+    // End of file (or read error) before any character: no more lines.
+    return NULL;
+  }
+
+  // Drop the end-of-line characters left at the tail ("\r" of "\r\n").
+  std::string::size_type len = line.size();
+  while (len > 0 && (line[len - 1] == '\r' || line[len - 1] == '\n')) {
+    --len;
+  }
+
+  char *buf = new char[len + 1];
+  line.copy(buf, len);
+  buf[len] = '\0';
+  return buf;
+}
+
+/**
+ * Opens a file for tokenizing and scans its first token.
+ *
+ * If this tokenizer already holds an open file, that file is closed first so
+ * that reusing a tokenizer does not leak the earlier FILE handle.
+ *
+ * Only the pointers comment_chars_in and ident_extra_in are stored, not
+ * copies of the strings, so both must stay valid while the tokenizer is used.
+ *
+ * @param fname Path of the file to open.
+ * @param comment_chars_in Characters that start a comment running to the end
+ *     of the line.
+ * @param ident_extra_in Extra characters accepted inside an identifier after
+ *     its first character.
+ * @param features_in Bitwise OR of Features flags.
+ * @return true if the file could be opened, false otherwise.
+ */
+bool TextTokenizer::Open(const char *fname,
+    const char *comment_chars_in, const char *ident_extra_in,
+    int features_in) {
+  if (f_ != NULL) {
+    (void) fclose(f_);
+  }
+
+  next_ = "";
+  cur_ = "";
+  next_type_ = END;
+  cur_type_ = END;
+  comment_start_ = comment_chars_in;
+  features_ = features_in;
+  ident_extra_ = ident_extra_in;
+  line_ = 1;
+
+  f_ = fopen(fname, "r");
+
+  if (f_ == NULL) {
+    return false;
+  }
+
+  // Scan the first token so that Peek()/PeekType() are valid right away.
+  Gobble();
+  return true;
+}
+
+/**
+ * Reads the next character from the file, skipping over comments.
+ *
+ * When the character read is one of the comment-start characters, input is
+ * discarded up to the next '\r', '\n' or end of file, and that terminating
+ * character is returned instead.
+ *
+ * @return The character read, or 0 at end of file.
+ */
+char TextTokenizer::NextChar_() {
+  int c = GetChar_();
+
+  // strchr() also matches the terminating NUL of comment_start_, so a NUL
+  // byte in the input must be excluded or it would count as a comment start.
+  if (c != EOF && c != 0 && (strchr(comment_start_, c) != NULL)) {
+    do {
+      c = GetChar_();
+    } while (c != EOF && c != '\r' && c != '\n');
+  }
+
+  if (c == EOF) {
+    c = 0;
+  }
+
+  return static_cast<char>(c);
+}
+
+/**
+ * Reads the next character (see the overload without arguments) and appends
+ * it to the token being built.
+ *
+ * @param token Buffer that receives the character.
+ * @return The character that was appended.
+ */
+char TextTokenizer::NextChar_(std::vector<char>& token) {
+  token.push_back(NextChar_());
+  return token.back();
+}
+
+/**
+ * Skips whitespace and returns the first character of the next token.
+ *
+ * Line endings ("\n", "\r\n" and a lone "\r") increment the line counter.
+ * When the WANT_NEWLINE feature is set, a line ending stops the skipping and
+ * is reported as a single '\n'.
+ *
+ * @param token Buffer that receives the returned character.
+ * @return The first non-skipped character; 0 at end of file.
+ */
+char TextTokenizer::Skip_(std::vector<char>& token) {
+  int c;
+
+  while (1) {
+    c = NextChar_();
+    // The <ctype.h> functions are only defined for unsigned char values and
+    // EOF; NextChar_() returns a plain char, which may be negative.
+    if (!isspace(static_cast<unsigned char>(c))) {
+      break;
+    }
+
+    if (c == '\r' || c == '\n') {
+      if (c == '\r') {
+        // Swallow the '\n' of a "\r\n" pair; anything else belongs to the
+        // next line and is pushed back.  The end-of-file marker (0) is not
+        // real input and must not be put back on the stream.
+        c = NextChar_();
+        if (c != '\n' && c != 0) {
+          Unget_(static_cast<unsigned char>(c));
+        }
+      }
+      line_++;
+      if ((features_ & WANT_NEWLINE)) {
+        c = '\n';
+        break;
+      }
+    }
+  }
+
+  token.push_back(char(c));
+
+  return char(c);
+}
+
+/**
+ * Removes the last character from the token and returns it to the input
+ * stream, so that the next read sees it again.
+ *
+ * @param token Buffer whose last character is removed.
+ */
+void TextTokenizer::UndoNextChar_(std::vector<char>& token) {
+  const char last = token.back();
+  token.pop_back();
+
+  /* don't put EOF back on the stream */
+  if (last == 0) {
+    return;
+  }
+  Unget_(last);
+}
+
+/**
+ * Appends a printable rendering of src to dest.
+ *
+ * Graphic characters, spaces and tabs are copied unchanged.  Any other
+ * whitespace character becomes "<whitespace>" and every remaining character
+ * becomes "<nonprint>".
+ *
+ * @param src Text to render.
+ * @param dest String the rendering is appended to; it is not cleared first.
+ */
+void Sanitize(const std::string& src, std::string& dest) {
+  for (size_t i = 0; i < src.length(); i++) {
+    // The <ctype.h> functions are only defined for unsigned char values and
+    // EOF, so a plain (possibly negative) char must be converted first.
+    const unsigned char c = static_cast<unsigned char>(src[i]);
+
+    if (isgraph(c) || c == ' ' || c == '\t') {
+      dest += src[i];
+    } else if (isspace(c)) {
+      dest += "<whitespace>";
+    } else {
+      dest += "<nonprint>";
+    }
+  }
+}
+
+/**
+ * Prints a formatted error message to stderr, preceded by the current line
+ * number and sanitized copies of the current and the next token.
+ *
+ * @param format printf-style format string for the message.
+ * @param ... Arguments consumed by the format string.
+ */
+void TextTokenizer::Error(const char *format, ...) {
+  // Make both tokens safe to print.
+  std::string shown_cur;
+  Sanitize(cur_, shown_cur);
+  std::string shown_next;
+  Sanitize(next_, shown_next);
+
+  // TODO: Use a warning propagation system
+  fprintf(stderr, ".| %d: %s <-HERE-> %s\nX|  `-> ", line_,
+      shown_cur.c_str(), shown_next.c_str());
+
+  // The caller-supplied message itself.
+  va_list args;
+  va_start(args, format);
+  vfprintf(stderr, format, args);
+  va_end(args);
+
+  fputc('\n', stderr);
+}
+
+/**
+ * Reports a scanning error: marks the next token as INVALID, prints a
+ * diagnostic line about the token buffer to stdout, prints msg through
+ * Error(), and clears the text of the next token.
+ *
+ * @param msg Message to print.
+ * @param token Characters collected so far for the offending token.
+ */
+void TextTokenizer::Error_(const char *msg, const std::vector<char>& token) {
+  next_type_ = INVALID;
+
+  // Reading token[0] from an empty vector is undefined; report 0 instead.
+  const int first = token.empty() ? 0 : token[0];
+  printf("size is %zu, token[0] = %d\n", token.size(), first);
+  Error("%s", msg);
+  next_.clear();
+}
+
+/**
+ * Scans the remainder of a numeric token and classifies it.
+ *
+ * Accepts digits, at most one decimal point, an optional exponent ('e' or
+ * 'E' with optional sign) and an optional trailing 'f'/'F'.  The token is
+ * typed DOUBLE if it has a decimal point, an exponent or an 'f' suffix, and
+ * INTEGER otherwise.  A malformed number is reported through Error_(), which
+ * types the token INVALID.
+ *
+ * @param c First character of the number; already stored in token.
+ * @param token Buffer that collects the characters of the token.
+ */
+void TextTokenizer::ScanNumber_(char c, std::vector<char>& token) {
+  bool dot = false;
+  bool floating = false;
+
+  // Note: the <ctype.h> functions are only defined for unsigned char values
+  // and EOF, hence the casts on the plain char below.
+  while (1) {
+    if (c == '.') {
+      /* handle a period */
+      if (dot) {
+        Error_("Multiple decimal points in a float", token);
+        return;
+      }
+      dot = true;
+      floating = true;
+    } else if (isdigit(static_cast<unsigned char>(c))) {
+      /* keep on processing digits */
+    } else if (c == 'e' || c == 'E') {
+      /* exponent - read exponent and finish */
+      c = NextChar_(token);
+      if (c == '+' || c == '-') {
+        c = NextChar_(token);
+      }
+      while (isdigit(static_cast<unsigned char>(c))) {
+        c = NextChar_(token);
+      }
+      floating = true;
+      break;
+    } else {
+      /* non numeric */
+      break;
+    }
+
+    c = NextChar_(token);
+  }
+
+  if (c == 'f' || c == 'F') {
+    // It's labelled a float.  Gobble and go.
+    floating = true;
+  } else if (c == 0 || isspace(static_cast<unsigned char>(c))
+      || ispunct(static_cast<unsigned char>(c))) {
+    // The character after the number is not part of it.  End of input (0)
+    // is a valid terminator too; UndoNextChar_() drops it from the token
+    // without pushing it back onto the stream.
+    UndoNextChar_(token);
+  } else {
+    // Keep the INVALID type set by Error_() instead of overwriting it.
+    Error_("Invalid character while parsing number", token);
+    return;
+  }
+
+  next_type_ = floating ? DOUBLE : INTEGER;
+}
+
+/**
+ * Scans a quoted string up to and including its closing quote and types the
+ * token as STRING.  If the input ends first, the error is reported through
+ * Error_() and the end marker is removed from the token again.
+ *
+ * @param ending The quote character that terminates the string.
+ * @param token Buffer that collects the characters of the token.
+ */
+void TextTokenizer::ScanString_(char ending, std::vector<char>& token) {
+  int ch;
+
+  do {
+    ch = NextChar_(token);
+
+    if (ch == 0) {
+      // Ran out of input before the closing quote.
+      Error_("Unterminated String", token);
+      UndoNextChar_(token);
+      return;
+    }
+  } while (ch != ending);
+
+  next_type_ = STRING;
+}
+
+/**
+ * Scans the next token from the input into token and sets its type.
+ *
+ * After skipping whitespace, the first character decides the kind of token:
+ * end of input, number, identifier, quoted string, signed number, or a
+ * single punctuation character (which includes the '\n' that Skip_()
+ * reports when WANT_NEWLINE is set).
+ *
+ * @param token Buffer that receives the characters of the token; it is left
+ *     empty at end of input.
+ */
+void TextTokenizer::Scan_(std::vector<char>& token) {
+  char c = Skip_(token);
+  // The <ctype.h> functions are only defined for unsigned char values and
+  // EOF, so classify through an unsigned copy of the character.
+  unsigned char uc = static_cast<unsigned char>(c);
+
+  if (c == 0) {
+    // Drop the end marker that Skip_() stored; the token is empty.
+    token.clear();
+    next_type_ = END;
+    return;
+  } else if (c == '.' || isdigit(uc)) {
+    ScanNumber_(c, token);
+  } else if (isident_begin_(uc)) {
+    while (isident_rest_(static_cast<unsigned char>(NextChar_(token)))) {}
+    UndoNextChar_(token);
+    next_type_ = IDENTIFIER;
+  } else if (ispunct(uc) || isspace(uc)) {
+    if (c == '"' || c == '\'') {
+      ScanString_(c, token);
+    } else if (c == '+' || c == '-') {
+      c = NextChar_(token);
+      if (c == '.' || isdigit(static_cast<unsigned char>(c))) {
+        ScanNumber_(c, token);
+      } else {
+        // A sign that does not start a number is plain punctuation.  The
+        // type must be set here, otherwise the token would keep the type of
+        // the previous token.
+        UndoNextChar_(token);
+        next_type_ = PUNCT;
+      }
+    } else {
+      next_type_ = PUNCT;
+    }
+  } else {
+    Error_("Unknown Character", token);
+  }
+}
+
+/**
+ * Advances by one token: the former next token becomes the current one and
+ * a new next token is scanned from the input.
+ */
+void TextTokenizer::Gobble() {
+  // Shift "next" into "current".
+  cur_ = next_;
+  cur_type_ = next_type_;
+  next_.clear();
+
+  // Scan into a scratch buffer, then terminate it so that it can be read as
+  // a C string.
+  std::vector<char> chars;
+  Scan_(chars);
+  chars.push_back('\0');
+
+  next_ = &chars[0];
+
+  mlpack::Log::Assert(next_.length() == strlen(next_.c_str()));
+}
+
+/**
+ * Writes printf-style formatted text to the file.
+ *
+ * @param format printf-style format string.
+ * @param ... Arguments consumed by the format string.
+ * @return true on success, false if vfprintf() reported an error.
+ */
+bool TextWriter::Printf(const char *format, ...) {
+  int rv;
+
+  va_list vl;
+
+  va_start(vl, format);
+  rv = vfprintf(f_, format, vl);
+  va_end(vl);
+
+  // vfprintf() returns the number of characters written and a negative value
+  // on error; writing zero characters (e.g. an empty format) is a success.
+  return (rv >= 0);
+}
+

Deleted: mlpack/trunk/src/mlpack/core/file/textfile.h
===================================================================
--- mlpack/trunk/src/mlpack/core/file/textfile.h	2011-10-31 15:54:31 UTC (rev 10090)
+++ mlpack/trunk/src/mlpack/core/file/textfile.h	2011-10-31 16:07:01 UTC (rev 10091)
@@ -1,387 +0,0 @@
-/**
- * @file textfile.h
- *
- * Small wrappers for text files.
- * The most useful thing here is the ReadLine function.
- *
- * @bug These routines fail when trying to read files linewise that use the Mac
- * eol '\r'.  Both Windows and Unix eol ("\r\n" and '\n') work.  Use the
- * programs 'dos2unix' or 'tr' to convert the '\r's to '\n's.
- *
- */
-
-#ifndef FILE_TEXTFILE_H
-#define FILE_TEXTFILE_H
-
-#include "../io/cli.hpp"
-#include "../io/log.hpp"
-
-#include <cstdio>
-#include <ctype.h>
-#include <stdarg.h>
-
-#include <string>
-#include <vector>
-
-/**
- * Helper for reading text files.
- *
- * Files are closed automatically when they fall out of scope, though
- * you may choose to close it yourself at no harm.
- */
-class TextLineReader {
-
- private:
-  FILE *f_;
-  std::string line_;
-  int line_num_;
-  bool has_line_;
-  std::string fname_;
-
- public:
-  /** Creates an unitialized object. */
-  TextLineReader() {
-    f_ = NULL;
-  }
-
-  /**
-   * Automatically closes the file.
-   */
-  ~TextLineReader() {
-    if (f_)
-      ::fclose(f_);
-  }
-
-  /**
-   * Opens a file.
-   *
-   * @return success value
-   */
-  bool Open(const char *fname);
-
-  /**
-   * Closes the file.
-   *
-   * (No need for a return value since you are only reading the file.)
-   */
-  void Close() {
-    (void)fclose(f_);
-    f_ = NULL;
-  }
-
-  /**
-   * Return the name of the file we are working with.
-   * This will return NULL if no file has been opened yet.
-   */
-  const std::string& filename() const {
-    return fname_;
-  }
-
-  /**
-   * Are there more lines left?
-   */
-  bool MoreLines() {
-    return has_line_;
-  }
-
-  /**
-   * Returns the currnet line number.
-   */
-  int line_num() const {
-    return line_num_;
-  }
-
-  /**
-   * Gets the current line.
-   */
-  std::string& Peek() {
-    return line_;
-  }
-
-  /**
-   * Gets the current line.
-   */
-  const std::string& Peek() const {
-    return line_;
-  }
-
-  /**
-   * Tries to read one line from a file.
-   *
-   * @return true if a line was returned, false if end of file
-   */
-  bool Gobble();
-
-  /**
-   * Prints a formatted error message with line number info.
-   */
-  __attribute__((format(printf, 2, 3))) void Error(const char *msg, ...);
-
- private:
-  char *ReadLine_();
-};
-
-/**
- * Simple text tokenizer.
- *
- * This tokenizes the input stream.  It will identify the general type of
- * token, and ignore whitespace and different kinds of comments.
- *
- * This has the concept of 'next' token which you can peek at, and the
- * current token.  The current token is always initialized to empty at
- * the very beginning, because you are encouraged to peek ahead one token.
- *
- * Example:
- *
- * @code
- * TextTokenizer tokenizer;
- * tokenizer.Open("file.txt");
- *
- * if (tokenizer->Match("count")) {
- *   if (tokenizer->Match(TextTokenizer::INTEGER)) {
- *     printf("Found number: %d\n", atoi(tokenizer->Current()));
- *   } else { Error(); }
- * } else { Error(); }
- * @endcode
- */
-class TextTokenizer {
- public:
-  enum TokenType {
-    INVALID = -1,
-    END,
-    PUNCT,
-    IDENTIFIER,
-    STRING,
-    DOUBLE,
-    INTEGER
-  };
-
-  enum Features {
-    WANT_NEWLINE = 0x01
-  };
-
- private:
-  FILE *f_;
-  std::string next_;
-  TokenType next_type_;
-  std::string cur_;
-  TokenType cur_type_;
-  const char *comment_start_;
-  const char *ident_extra_;
-  int features_;
-  int line_;
-
- public:
-  TextTokenizer() {
-    f_ = NULL;
-  }
-  ~TextTokenizer() {
-    if (f_ != NULL) {
-      (void) fclose(f_);
-    }
-    f_ = NULL;
-  }
-
-  bool Open(const char *fname,
-      const char *comment_chars = "", const char *ident_extra = "",
-      int features = 0);
-
-
-  const std::string& Peek() const {
-    return next_;
-  }
-
-  TokenType PeekType() const {
-    return next_type_;
-  }
-
-  const std::string& Current() const {
-    return cur_;
-  }
-
-  TokenType CurrentType() const {
-    return cur_type_;
-  }
-
-  void Gobble();
-
-  bool MoreTokens() const {
-    return next_type_ != END;
-  }
-
-  bool Match(const std::string exact) {
-    if (next_ == exact) {
-      Gobble();
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  bool MatchInteger() {
-    return MatchType(INTEGER);
-  }
-
-  bool MatchDouble() {
-    return MatchType(DOUBLE);
-  }
-
-  bool MatchNumber() {
-    return MatchInteger() || MatchDouble();
-  }
-
-  bool MatchIdentifier() {
-    return MatchType(IDENTIFIER);
-  }
-
-  bool MatchQuasiIdentifier() {
-    return MatchIdentifier() || MatchNumber();
-  }
-
-  bool MatchString() {
-    return MatchType(STRING);
-  }
-
-  bool MatchPunct() {
-    return MatchType(PUNCT);
-  }
-
-  bool MatchType(TokenType type) {
-    if (next_type_ == type) {
-      Gobble();
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  int line() const {
-    return line_;
-  }
-
-  __attribute__((format(printf, 2, 3))) void Error(const char *msg, ...);
-
- private:
-  int GetChar_() {
-    return ::getc(f_);
-  }
-
-  void Unget_(int c) {
-    ::ungetc(c, f_);
-  }
-
-  bool IsEOF_() {
-    return ::feof(f_);
-  }
-
-  char Skip_(std::vector<char>& token);
-
-  char NextChar_(std::vector<char>& token);
-
-  char NextChar_();
-
-  void UndoNextChar_(std::vector<char>& token);
-
-  void Error_(const char *msg, const std::vector<char>& token);
-
-  bool isident_begin_(int c) const {
-    return isalpha(c) || (c == '_');
-  }
-
-  bool isident_rest_(int c) const {
-    return isalnum(c) || (c == '_') || (c != 0 && strchr(ident_extra_, c));
-  }
-
-  void ScanNumber_(char c, std::vector<char>& token);
-
-  void ScanString_(char ending, std::vector<char>& token);
-
-  void Scan_(std::vector<char>& token);
-};
-
-/**
- * Helper for writing text fo a file.
- */
-class TextWriter {
-
- private:
-  FILE *f_;
-
- public:
-  /**
-   * Creates an uninitialized text writer (you must initialize it).
-   */
-  TextWriter() {
-    f_ = NULL;
-  }
-
-  /**
-   * Automatically closes the file when it gets out of scope; for
-   * best error handling, you should call Close first.
-   *
-   * If you do not explicitly close the file beforehand, this will abort the
-   * program on a write error.
-   */
-  ~TextWriter() {
-    if (f_) {
-      mlpack::Log::Assert(fclose(f_) >= 0);
-      mlpack::Log::Assert(fclose(f_) >= 0, "File close failed!");
-    }
-    f_ = NULL;
-  }
-
-  /**
-   * Opens a file by name (initializer).
-   *
-   * @return success or failure
-   */
-  bool Open(const char *fname) {
-    f_ = ::fopen(fname, "w");
-    return (!f_) ? false : true;
-  }
-
-  /**
-   * Explicitly closes the file.
-   */
-  bool Close() {
-    int rv = fclose(f_);
-    f_ = NULL;
-    return (rv < 0) ? false : true;
-  }
-
-  bool Printf(const char *format, ...);
-
-  bool Write(const char *s) {
-    return (fputs(s, f_) > 0);
-  }
-
-  bool Write(int i) {
-    return (fprintf(f_, "%d", i) > 0);
-  }
-
-  bool Write(unsigned int i) {
-    return (fprintf(f_, "%u", i) > 0);
-  }
-
-  bool Write(long i) {
-    return (fprintf(f_, "%ld", i) > 0);
-  }
-
-  bool Write(unsigned long i) {
-    return (fprintf(f_, "%lu", i) > 0);
-  }
-
-  bool Write(long long i) {
-    return (fprintf(f_, "%lld", i) > 0);
-  }
-
-  bool Write(unsigned long long i) {
-    return (fprintf(f_, "%llu", i) > 0);
-  }
-
-  bool Write(double d) {
-    return (fprintf(f_, "%.15e", d) > 0);
-  }
-};
-
-#endif

Copied: mlpack/trunk/src/mlpack/core/file/textfile.hpp (from rev 10083, mlpack/trunk/src/mlpack/core/file/textfile.h)
===================================================================
--- mlpack/trunk/src/mlpack/core/file/textfile.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/file/textfile.hpp	2011-10-31 16:07:01 UTC (rev 10091)
@@ -0,0 +1,391 @@
+/**
+ * @file textfile.hpp
+ *
+ * Small wrappers for text files.
+ * The most useful thing here is the ReadLine function.
+ *
+ * @bug These routines fail when trying to read files linewise that use the Mac
+ * eol '\r'.  Both Windows and Unix eol ("\r\n" and '\n') work.  Use the
+ * programs 'dos2unix' or 'tr' to convert the '\r's to '\n's.
+ *
+ */
+
+#ifndef __MLPACK_CORE_FILE_TEXTFILE_HPP
+#define __MLPACK_CORE_FILE_TEXTFILE_HPP
+
+#include "../io/cli.hpp"
+#include "../io/log.hpp"
+
+#include <cstdio>
+#include <ctype.h>
+#include <stdarg.h>
+
+#include <string>
+#include <vector>
+
+namespace mlpack {
+
+/**
+ * Helper for reading text files line by line.
+ *
+ * Files are closed automatically when the reader is destroyed, though
+ * you may choose to close it yourself at no harm.
+ *
+ * NOTE(review): the class holds a raw FILE pointer and declares no copy
+ * operations, so a copied reader would close the same file twice; avoid
+ * copying instances.
+ */
+class TextLineReader {
+
+ private:
+  FILE *f_;            // Open file, or NULL when no file is open.
+  std::string line_;   // Text of the current line.
+  int line_num_;       // Number of lines read so far.
+  bool has_line_;      // Whether line_ holds a line read from the file.
+  std::string fname_;  // Name passed to the last Open() call.
+
+ public:
+  /** Creates a reader with no file attached. */
+  TextLineReader() : f_(NULL), line_num_(0), has_line_(false) {
+  }
+
+  /**
+   * Automatically closes the file.
+   */
+  ~TextLineReader() {
+    if (f_)
+      ::fclose(f_);
+  }
+
+  /**
+   * Opens a file.
+   *
+   * @return success value
+   */
+  bool Open(const char *fname);
+
+  /**
+   * Closes the file.  Does nothing if no file is open.
+   *
+   * (No need for a return value since you are only reading the file.)
+   */
+  void Close() {
+    // Passing a null pointer to fclose() is undefined behaviour.
+    if (f_ != NULL) {
+      (void)fclose(f_);
+      f_ = NULL;
+    }
+  }
+
+  /**
+   * Return the name of the file we are working with.
+   * This is an empty string if no file has been opened yet.
+   */
+  const std::string& filename() const {
+    return fname_;
+  }
+
+  /**
+   * Are there more lines left?
+   */
+  bool MoreLines() const {
+    return has_line_;
+  }
+
+  /**
+   * Returns the current line number.
+   */
+  int line_num() const {
+    return line_num_;
+  }
+
+  /**
+   * Gets the current line.
+   */
+  std::string& Peek() {
+    return line_;
+  }
+
+  /**
+   * Gets the current line.
+   */
+  const std::string& Peek() const {
+    return line_;
+  }
+
+  /**
+   * Tries to read one line from a file.
+   *
+   * @return true if a line was returned, false if end of file
+   */
+  bool Gobble();
+
+  /**
+   * Prints a formatted error message with line number info.
+   */
+  __attribute__((format(printf, 2, 3))) void Error(const char *msg, ...);
+
+ private:
+  /** Reads the next line of the file; defined in the implementation file. */
+  char *ReadLine_();
+};
+
+/**
+ * Simple text tokenizer.
+ *
+ * This tokenizes the input stream.  It will identify the general type of
+ * token, and ignore whitespace and different kinds of comments.
+ *
+ * This has the concept of 'next' token which you can peek at, and the
+ * current token.  The current token is always initialized to empty at
+ * the very beginning, because you are encouraged to peek ahead one token.
+ *
+ * Example:
+ *
+ * @code
+ * TextTokenizer tokenizer;
+ * tokenizer.Open("file.txt");
+ *
+ * if (tokenizer.Match("count")) {
+ *   if (tokenizer.MatchType(TextTokenizer::INTEGER)) {
+ *     printf("Found number: %d\n", atoi(tokenizer.Current().c_str()));
+ *   } else { tokenizer.Error("expected an integer"); }
+ * } else { tokenizer.Error("expected 'count'"); }
+ * @endcode
+ *
+ * NOTE(review): the class holds a raw FILE pointer and declares no copy
+ * operations, so a copied tokenizer would close the same file twice; avoid
+ * copying instances.
+ */
+class TextTokenizer {
+ public:
+  /** General category of a token. */
+  enum TokenType {
+    INVALID = -1,
+    END,
+    PUNCT,
+    IDENTIFIER,
+    STRING,
+    DOUBLE,
+    INTEGER
+  };
+
+  /** Option flags for Open(). */
+  enum Features {
+    /** Report line endings as "\n" tokens instead of skipping them. */
+    WANT_NEWLINE = 0x01
+  };
+
+ private:
+  FILE *f_;                    // Open file, or NULL when no file is open.
+  std::string next_;           // Text of the look-ahead token.
+  TokenType next_type_;        // Type of the look-ahead token.
+  std::string cur_;            // Text of the current token.
+  TokenType cur_type_;         // Type of the current token.
+  const char *comment_start_;  // Characters that start a comment (not owned).
+  const char *ident_extra_;    // Extra identifier characters (not owned).
+  int features_;               // Bitwise OR of Features flags.
+  int line_;                   // Current line number.
+
+ public:
+  /**
+   * Creates a tokenizer with no file attached.  Every member gets a defined
+   * value so that the accessors are safe to call before Open().
+   */
+  TextTokenizer()
+      : f_(NULL),
+        next_type_(END),
+        cur_type_(END),
+        comment_start_(""),
+        ident_extra_(""),
+        features_(0),
+        line_(1) {
+  }
+
+  /** Closes the file if one is open. */
+  ~TextTokenizer() {
+    if (f_ != NULL) {
+      (void) fclose(f_);
+    }
+    f_ = NULL;
+  }
+
+  /**
+   * Opens a file and scans its first token.
+   *
+   * @param fname Path of the file to open.
+   * @param comment_chars Characters that start a comment.
+   * @param ident_extra Extra characters allowed inside identifiers.
+   * @param features Bitwise OR of Features flags.
+   * @return true if the file could be opened.
+   */
+  bool Open(const char *fname,
+      const char *comment_chars = "", const char *ident_extra = "",
+      int features = 0);
+
+
+  /** Text of the next (look-ahead) token. */
+  const std::string& Peek() const {
+    return next_;
+  }
+
+  /** Type of the next (look-ahead) token. */
+  TokenType PeekType() const {
+    return next_type_;
+  }
+
+  /** Text of the current token. */
+  const std::string& Current() const {
+    return cur_;
+  }
+
+  /** Type of the current token. */
+  TokenType CurrentType() const {
+    return cur_type_;
+  }
+
+  /** Makes the next token current and scans a new next token. */
+  void Gobble();
+
+  /** Whether the next token is something other than end of input. */
+  bool MoreTokens() const {
+    return next_type_ != END;
+  }
+
+  /**
+   * Consumes the next token if its text equals exact.
+   *
+   * @return true if the token matched and was consumed.
+   */
+  bool Match(const std::string& exact) {
+    if (next_ == exact) {
+      Gobble();
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  /** Consumes the next token if it is an INTEGER. */
+  bool MatchInteger() {
+    return MatchType(INTEGER);
+  }
+
+  /** Consumes the next token if it is a DOUBLE. */
+  bool MatchDouble() {
+    return MatchType(DOUBLE);
+  }
+
+  /** Consumes the next token if it is an INTEGER or a DOUBLE. */
+  bool MatchNumber() {
+    return MatchInteger() || MatchDouble();
+  }
+
+  /** Consumes the next token if it is an IDENTIFIER. */
+  bool MatchIdentifier() {
+    return MatchType(IDENTIFIER);
+  }
+
+  /** Consumes the next token if it is an identifier or a number. */
+  bool MatchQuasiIdentifier() {
+    return MatchIdentifier() || MatchNumber();
+  }
+
+  /** Consumes the next token if it is a STRING. */
+  bool MatchString() {
+    return MatchType(STRING);
+  }
+
+  /** Consumes the next token if it is PUNCT. */
+  bool MatchPunct() {
+    return MatchType(PUNCT);
+  }
+
+  /**
+   * Consumes the next token if it has the given type.
+   *
+   * @return true if the token matched and was consumed.
+   */
+  bool MatchType(TokenType type) {
+    if (next_type_ == type) {
+      Gobble();
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  /** Current line number. */
+  int line() const {
+    return line_;
+  }
+
+  /**
+   * Prints a formatted error message with line number and token info.
+   */
+  __attribute__((format(printf, 2, 3))) void Error(const char *msg, ...);
+
+ private:
+  /** Reads one raw character from the file (EOF at end of file). */
+  int GetChar_() {
+    return ::getc(f_);
+  }
+
+  /** Pushes one character back onto the file stream. */
+  void Unget_(int c) {
+    ::ungetc(c, f_);
+  }
+
+  /** Whether the end-of-file indicator of the stream is set. */
+  bool IsEOF_() {
+    return ::feof(f_) != 0;
+  }
+
+  char Skip_(std::vector<char>& token);
+
+  char NextChar_(std::vector<char>& token);
+
+  char NextChar_();
+
+  void UndoNextChar_(std::vector<char>& token);
+
+  void Error_(const char *msg, const std::vector<char>& token);
+
+  /**
+   * Whether c may start an identifier.  The value is reduced to the
+   * unsigned char range first, because the <ctype.h> functions are not
+   * defined for negative values other than EOF.
+   */
+  bool isident_begin_(int c) const {
+    return isalpha(static_cast<unsigned char>(c)) || (c == '_');
+  }
+
+  /**
+   * Whether c may continue an identifier: alphanumeric, '_' or one of the
+   * extra identifier characters.  0 is excluded explicitly because strchr()
+   * would match the terminator of ident_extra_.
+   */
+  bool isident_rest_(int c) const {
+    return isalnum(static_cast<unsigned char>(c)) || (c == '_')
+        || (c != 0 && strchr(ident_extra_, c));
+  }
+
+  void ScanNumber_(char c, std::vector<char>& token);
+
+  void ScanString_(char ending, std::vector<char>& token);
+
+  void Scan_(std::vector<char>& token);
+};
+
+/**
+ * Helper for writing text to a file.
+ *
+ * NOTE(review): the class holds a raw FILE pointer and declares no copy
+ * operations, so a copied writer would close the same file twice; avoid
+ * copying instances.
+ */
+class TextWriter {
+
+ private:
+  FILE *f_;  // Open file, or NULL when no file is open.
+
+ public:
+  /**
+   * Creates an uninitialized text writer (you must initialize it).
+   */
+  TextWriter() {
+    f_ = NULL;
+  }
+
+  /**
+   * Automatically closes the file when it gets out of scope; for
+   * best error handling, you should call Close first.
+   *
+   * If you do not explicitly close the file beforehand, the result of
+   * closing it is checked with mlpack::Log::Assert.
+   */
+  ~TextWriter() {
+    if (f_) {
+      // Close exactly once: calling fclose() a second time on the same
+      // stream is undefined behaviour.
+      mlpack::Log::Assert(fclose(f_) >= 0, "File close failed!");
+    }
+    f_ = NULL;
+  }
+
+  /**
+   * Opens a file by name (initializer).  A file that is still open from an
+   * earlier call is closed first so that its handle is not leaked.
+   *
+   * @return success or failure
+   */
+  bool Open(const char *fname) {
+    if (f_ != NULL) {
+      (void) fclose(f_);
+    }
+    f_ = ::fopen(fname, "w");
+    return (f_ != NULL);
+  }
+
+  /**
+   * Explicitly closes the file.
+   *
+   * @return false if closing failed; true otherwise, including when no file
+   *     was open (there is nothing to close in that case).
+   */
+  bool Close() {
+    // Passing a null pointer to fclose() is undefined behaviour.
+    if (f_ == NULL) {
+      return true;
+    }
+    int rv = fclose(f_);
+    f_ = NULL;
+    return (rv >= 0);
+  }
+
+  /** Writes printf-style formatted text; defined in the implementation file. */
+  bool Printf(const char *format, ...);
+
+  /**
+   * Writes a C string.
+   *
+   * fputs() returns EOF on failure and an unspecified non-negative value on
+   * success, which may legitimately be 0.
+   */
+  bool Write(const char *s) {
+    return (fputs(s, f_) >= 0);
+  }
+
+  /** Writes an int in decimal. */
+  bool Write(int i) {
+    return (fprintf(f_, "%d", i) > 0);
+  }
+
+  /** Writes an unsigned int in decimal. */
+  bool Write(unsigned int i) {
+    return (fprintf(f_, "%u", i) > 0);
+  }
+
+  /** Writes a long in decimal. */
+  bool Write(long i) {
+    return (fprintf(f_, "%ld", i) > 0);
+  }
+
+  /** Writes an unsigned long in decimal. */
+  bool Write(unsigned long i) {
+    return (fprintf(f_, "%lu", i) > 0);
+  }
+
+  /** Writes a long long in decimal. */
+  bool Write(long long i) {
+    return (fprintf(f_, "%lld", i) > 0);
+  }
+
+  /** Writes an unsigned long long in decimal. */
+  bool Write(unsigned long long i) {
+    return (fprintf(f_, "%llu", i) > 0);
+  }
+
+  /** Writes a double in scientific notation with 15 fractional digits. */
+  bool Write(double d) {
+    return (fprintf(f_, "%.15e", d) > 0);
+  }
+};
+
+}; // namespace mlpack
+
+#endif // __MLPACK_CORE_FILE_TEXTFILE_HPP

Deleted: mlpack/trunk/src/mlpack/core/file/textfile_test.cc
===================================================================
--- mlpack/trunk/src/mlpack/core/file/textfile_test.cc	2011-10-31 15:54:31 UTC (rev 10090)
+++ mlpack/trunk/src/mlpack/core/file/textfile_test.cc	2011-10-31 16:07:01 UTC (rev 10091)
@@ -1,58 +0,0 @@
-#include "textfile.h"
-#include <math.h>
-
-#define BOOST_TEST_MODULE TextFileTest
-#include <boost/test/unit_test.hpp>
-
-/*void Test1() {
-  TextTokenizer scanner;
-  const char *input = xrun_param_str("input");
-  
-  scanner.Open(input, "#", "", TextTokenizer::WANT_NEWLINE);
-  
-  while (scanner.PeekType() != TextTokenizer::END) {
-    fprintf(stderr, "Got: %d, [%s]\n", scanner.PeekType(), scanner.Peek().c_str());
-    scanner.Gobble();
-  }
-}*/
-
-BOOST_AUTO_TEST_CASE(Test2) {
-  const char *fname = "tmpfile.txt";
-  TextWriter writer;
-  
-  writer.Open(fname);
-  writer.Printf("@begin(1, 1.0, 1.0e-31, abc-123, \"123\", '123') # comment here\r");
-  writer.Printf("@end(2.0e-21)\r\nabc");
-  writer.Close();
-  
-  TextTokenizer scanner;
-  scanner.Open(fname, "#", "-", TextTokenizer::WANT_NEWLINE);
-
-  BOOST_REQUIRE(scanner.MatchPunct());
-  BOOST_REQUIRE(scanner.MatchIdentifier());
-  BOOST_REQUIRE(scanner.MatchPunct());
-  BOOST_REQUIRE(scanner.MatchInteger());
-  BOOST_REQUIRE(scanner.Match(","));
-  BOOST_REQUIRE(scanner.MatchDouble());
-  BOOST_REQUIRE(scanner.Match(","));
-  BOOST_REQUIRE(scanner.MatchDouble());
-  BOOST_REQUIRE(scanner.Match(","));
-  BOOST_REQUIRE(scanner.MatchType(TextTokenizer::IDENTIFIER));
-  BOOST_REQUIRE(scanner.Match(","));
-  BOOST_REQUIRE(scanner.MatchString());
-  BOOST_REQUIRE(scanner.Match(","));
-  BOOST_REQUIRE(scanner.MatchString());
-  BOOST_REQUIRE(scanner.MatchPunct());
-  BOOST_REQUIRE(scanner.Match("\n"));
-  BOOST_REQUIRE(scanner.Match("@"));
-  BOOST_REQUIRE(scanner.Match("end"));
-  BOOST_REQUIRE(scanner.Match("("));
-  BOOST_REQUIRE(scanner.Match("2.0e-21"));
-  //assert(scanner.Current() == "2.0e-21");
-  BOOST_REQUIRE(scanner.Current() == "2.0e-21");
-  //assert(fabs(strtod(scanner.Current().c_str(), NULL) - 2.0e-21) < 1.0e-30);
-  BOOST_REQUIRE_CLOSE(strtod(scanner.Current().c_str(), NULL),2.0e-21, 1e-5);
-  BOOST_REQUIRE(scanner.Match(")"));
-  BOOST_REQUIRE(scanner.Match("\n"));
-  BOOST_REQUIRE(scanner.Match("abc"));
-}

Copied: mlpack/trunk/src/mlpack/core/file/textfile_test.cpp (from rev 10083, mlpack/trunk/src/mlpack/core/file/textfile_test.cc)
===================================================================
--- mlpack/trunk/src/mlpack/core/file/textfile_test.cpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/file/textfile_test.cpp	2011-10-31 16:07:01 UTC (rev 10091)
@@ -0,0 +1,59 @@
+#include "textfile.hpp"
+#include <math.h>
+
+#define BOOST_TEST_MODULE TextFileTest
+#include <boost/test/unit_test.hpp>
+
+/*void Test1() {
+  TextTokenizer scanner;
+  const char *input = xrun_param_str("input");
+  
+  scanner.Open(input, "#", "", TextTokenizer::WANT_NEWLINE);
+  
+  while (scanner.PeekType() != TextTokenizer::END) {
+    fprintf(stderr, "Got: %d, [%s]\n", scanner.PeekType(), scanner.Peek().c_str());
+    scanner.Gobble();
+  }
+}*/
+
+/**
+ * Round-trip test: writes a small file with TextWriter, then reads it back
+ * with TextTokenizer and checks the token sequence.
+ *
+ * The first written line ends in a bare '\r' and the second in "\r\n"; the
+ * checks below require the scanner to produce a "\n" token for each.
+ */
+BOOST_AUTO_TEST_CASE(Test2) {
+  using namespace mlpack;
+  const char *fname = "tmpfile.txt";
+  TextWriter writer;
+
+  // Stop immediately if the file cannot be opened: Printf() on an unopened
+  // writer would hand a NULL FILE* to stdio.
+  BOOST_REQUIRE(writer.Open(fname));
+  writer.Printf("@begin(1, 1.0, 1.0e-31, abc-123, \"123\", '123') # comment here\r");
+  writer.Printf("@end(2.0e-21)\r\nabc");
+  // A failed close means the data may not have reached the file.
+  BOOST_REQUIRE(writer.Close());
+
+  TextTokenizer scanner;
+  // NOTE(review): "#" and "-" look like the comment characters and the extra
+  // identifier characters respectively -- confirm against TextTokenizer.
+  scanner.Open(fname, "#", "-", TextTokenizer::WANT_NEWLINE);
+
+  // First line: @begin(1, 1.0, 1.0e-31, abc-123, "123", '123')
+  BOOST_REQUIRE(scanner.MatchPunct());
+  BOOST_REQUIRE(scanner.MatchIdentifier());
+  BOOST_REQUIRE(scanner.MatchPunct());
+  BOOST_REQUIRE(scanner.MatchInteger());
+  BOOST_REQUIRE(scanner.Match(","));
+  BOOST_REQUIRE(scanner.MatchDouble());
+  BOOST_REQUIRE(scanner.Match(","));
+  BOOST_REQUIRE(scanner.MatchDouble());
+  BOOST_REQUIRE(scanner.Match(","));
+  BOOST_REQUIRE(scanner.MatchType(TextTokenizer::IDENTIFIER));
+  BOOST_REQUIRE(scanner.Match(","));
+  BOOST_REQUIRE(scanner.MatchString());
+  BOOST_REQUIRE(scanner.Match(","));
+  BOOST_REQUIRE(scanner.MatchString());
+  BOOST_REQUIRE(scanner.MatchPunct());
+  BOOST_REQUIRE(scanner.Match("\n"));
+
+  // Second line: @end(2.0e-21)
+  BOOST_REQUIRE(scanner.Match("@"));
+  BOOST_REQUIRE(scanner.Match("end"));
+  BOOST_REQUIRE(scanner.Match("("));
+  BOOST_REQUIRE(scanner.Match("2.0e-21"));
+  // The matched token must be kept verbatim and must parse to the same value.
+  BOOST_REQUIRE(scanner.Current() == "2.0e-21");
+  BOOST_REQUIRE_CLOSE(strtod(scanner.Current().c_str(), NULL),2.0e-21, 1e-5);
+  BOOST_REQUIRE(scanner.Match(")"));
+  BOOST_REQUIRE(scanner.Match("\n"));
+
+  // Trailing text with no line terminator.
+  BOOST_REQUIRE(scanner.Match("abc"));
+}

Modified: mlpack/trunk/src/mlpack/core.h
===================================================================
--- mlpack/trunk/src/mlpack/core.h	2011-10-31 15:54:31 UTC (rev 10090)
+++ mlpack/trunk/src/mlpack/core.h	2011-10-31 16:07:01 UTC (rev 10091)
@@ -93,6 +93,6 @@
 #include <mlpack/core/math/math_misc.hpp>
 #include <mlpack/core/math/range.hpp>
 #include <mlpack/core/utilities/save_restore_utility.hpp>
-#include <mlpack/core/file/textfile.h>
+#include <mlpack/core/file/textfile.hpp>
 
 #endif




More information about the mlpack-svn mailing list