[mlpack-svn] r10091 - in mlpack/trunk/src/mlpack: . core/file
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Oct 31 12:07:01 EDT 2011
Author: jcline3
Date: 2011-10-31 12:07:01 -0400 (Mon, 31 Oct 2011)
New Revision: 10091
Added:
mlpack/trunk/src/mlpack/core/file/textfile.cpp
mlpack/trunk/src/mlpack/core/file/textfile.hpp
mlpack/trunk/src/mlpack/core/file/textfile_test.cpp
Removed:
mlpack/trunk/src/mlpack/core/file/textfile.cc
mlpack/trunk/src/mlpack/core/file/textfile.h
mlpack/trunk/src/mlpack/core/file/textfile_test.cc
Modified:
mlpack/trunk/src/mlpack/core.h
mlpack/trunk/src/mlpack/core/file/CMakeLists.txt
Log:
core/file to hpp & cpp
Modified: mlpack/trunk/src/mlpack/core/file/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/core/file/CMakeLists.txt 2011-10-31 15:54:31 UTC (rev 10090)
+++ mlpack/trunk/src/mlpack/core/file/CMakeLists.txt 2011-10-31 16:07:01 UTC (rev 10091)
@@ -3,8 +3,8 @@
# Define the files that we need to compile.
# Anything not in this list will not be compiled into MLPACK.
set(SOURCES
- textfile.h
- textfile.cc
+ textfile.hpp
+ textfile.cpp
)
# add directory name to sources
@@ -18,7 +18,7 @@
# test executable
add_executable(textfile_test
- textfile_test.cc
+ textfile_test.cpp
)
# dependencies of test executable
target_link_libraries(textfile_test
Deleted: mlpack/trunk/src/mlpack/core/file/textfile.cc
===================================================================
--- mlpack/trunk/src/mlpack/core/file/textfile.cc 2011-10-31 15:54:31 UTC (rev 10090)
+++ mlpack/trunk/src/mlpack/core/file/textfile.cc 2011-10-31 16:07:01 UTC (rev 10091)
@@ -1,418 +0,0 @@
-/**
- * @file textfile.cc
- *
- * Implementations for the text-based file I/O helper classes.
- *
- * @bug These routines fail when trying to read files linewise that use the Mac
- * eol '\r'. Both Windows and Unix eol ("\r\n" and '\n') work. Use the
- * programs 'dos2unix' or 'tr' to convert the '\r's to '\n's.
- *
- */
-
-#include "textfile.h"
-
-#include "../io/cli.hpp"
-#include "../io/log.hpp"
-#include <ctype.h>
-#include <iostream>
-
-/*
-char *TextTokenizer::ReadLine() {
- char *buf = NULL;
- size_t size = 0;
- size_t len = 0;
- const size_t extra = 64;
- int c;
-
- for (;;) {
- c = getc(f_);
-
- if (c == '\r') {
- c = getc(f_);
- if (c != '\n') {
- ungetc(c, f_);
- }
- break;
- } else if ((c == '\n') {
- break;
- } else if (c == EOF) {
- if (len == 0) {
- return NULL;
- } else {
- break;
- }
- }
-
- len++;
-
- if (size <= len) {
- size = len * 2 + extra;
- buf = mem::Realloc(buf, size);
- }
-
- buf[len-1] = c;
- }
-
- if (len == 0) {
- // special case: empty line
- buf = mem::Alloc<char>(1);
- }
-
- buf[len] = '\0';
-
- return buf;
-}
-*/
-
-void TextLineReader::Error(const char *format, ...) {
- va_list vl;
-
- // TODO: Use a warning propagation system
- fprintf(stderr, ".| %d: %s\nX| `-> ", line_num_, line_.c_str());
-
- va_start(vl, format);
- vfprintf(stderr, format, vl);
- va_end(vl);
-
- fprintf(stderr, "\n");
-}
-
-bool TextLineReader::Open(const char *fname) {
- f_ = fopen(fname, "r");
- fname_ = fname;
- line_num_ = 0;
- has_line_ = false;
-
- if (f_ == NULL) {
- return false;
- } else {
- Gobble();
- return true;
- }
-}
-
-bool TextLineReader::Gobble() {
- char *ptr = ReadLine_();
-
- if (ptr != NULL) {
- line_ = ptr;
- has_line_ = true;
- line_num_++;
- delete[] ptr;
- return true;
- } else {
- has_line_ = false;
- delete[] ptr;
- return false;
- }
-}
-
-char *TextLineReader::ReadLine_() {
- char *buf = NULL;
- size_t size = 1;
- size_t len = 0;
-#ifdef DEBUG
- const size_t extra = 10;
-#else
- const size_t extra = 80;
-#endif
-
- for (;;) {
- size = size * 2 + extra;
-
- // Reallocate
- char* newbuf = new char[size];
- if (buf != NULL) {
- memcpy(newbuf, buf, len * sizeof(char));
- delete[] buf;
- }
- buf = newbuf;
-
- //! doesn't handle mac eol - OK?
- char *result = ::fgets(buf + len, size - len, f_);
- if (len == 0 && result == NULL) {
- delete[] buf;
- return NULL;
- }
- len += strlen(buf + len);
- if (buf[len - 1] == '\r') { // is there a following \n we didn't pick up?
- // we'll peek at the next character and keep it if it's \n, but move the
- // pointer back a position if it isn't
- char tmp = fgetc(f_);
- if(tmp == '\n') { // append to end
- size++;
-
- char* newbuf = new char[size * sizeof(char)];
- memcpy(newbuf, buf, (size - 1) * sizeof(char));
- delete[] buf;
- buf = newbuf;
- buf[len] = tmp;
- } else {
- // go back a character
- fseek(f_, -1, SEEK_CUR);
- }
- }
-
- if (len < size - 1 || buf[len - 1] == '\r' || buf[len - 1] == '\n') {
- while (len && (buf[len-1] == '\r' || buf[len-1] == '\n')) {
- len--;
- }
- buf[len] = '\0';
- return buf;
- }
- }
-}
-
-bool TextTokenizer::Open(const char *fname,
- const char *comment_chars_in, const char *ident_extra_in,
- int features_in) {
- next_ = "";
- cur_ = "";
- next_type_ = END;
- cur_type_ = END;
- comment_start_ = comment_chars_in;
- features_ = features_in;
- ident_extra_ = ident_extra_in;
- line_ = 1;
-
- f_ = fopen(fname, "r");
-
- if (f_ == NULL) {
- return false;
- } else {
- Gobble();
- return true;
- }
-}
-
-char TextTokenizer::NextChar_() {
- int c = GetChar_();
-
- if (c != EOF && (strchr(comment_start_, c) != NULL)) {
- do {
- c = GetChar_();
- } while (c != EOF && c != '\r' && c != '\n');
- }
-
- if (c == EOF) {
- c = 0;
- }
-
- return c;
-}
-
-char TextTokenizer::NextChar_(std::vector<char>& token) {
- char c = NextChar_();
-
- token.push_back(c);
-
- return c;
-}
-
-char TextTokenizer::Skip_(std::vector<char>& token) {
- int c;
-
- while (1) {
- c = NextChar_();
- if (!isspace(c)) {
- break;
- }
-
- if (c == '\r' || c == '\n') {
- if (c == '\r') {
- c = NextChar_();
- if (c != '\n') {
- Unget_(c);
- }
- }
- line_++;
- if ((features_ & WANT_NEWLINE)) {
- c = '\n';
- break;
- }
- }
- }
-
- token.push_back(char(c));
-
- return char(c);
-}
-
-void TextTokenizer::UndoNextChar_(std::vector<char>& token) {
- char c;
- c = token.back();
- token.pop_back();
- if (c != 0) { /* don't put EOF back on the stream */
- Unget_(c);
- }
-}
-
-void Sanitize(const std::string& src, std::string& dest) {
- for (size_t i = 0; i < src.length(); i++) {
- char c = src[i];
-
- if (isgraph(c) || c == ' ' || c == '\t') {
- dest += c;
- } else if (isspace(c)) {
- dest += "<whitespace>";
- } else {
- dest += "<nonprint>";
- }
- }
-}
-
-void TextTokenizer::Error(const char *format, ...) {
- va_list vl;
- std::string cur_sanitized;
- std::string next_sanitized;
-
- Sanitize(cur_, cur_sanitized);
- Sanitize(next_, next_sanitized);
-
- // TODO: Use a warning propagation system
- fprintf(stderr, ".| %d: %s <-HERE-> %s\nX| `-> ", line_,
- cur_sanitized.c_str(), next_sanitized.c_str());
-
- va_start(vl, format);
- vfprintf(stderr, format, vl);
- va_end(vl);
-
- fprintf(stderr, "\n");
-}
-
-void TextTokenizer::Error_(const char *msg, const std::vector<char>& token) {
- next_type_ = INVALID;
-
- printf("size is %zu, token[0] = %d\n", token.size(), token[0]);
- Error("%s", msg);
- next_.clear();
-}
-
-void TextTokenizer::ScanNumber_(char c, std::vector<char>& token) {
- bool dot = false;
- bool floating = false;
-
- while (1) {
- if (c == '.') {
- /* handle a period */
- if (dot) {
- Error_("Multiple decimal points in a float", token);
- return;
- }
- dot = true;
- floating = true;
- } else if (isdigit(c)) {
- /* keep on processing digits */
- } else if (c == 'e' || c == 'E') {
- /* exponent - read exponent and finish */
- c = NextChar_(token);
- if (c == '+' || c == '-') {
- c = NextChar_(token);
- }
- while (isdigit(c)) {
- c = NextChar_(token);
- }
- floating = true;
- break;
- } else {
- /* non numeric */
- break;
- }
-
- c = NextChar_(token);
- }
-
- if (c == 'f' || c == 'F') {
- // It's labelled a float. Gobble and go.
- floating = true;
- } else if (isspace(c) || ispunct(c)) {
- UndoNextChar_(token);
- } else {
- Error_("Invalid character while parsing number", token);
- }
-
- if (floating) {
- next_type_ = DOUBLE;
- } else {
- next_type_ = INTEGER;
- }
-}
-
-void TextTokenizer::ScanString_(char ending, std::vector<char>& token) {
- int c;
-
- while (1) {
- c = NextChar_(token);
-
- if (c == 0) {
- Error_("Unterminated String", token);
- UndoNextChar_(token);
- return;
- }
-
- if (c == ending) {
- next_type_ = STRING;
- return;
- }
- }
-}
-
-void TextTokenizer::Scan_(std::vector<char>& token) {
- char c = Skip_(token);
-
- if (c == 0) {
- // Makes token's capacity = 0
- token.clear();
- next_type_ = END;
- return;
- } else if (c == '.' || isdigit(c)) {
- ScanNumber_(c, token);
- } else if (isident_begin_(c)) {
- while (isident_rest_(NextChar_(token))) {}
- UndoNextChar_(token);
- next_type_ = IDENTIFIER;
- } else if (ispunct(c) || isspace(c)) {
- if (c == '"' || c == '\'') {
- ScanString_(c, token);
- } else if (c == '+' || c == '-') {
- c = NextChar_(token);
- if (c == '.' || isdigit(c)) {
- ScanNumber_(c, token);
- } else {
- UndoNextChar_(token);
- }
- } else {
- next_type_ = PUNCT;
- }
- } else {
- Error_("Unknown Character", token);
- }
-}
-
-void TextTokenizer::Gobble() {
-
- cur_.assign(next_);
- next_.clear();
- cur_type_ = next_type_;
-
- std::vector<char> token;
- Scan_(token);
- token.push_back('\0');
-
- next_ = &token.front();
-
- mlpack::Log::Assert(next_.length() == strlen(next_.c_str()));
-}
-
-bool TextWriter::Printf(const char *format, ...) {
- int rv;
-
- va_list vl;
-
- va_start(vl, format);
- rv = vfprintf(f_, format, vl);
- va_end(vl);
-
- return (rv > 0);
-}
-
Copied: mlpack/trunk/src/mlpack/core/file/textfile.cpp (from rev 10083, mlpack/trunk/src/mlpack/core/file/textfile.cc)
===================================================================
--- mlpack/trunk/src/mlpack/core/file/textfile.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/core/file/textfile.cpp 2011-10-31 16:07:01 UTC (rev 10091)
@@ -0,0 +1,420 @@
+/**
+ * @file textfile.cpp
+ *
+ * Implementations for the text-based file I/O helper classes.
+ *
+ * @bug These routines fail when trying to read files linewise that use the Mac
+ * eol '\r'. Both Windows and Unix eol ("\r\n" and '\n') work. Use the
+ * programs 'dos2unix' or 'tr' to convert the '\r's to '\n's.
+ *
+ */
+
+#include "textfile.hpp"
+
+#include "../io/cli.hpp"
+#include "../io/log.hpp"
+#include <ctype.h>
+#include <iostream>
+
+using namespace mlpack;
+
+/*
+char *TextTokenizer::ReadLine() {
+ char *buf = NULL;
+ size_t size = 0;
+ size_t len = 0;
+ const size_t extra = 64;
+ int c;
+
+ for (;;) {
+ c = getc(f_);
+
+ if (c == '\r') {
+ c = getc(f_);
+ if (c != '\n') {
+ ungetc(c, f_);
+ }
+ break;
+ } else if ((c == '\n') {
+ break;
+ } else if (c == EOF) {
+ if (len == 0) {
+ return NULL;
+ } else {
+ break;
+ }
+ }
+
+ len++;
+
+ if (size <= len) {
+ size = len * 2 + extra;
+ buf = mem::Realloc(buf, size);
+ }
+
+ buf[len-1] = c;
+ }
+
+ if (len == 0) {
+ // special case: empty line
+ buf = mem::Alloc<char>(1);
+ }
+
+ buf[len] = '\0';
+
+ return buf;
+}
+*/
+
+void TextLineReader::Error(const char *format, ...) {
+ va_list vl;
+
+ // TODO: Use a warning propagation system
+ fprintf(stderr, ".| %d: %s\nX| `-> ", line_num_, line_.c_str());
+
+ va_start(vl, format);
+ vfprintf(stderr, format, vl);
+ va_end(vl);
+
+ fprintf(stderr, "\n");
+}
+
+bool TextLineReader::Open(const char *fname) {
+ f_ = fopen(fname, "r");
+ fname_ = fname;
+ line_num_ = 0;
+ has_line_ = false;
+
+ if (f_ == NULL) {
+ return false;
+ } else {
+ Gobble();
+ return true;
+ }
+}
+
+bool TextLineReader::Gobble() {
+ char *ptr = ReadLine_();
+
+ if (ptr != NULL) {
+ line_ = ptr;
+ has_line_ = true;
+ line_num_++;
+ delete[] ptr;
+ return true;
+ } else {
+ has_line_ = false;
+ delete[] ptr;
+ return false;
+ }
+}
+
+char *TextLineReader::ReadLine_() {
+ char *buf = NULL;
+ size_t size = 1;
+ size_t len = 0;
+#ifdef DEBUG
+ const size_t extra = 10;
+#else
+ const size_t extra = 80;
+#endif
+
+ for (;;) {
+ size = size * 2 + extra;
+
+ // Reallocate
+ char* newbuf = new char[size];
+ if (buf != NULL) {
+ memcpy(newbuf, buf, len * sizeof(char));
+ delete[] buf;
+ }
+ buf = newbuf;
+
+ //! doesn't handle mac eol - OK?
+ char *result = ::fgets(buf + len, size - len, f_);
+ if (len == 0 && result == NULL) {
+ delete[] buf;
+ return NULL;
+ }
+ len += strlen(buf + len);
+ if (buf[len - 1] == '\r') { // is there a following \n we didn't pick up?
+ // we'll peek at the next character and keep it if it's \n, but move the
+ // pointer back a position if it isn't
+ char tmp = fgetc(f_);
+ if(tmp == '\n') { // append to end
+ size++;
+
+ char* newbuf = new char[size * sizeof(char)];
+ memcpy(newbuf, buf, (size - 1) * sizeof(char));
+ delete[] buf;
+ buf = newbuf;
+ buf[len] = tmp;
+ } else {
+ // go back a character
+ fseek(f_, -1, SEEK_CUR);
+ }
+ }
+
+ if (len < size - 1 || buf[len - 1] == '\r' || buf[len - 1] == '\n') {
+ while (len && (buf[len-1] == '\r' || buf[len-1] == '\n')) {
+ len--;
+ }
+ buf[len] = '\0';
+ return buf;
+ }
+ }
+}
+
+bool TextTokenizer::Open(const char *fname,
+ const char *comment_chars_in, const char *ident_extra_in,
+ int features_in) {
+ next_ = "";
+ cur_ = "";
+ next_type_ = END;
+ cur_type_ = END;
+ comment_start_ = comment_chars_in;
+ features_ = features_in;
+ ident_extra_ = ident_extra_in;
+ line_ = 1;
+
+ f_ = fopen(fname, "r");
+
+ if (f_ == NULL) {
+ return false;
+ } else {
+ Gobble();
+ return true;
+ }
+}
+
+char TextTokenizer::NextChar_() {
+ int c = GetChar_();
+
+ if (c != EOF && (strchr(comment_start_, c) != NULL)) {
+ do {
+ c = GetChar_();
+ } while (c != EOF && c != '\r' && c != '\n');
+ }
+
+ if (c == EOF) {
+ c = 0;
+ }
+
+ return c;
+}
+
+char TextTokenizer::NextChar_(std::vector<char>& token) {
+ char c = NextChar_();
+
+ token.push_back(c);
+
+ return c;
+}
+
+char TextTokenizer::Skip_(std::vector<char>& token) {
+ int c;
+
+ while (1) {
+ c = NextChar_();
+ if (!isspace(c)) {
+ break;
+ }
+
+ if (c == '\r' || c == '\n') {
+ if (c == '\r') {
+ c = NextChar_();
+ if (c != '\n') {
+ Unget_(c);
+ }
+ }
+ line_++;
+ if ((features_ & WANT_NEWLINE)) {
+ c = '\n';
+ break;
+ }
+ }
+ }
+
+ token.push_back(char(c));
+
+ return char(c);
+}
+
+void TextTokenizer::UndoNextChar_(std::vector<char>& token) {
+ char c;
+ c = token.back();
+ token.pop_back();
+ if (c != 0) { /* don't put EOF back on the stream */
+ Unget_(c);
+ }
+}
+
+void Sanitize(const std::string& src, std::string& dest) {
+ for (size_t i = 0; i < src.length(); i++) {
+ char c = src[i];
+
+ if (isgraph(c) || c == ' ' || c == '\t') {
+ dest += c;
+ } else if (isspace(c)) {
+ dest += "<whitespace>";
+ } else {
+ dest += "<nonprint>";
+ }
+ }
+}
+
+void TextTokenizer::Error(const char *format, ...) {
+ va_list vl;
+ std::string cur_sanitized;
+ std::string next_sanitized;
+
+ Sanitize(cur_, cur_sanitized);
+ Sanitize(next_, next_sanitized);
+
+ // TODO: Use a warning propagation system
+ fprintf(stderr, ".| %d: %s <-HERE-> %s\nX| `-> ", line_,
+ cur_sanitized.c_str(), next_sanitized.c_str());
+
+ va_start(vl, format);
+ vfprintf(stderr, format, vl);
+ va_end(vl);
+
+ fprintf(stderr, "\n");
+}
+
+void TextTokenizer::Error_(const char *msg, const std::vector<char>& token) {
+ next_type_ = INVALID;
+
+ printf("size is %zu, token[0] = %d\n", token.size(), token[0]);
+ Error("%s", msg);
+ next_.clear();
+}
+
+void TextTokenizer::ScanNumber_(char c, std::vector<char>& token) {
+ bool dot = false;
+ bool floating = false;
+
+ while (1) {
+ if (c == '.') {
+ /* handle a period */
+ if (dot) {
+ Error_("Multiple decimal points in a float", token);
+ return;
+ }
+ dot = true;
+ floating = true;
+ } else if (isdigit(c)) {
+ /* keep on processing digits */
+ } else if (c == 'e' || c == 'E') {
+ /* exponent - read exponent and finish */
+ c = NextChar_(token);
+ if (c == '+' || c == '-') {
+ c = NextChar_(token);
+ }
+ while (isdigit(c)) {
+ c = NextChar_(token);
+ }
+ floating = true;
+ break;
+ } else {
+ /* non numeric */
+ break;
+ }
+
+ c = NextChar_(token);
+ }
+
+ if (c == 'f' || c == 'F') {
+ // It's labelled a float. Gobble and go.
+ floating = true;
+ } else if (isspace(c) || ispunct(c)) {
+ UndoNextChar_(token);
+ } else {
+ Error_("Invalid character while parsing number", token);
+ }
+
+ if (floating) {
+ next_type_ = DOUBLE;
+ } else {
+ next_type_ = INTEGER;
+ }
+}
+
+void TextTokenizer::ScanString_(char ending, std::vector<char>& token) {
+ int c;
+
+ while (1) {
+ c = NextChar_(token);
+
+ if (c == 0) {
+ Error_("Unterminated String", token);
+ UndoNextChar_(token);
+ return;
+ }
+
+ if (c == ending) {
+ next_type_ = STRING;
+ return;
+ }
+ }
+}
+
+void TextTokenizer::Scan_(std::vector<char>& token) {
+ char c = Skip_(token);
+
+ if (c == 0) {
+ // Empties the token (size becomes 0; clear() does not release capacity)
+ token.clear();
+ next_type_ = END;
+ return;
+ } else if (c == '.' || isdigit(c)) {
+ ScanNumber_(c, token);
+ } else if (isident_begin_(c)) {
+ while (isident_rest_(NextChar_(token))) {}
+ UndoNextChar_(token);
+ next_type_ = IDENTIFIER;
+ } else if (ispunct(c) || isspace(c)) {
+ if (c == '"' || c == '\'') {
+ ScanString_(c, token);
+ } else if (c == '+' || c == '-') {
+ c = NextChar_(token);
+ if (c == '.' || isdigit(c)) {
+ ScanNumber_(c, token);
+ } else {
+ UndoNextChar_(token);
+ }
+ } else {
+ next_type_ = PUNCT;
+ }
+ } else {
+ Error_("Unknown Character", token);
+ }
+}
+
+void TextTokenizer::Gobble() {
+
+ cur_.assign(next_);
+ next_.clear();
+ cur_type_ = next_type_;
+
+ std::vector<char> token;
+ Scan_(token);
+ token.push_back('\0');
+
+ next_ = &token.front();
+
+ mlpack::Log::Assert(next_.length() == strlen(next_.c_str()));
+}
+
+bool TextWriter::Printf(const char *format, ...) {
+ int rv;
+
+ va_list vl;
+
+ va_start(vl, format);
+ rv = vfprintf(f_, format, vl);
+ va_end(vl);
+
+ return (rv > 0);
+}
+
Deleted: mlpack/trunk/src/mlpack/core/file/textfile.h
===================================================================
--- mlpack/trunk/src/mlpack/core/file/textfile.h 2011-10-31 15:54:31 UTC (rev 10090)
+++ mlpack/trunk/src/mlpack/core/file/textfile.h 2011-10-31 16:07:01 UTC (rev 10091)
@@ -1,387 +0,0 @@
-/**
- * @file textfile.h
- *
- * Small wrappers for text files.
- * The most useful thing here is the ReadLine function.
- *
- * @bug These routines fail when trying to read files linewise that use the Mac
- * eol '\r'. Both Windows and Unix eol ("\r\n" and '\n') work. Use the
- * programs 'dos2unix' or 'tr' to convert the '\r's to '\n's.
- *
- */
-
-#ifndef FILE_TEXTFILE_H
-#define FILE_TEXTFILE_H
-
-#include "../io/cli.hpp"
-#include "../io/log.hpp"
-
-#include <cstdio>
-#include <ctype.h>
-#include <stdarg.h>
-
-#include <string>
-#include <vector>
-
-/**
- * Helper for reading text files.
- *
- * Files are closed automatically when they fall out of scope, though
- * you may choose to close it yourself at no harm.
- */
-class TextLineReader {
-
- private:
- FILE *f_;
- std::string line_;
- int line_num_;
- bool has_line_;
- std::string fname_;
-
- public:
- /** Creates an unitialized object. */
- TextLineReader() {
- f_ = NULL;
- }
-
- /**
- * Automatically closes the file.
- */
- ~TextLineReader() {
- if (f_)
- ::fclose(f_);
- }
-
- /**
- * Opens a file.
- *
- * @return success value
- */
- bool Open(const char *fname);
-
- /**
- * Closes the file.
- *
- * (No need for a return value since you are only reading the file.)
- */
- void Close() {
- (void)fclose(f_);
- f_ = NULL;
- }
-
- /**
- * Return the name of the file we are working with.
- * This will return NULL if no file has been opened yet.
- */
- const std::string& filename() const {
- return fname_;
- }
-
- /**
- * Are there more lines left?
- */
- bool MoreLines() {
- return has_line_;
- }
-
- /**
- * Returns the currnet line number.
- */
- int line_num() const {
- return line_num_;
- }
-
- /**
- * Gets the current line.
- */
- std::string& Peek() {
- return line_;
- }
-
- /**
- * Gets the current line.
- */
- const std::string& Peek() const {
- return line_;
- }
-
- /**
- * Tries to read one line from a file.
- *
- * @return true if a line was returned, false if end of file
- */
- bool Gobble();
-
- /**
- * Prints a formatted error message with line number info.
- */
- __attribute__((format(printf, 2, 3))) void Error(const char *msg, ...);
-
- private:
- char *ReadLine_();
-};
-
-/**
- * Simple text tokenizer.
- *
- * This tokenizes the input stream. It will identify the general type of
- * token, and ignore whitespace and different kinds of comments.
- *
- * This has the concept of 'next' token which you can peek at, and the
- * current token. The current token is always initialized to empty at
- * the very beginning, because you are encouraged to peek ahead one token.
- *
- * Example:
- *
- * @code
- * TextTokenizer tokenizer;
- * tokenizer.Open("file.txt");
- *
- * if (tokenizer->Match("count")) {
- * if (tokenizer->Match(TextTokenizer::INTEGER)) {
- * printf("Found number: %d\n", atoi(tokenizer->Current()));
- * } else { Error(); }
- * } else { Error(); }
- * @endcode
- */
-class TextTokenizer {
- public:
- enum TokenType {
- INVALID = -1,
- END,
- PUNCT,
- IDENTIFIER,
- STRING,
- DOUBLE,
- INTEGER
- };
-
- enum Features {
- WANT_NEWLINE = 0x01
- };
-
- private:
- FILE *f_;
- std::string next_;
- TokenType next_type_;
- std::string cur_;
- TokenType cur_type_;
- const char *comment_start_;
- const char *ident_extra_;
- int features_;
- int line_;
-
- public:
- TextTokenizer() {
- f_ = NULL;
- }
- ~TextTokenizer() {
- if (f_ != NULL) {
- (void) fclose(f_);
- }
- f_ = NULL;
- }
-
- bool Open(const char *fname,
- const char *comment_chars = "", const char *ident_extra = "",
- int features = 0);
-
-
- const std::string& Peek() const {
- return next_;
- }
-
- TokenType PeekType() const {
- return next_type_;
- }
-
- const std::string& Current() const {
- return cur_;
- }
-
- TokenType CurrentType() const {
- return cur_type_;
- }
-
- void Gobble();
-
- bool MoreTokens() const {
- return next_type_ != END;
- }
-
- bool Match(const std::string exact) {
- if (next_ == exact) {
- Gobble();
- return true;
- } else {
- return false;
- }
- }
-
- bool MatchInteger() {
- return MatchType(INTEGER);
- }
-
- bool MatchDouble() {
- return MatchType(DOUBLE);
- }
-
- bool MatchNumber() {
- return MatchInteger() || MatchDouble();
- }
-
- bool MatchIdentifier() {
- return MatchType(IDENTIFIER);
- }
-
- bool MatchQuasiIdentifier() {
- return MatchIdentifier() || MatchNumber();
- }
-
- bool MatchString() {
- return MatchType(STRING);
- }
-
- bool MatchPunct() {
- return MatchType(PUNCT);
- }
-
- bool MatchType(TokenType type) {
- if (next_type_ == type) {
- Gobble();
- return true;
- } else {
- return false;
- }
- }
-
- int line() const {
- return line_;
- }
-
- __attribute__((format(printf, 2, 3))) void Error(const char *msg, ...);
-
- private:
- int GetChar_() {
- return ::getc(f_);
- }
-
- void Unget_(int c) {
- ::ungetc(c, f_);
- }
-
- bool IsEOF_() {
- return ::feof(f_);
- }
-
- char Skip_(std::vector<char>& token);
-
- char NextChar_(std::vector<char>& token);
-
- char NextChar_();
-
- void UndoNextChar_(std::vector<char>& token);
-
- void Error_(const char *msg, const std::vector<char>& token);
-
- bool isident_begin_(int c) const {
- return isalpha(c) || (c == '_');
- }
-
- bool isident_rest_(int c) const {
- return isalnum(c) || (c == '_') || (c != 0 && strchr(ident_extra_, c));
- }
-
- void ScanNumber_(char c, std::vector<char>& token);
-
- void ScanString_(char ending, std::vector<char>& token);
-
- void Scan_(std::vector<char>& token);
-};
-
-/**
- * Helper for writing text fo a file.
- */
-class TextWriter {
-
- private:
- FILE *f_;
-
- public:
- /**
- * Creates an uninitialized text writer (you must initialize it).
- */
- TextWriter() {
- f_ = NULL;
- }
-
- /**
- * Automatically closes the file when it gets out of scope; for
- * best error handling, you should call Close first.
- *
- * If you do not explicitly close the file beforehand, this will abort the
- * program on a write error.
- */
- ~TextWriter() {
- if (f_) {
- mlpack::Log::Assert(fclose(f_) >= 0);
- mlpack::Log::Assert(fclose(f_) >= 0, "File close failed!");
- }
- f_ = NULL;
- }
-
- /**
- * Opens a file by name (initializer).
- *
- * @return success or failure
- */
- bool Open(const char *fname) {
- f_ = ::fopen(fname, "w");
- return (!f_) ? false : true;
- }
-
- /**
- * Explicitly closes the file.
- */
- bool Close() {
- int rv = fclose(f_);
- f_ = NULL;
- return (rv < 0) ? false : true;
- }
-
- bool Printf(const char *format, ...);
-
- bool Write(const char *s) {
- return (fputs(s, f_) > 0);
- }
-
- bool Write(int i) {
- return (fprintf(f_, "%d", i) > 0);
- }
-
- bool Write(unsigned int i) {
- return (fprintf(f_, "%u", i) > 0);
- }
-
- bool Write(long i) {
- return (fprintf(f_, "%ld", i) > 0);
- }
-
- bool Write(unsigned long i) {
- return (fprintf(f_, "%lu", i) > 0);
- }
-
- bool Write(long long i) {
- return (fprintf(f_, "%lld", i) > 0);
- }
-
- bool Write(unsigned long long i) {
- return (fprintf(f_, "%llu", i) > 0);
- }
-
- bool Write(double d) {
- return (fprintf(f_, "%.15e", d) > 0);
- }
-};
-
-#endif
Copied: mlpack/trunk/src/mlpack/core/file/textfile.hpp (from rev 10083, mlpack/trunk/src/mlpack/core/file/textfile.h)
===================================================================
--- mlpack/trunk/src/mlpack/core/file/textfile.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/core/file/textfile.hpp 2011-10-31 16:07:01 UTC (rev 10091)
@@ -0,0 +1,391 @@
+/**
+ * @file textfile.hpp
+ *
+ * Small wrappers for text files.
+ * The most useful thing here is the TextLineReader class, which reads a file line by line.
+ *
+ * @bug These routines fail when trying to read files linewise that use the Mac
+ * eol '\r'. Both Windows and Unix eol ("\r\n" and '\n') work. Use the
+ * programs 'dos2unix' or 'tr' to convert the '\r's to '\n's.
+ *
+ */
+
+#ifndef __MLPACK_CORE_FILE_TEXTFILE_HPP
+#define __MLPACK_CORE_FILE_TEXTFILE_HPP
+
+#include "../io/cli.hpp"
+#include "../io/log.hpp"
+
+#include <cstdio>
+#include <ctype.h>
+#include <stdarg.h>
+
+#include <string>
+#include <vector>
+
+namespace mlpack {
+
+/**
+ * Helper for reading text files.
+ *
+ * Files are closed automatically when they fall out of scope, though
+ * you may choose to close it yourself at no harm.
+ */
+class TextLineReader {
+
+ private:
+ FILE *f_;
+ std::string line_;
+ int line_num_;
+ bool has_line_;
+ std::string fname_;
+
+ public:
+ /** Creates an uninitialized object. */
+ TextLineReader() {
+ f_ = NULL;
+ }
+
+ /**
+ * Automatically closes the file.
+ */
+ ~TextLineReader() {
+ if (f_)
+ ::fclose(f_);
+ }
+
+ /**
+ * Opens a file.
+ *
+ * @return success value
+ */
+ bool Open(const char *fname);
+
+ /**
+ * Closes the file.
+ *
+ * (No need for a return value since you are only reading the file.)
+ */
+ void Close() {
+ (void)fclose(f_);
+ f_ = NULL;
+ }
+
+ /**
+ * Return the name of the file we are working with.
+ * This will be an empty string if no file has been opened yet.
+ */
+ const std::string& filename() const {
+ return fname_;
+ }
+
+ /**
+ * Are there more lines left?
+ */
+ bool MoreLines() {
+ return has_line_;
+ }
+
+ /**
+ * Returns the current line number.
+ */
+ int line_num() const {
+ return line_num_;
+ }
+
+ /**
+ * Gets the current line.
+ */
+ std::string& Peek() {
+ return line_;
+ }
+
+ /**
+ * Gets the current line.
+ */
+ const std::string& Peek() const {
+ return line_;
+ }
+
+ /**
+ * Tries to read one line from a file.
+ *
+ * @return true if a line was returned, false if end of file
+ */
+ bool Gobble();
+
+ /**
+ * Prints a formatted error message with line number info.
+ */
+ __attribute__((format(printf, 2, 3))) void Error(const char *msg, ...);
+
+ private:
+ char *ReadLine_();
+};
+
+/**
+ * Simple text tokenizer.
+ *
+ * This tokenizes the input stream. It will identify the general type of
+ * token, and ignore whitespace and different kinds of comments.
+ *
+ * This has the concept of 'next' token which you can peek at, and the
+ * current token. The current token is always initialized to empty at
+ * the very beginning, because you are encouraged to peek ahead one token.
+ *
+ * Example:
+ *
+ * @code
+ * TextTokenizer tokenizer;
+ * tokenizer.Open("file.txt");
+ *
+ * if (tokenizer.Match("count")) {
+ * if (tokenizer.MatchType(TextTokenizer::INTEGER)) {
+ * printf("Found number: %d\n", atoi(tokenizer.Current().c_str()));
+ * } else { Error(); }
+ * } else { Error(); }
+ * @endcode
+ */
+class TextTokenizer {
+ public:
+ enum TokenType {
+ INVALID = -1,
+ END,
+ PUNCT,
+ IDENTIFIER,
+ STRING,
+ DOUBLE,
+ INTEGER
+ };
+
+ enum Features {
+ WANT_NEWLINE = 0x01
+ };
+
+ private:
+ FILE *f_;
+ std::string next_;
+ TokenType next_type_;
+ std::string cur_;
+ TokenType cur_type_;
+ const char *comment_start_;
+ const char *ident_extra_;
+ int features_;
+ int line_;
+
+ public:
+ TextTokenizer() {
+ f_ = NULL;
+ }
+ ~TextTokenizer() {
+ if (f_ != NULL) {
+ (void) fclose(f_);
+ }
+ f_ = NULL;
+ }
+
+ bool Open(const char *fname,
+ const char *comment_chars = "", const char *ident_extra = "",
+ int features = 0);
+
+
+ const std::string& Peek() const {
+ return next_;
+ }
+
+ TokenType PeekType() const {
+ return next_type_;
+ }
+
+ const std::string& Current() const {
+ return cur_;
+ }
+
+ TokenType CurrentType() const {
+ return cur_type_;
+ }
+
+ void Gobble();
+
+ bool MoreTokens() const {
+ return next_type_ != END;
+ }
+
+ bool Match(const std::string exact) {
+ if (next_ == exact) {
+ Gobble();
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ bool MatchInteger() {
+ return MatchType(INTEGER);
+ }
+
+ bool MatchDouble() {
+ return MatchType(DOUBLE);
+ }
+
+ bool MatchNumber() {
+ return MatchInteger() || MatchDouble();
+ }
+
+ bool MatchIdentifier() {
+ return MatchType(IDENTIFIER);
+ }
+
+ bool MatchQuasiIdentifier() {
+ return MatchIdentifier() || MatchNumber();
+ }
+
+ bool MatchString() {
+ return MatchType(STRING);
+ }
+
+ bool MatchPunct() {
+ return MatchType(PUNCT);
+ }
+
+ bool MatchType(TokenType type) {
+ if (next_type_ == type) {
+ Gobble();
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ int line() const {
+ return line_;
+ }
+
+ __attribute__((format(printf, 2, 3))) void Error(const char *msg, ...);
+
+ private:
+ int GetChar_() {
+ return ::getc(f_);
+ }
+
+ void Unget_(int c) {
+ ::ungetc(c, f_);
+ }
+
+ bool IsEOF_() {
+ return ::feof(f_);
+ }
+
+ char Skip_(std::vector<char>& token);
+
+ char NextChar_(std::vector<char>& token);
+
+ char NextChar_();
+
+ void UndoNextChar_(std::vector<char>& token);
+
+ void Error_(const char *msg, const std::vector<char>& token);
+
+ bool isident_begin_(int c) const {
+ return isalpha(c) || (c == '_');
+ }
+
+ bool isident_rest_(int c) const {
+ return isalnum(c) || (c == '_') || (c != 0 && strchr(ident_extra_, c));
+ }
+
+ void ScanNumber_(char c, std::vector<char>& token);
+
+ void ScanString_(char ending, std::vector<char>& token);
+
+ void Scan_(std::vector<char>& token);
+};
+
+/**
+ * Helper for writing text to a file.
+ */
+class TextWriter {
+
+ private:
+ FILE *f_;
+
+ public:
+ /**
+ * Creates an uninitialized text writer (you must initialize it).
+ */
+ TextWriter() {
+ f_ = NULL;
+ }
+
+ /**
+ * Automatically closes the file when it gets out of scope; for
+ * best error handling, you should call Close first.
+ *
+ * If you do not explicitly close the file beforehand, this will abort the
+ * program on a write error.
+ */
+ ~TextWriter() {
+ if (f_) {
+ mlpack::Log::Assert(fclose(f_) >= 0);
+ mlpack::Log::Assert(fclose(f_) >= 0, "File close failed!");
+ }
+ f_ = NULL;
+ }
+
+ /**
+ * Opens a file by name (initializer).
+ *
+ * @return success or failure
+ */
+ bool Open(const char *fname) {
+ f_ = ::fopen(fname, "w");
+ return (!f_) ? false : true;
+ }
+
+ /**
+ * Explicitly closes the file.
+ */
+ bool Close() {
+ int rv = fclose(f_);
+ f_ = NULL;
+ return (rv < 0) ? false : true;
+ }
+
+ bool Printf(const char *format, ...);
+
+ bool Write(const char *s) {
+ return (fputs(s, f_) > 0);
+ }
+
+ bool Write(int i) {
+ return (fprintf(f_, "%d", i) > 0);
+ }
+
+ bool Write(unsigned int i) {
+ return (fprintf(f_, "%u", i) > 0);
+ }
+
+ bool Write(long i) {
+ return (fprintf(f_, "%ld", i) > 0);
+ }
+
+ bool Write(unsigned long i) {
+ return (fprintf(f_, "%lu", i) > 0);
+ }
+
+ bool Write(long long i) {
+ return (fprintf(f_, "%lld", i) > 0);
+ }
+
+ bool Write(unsigned long long i) {
+ return (fprintf(f_, "%llu", i) > 0);
+ }
+
+ bool Write(double d) {
+ return (fprintf(f_, "%.15e", d) > 0);
+ }
+};
+
+}; // namespace mlpack
+
+#endif // __MLPACK_CORE_FILE_TEXTFILE_HPP
Deleted: mlpack/trunk/src/mlpack/core/file/textfile_test.cc
===================================================================
--- mlpack/trunk/src/mlpack/core/file/textfile_test.cc 2011-10-31 15:54:31 UTC (rev 10090)
+++ mlpack/trunk/src/mlpack/core/file/textfile_test.cc 2011-10-31 16:07:01 UTC (rev 10091)
@@ -1,58 +0,0 @@
-#include "textfile.h"
-#include <math.h>
-
-#define BOOST_TEST_MODULE TextFileTest
-#include <boost/test/unit_test.hpp>
-
-/*void Test1() {
- TextTokenizer scanner;
- const char *input = xrun_param_str("input");
-
- scanner.Open(input, "#", "", TextTokenizer::WANT_NEWLINE);
-
- while (scanner.PeekType() != TextTokenizer::END) {
- fprintf(stderr, "Got: %d, [%s]\n", scanner.PeekType(), scanner.Peek().c_str());
- scanner.Gobble();
- }
-}*/
-
-BOOST_AUTO_TEST_CASE(Test2) {
- const char *fname = "tmpfile.txt";
- TextWriter writer;
-
- writer.Open(fname);
- writer.Printf("@begin(1, 1.0, 1.0e-31, abc-123, \"123\", '123') # comment here\r");
- writer.Printf("@end(2.0e-21)\r\nabc");
- writer.Close();
-
- TextTokenizer scanner;
- scanner.Open(fname, "#", "-", TextTokenizer::WANT_NEWLINE);
-
- BOOST_REQUIRE(scanner.MatchPunct());
- BOOST_REQUIRE(scanner.MatchIdentifier());
- BOOST_REQUIRE(scanner.MatchPunct());
- BOOST_REQUIRE(scanner.MatchInteger());
- BOOST_REQUIRE(scanner.Match(","));
- BOOST_REQUIRE(scanner.MatchDouble());
- BOOST_REQUIRE(scanner.Match(","));
- BOOST_REQUIRE(scanner.MatchDouble());
- BOOST_REQUIRE(scanner.Match(","));
- BOOST_REQUIRE(scanner.MatchType(TextTokenizer::IDENTIFIER));
- BOOST_REQUIRE(scanner.Match(","));
- BOOST_REQUIRE(scanner.MatchString());
- BOOST_REQUIRE(scanner.Match(","));
- BOOST_REQUIRE(scanner.MatchString());
- BOOST_REQUIRE(scanner.MatchPunct());
- BOOST_REQUIRE(scanner.Match("\n"));
- BOOST_REQUIRE(scanner.Match("@"));
- BOOST_REQUIRE(scanner.Match("end"));
- BOOST_REQUIRE(scanner.Match("("));
- BOOST_REQUIRE(scanner.Match("2.0e-21"));
- //assert(scanner.Current() == "2.0e-21");
- BOOST_REQUIRE(scanner.Current() == "2.0e-21");
- //assert(fabs(strtod(scanner.Current().c_str(), NULL) - 2.0e-21) < 1.0e-30);
- BOOST_REQUIRE_CLOSE(strtod(scanner.Current().c_str(), NULL),2.0e-21, 1e-5);
- BOOST_REQUIRE(scanner.Match(")"));
- BOOST_REQUIRE(scanner.Match("\n"));
- BOOST_REQUIRE(scanner.Match("abc"));
-}
Copied: mlpack/trunk/src/mlpack/core/file/textfile_test.cpp (from rev 10083, mlpack/trunk/src/mlpack/core/file/textfile_test.cc)
===================================================================
--- mlpack/trunk/src/mlpack/core/file/textfile_test.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/core/file/textfile_test.cpp 2011-10-31 16:07:01 UTC (rev 10091)
@@ -0,0 +1,59 @@
+#include "textfile.hpp"
+#include <math.h>
+
+#define BOOST_TEST_MODULE TextFileTest
+#include <boost/test/unit_test.hpp>
+
+/*void Test1() {
+ TextTokenizer scanner;
+ const char *input = xrun_param_str("input");
+
+ scanner.Open(input, "#", "", TextTokenizer::WANT_NEWLINE);
+
+ while (scanner.PeekType() != TextTokenizer::END) {
+ fprintf(stderr, "Got: %d, [%s]\n", scanner.PeekType(), scanner.Peek().c_str());
+ scanner.Gobble();
+ }
+}*/
+
+// Round-trip check: writes a small two-line sample with TextWriter, then
+// re-reads it with TextTokenizer and requires the expected token sequence.
+BOOST_AUTO_TEST_CASE(Test2) {
+ using namespace mlpack;
+ const char *fname = "tmpfile.txt";
+ TextWriter writer;
+
+ // The sample text is passed as the Printf format string (it contains no '%'
+ // conversions). The first line ends in a bare '\r', the second in "\r\n".
+ // NOTE(review): the results of Open/Printf/Close are not checked here.
+ writer.Open(fname);
+ writer.Printf("@begin(1, 1.0, 1.0e-31, abc-123, \"123\", '123') # comment here\r");
+ writer.Printf("@end(2.0e-21)\r\nabc");
+ writer.Close();
+
+ TextTokenizer scanner;
+ // NOTE(review): "#" and "-" are presumably the comment-start characters and
+ // the extra identifier characters (abc-123 is required to be a single
+ // IDENTIFIER below) -- confirm against TextTokenizer::Open.
+ scanner.Open(fname, "#", "-", TextTokenizer::WANT_NEWLINE);
+
+ // Expected tokens for: @begin(1, 1.0, 1.0e-31, abc-123, "123", '123')
+ BOOST_REQUIRE(scanner.MatchPunct());
+ BOOST_REQUIRE(scanner.MatchIdentifier());
+ BOOST_REQUIRE(scanner.MatchPunct());
+ BOOST_REQUIRE(scanner.MatchInteger());
+ BOOST_REQUIRE(scanner.Match(","));
+ BOOST_REQUIRE(scanner.MatchDouble());
+ BOOST_REQUIRE(scanner.Match(","));
+ BOOST_REQUIRE(scanner.MatchDouble());
+ BOOST_REQUIRE(scanner.Match(","));
+ BOOST_REQUIRE(scanner.MatchType(TextTokenizer::IDENTIFIER));
+ BOOST_REQUIRE(scanner.Match(","));
+ BOOST_REQUIRE(scanner.MatchString());
+ BOOST_REQUIRE(scanner.Match(","));
+ BOOST_REQUIRE(scanner.MatchString());
+ BOOST_REQUIRE(scanner.MatchPunct());
+ // No token is expected for "# comment here"; the next required token is a
+ // "\n" even though the written line ends in a lone '\r'.
+ BOOST_REQUIRE(scanner.Match("\n"));
+ // Expected tokens for: @end(2.0e-21)
+ BOOST_REQUIRE(scanner.Match("@"));
+ BOOST_REQUIRE(scanner.Match("end"));
+ BOOST_REQUIRE(scanner.Match("("));
+ BOOST_REQUIRE(scanner.Match("2.0e-21"));
+ //assert(scanner.Current() == "2.0e-21");
+ // After the match above, Current() is required to still hold that token text.
+ BOOST_REQUIRE(scanner.Current() == "2.0e-21");
+ //assert(fabs(strtod(scanner.Current().c_str(), NULL) - 2.0e-21) < 1.0e-30);
+ // Boost.Test interprets the third argument (1e-5) as a percentage tolerance.
+ BOOST_REQUIRE_CLOSE(strtod(scanner.Current().c_str(), NULL),2.0e-21, 1e-5);
+ BOOST_REQUIRE(scanner.Match(")"));
+ // The "\r\n" written after ')' is required to come back as one "\n" token.
+ BOOST_REQUIRE(scanner.Match("\n"));
+ BOOST_REQUIRE(scanner.Match("abc"));
+}
Modified: mlpack/trunk/src/mlpack/core.h
===================================================================
--- mlpack/trunk/src/mlpack/core.h 2011-10-31 15:54:31 UTC (rev 10090)
+++ mlpack/trunk/src/mlpack/core.h 2011-10-31 16:07:01 UTC (rev 10091)
@@ -93,6 +93,6 @@
#include <mlpack/core/math/math_misc.hpp>
#include <mlpack/core/math/range.hpp>
#include <mlpack/core/utilities/save_restore_utility.hpp>
-#include <mlpack/core/file/textfile.h>
+#include <mlpack/core/file/textfile.hpp>
#endif
More information about the mlpack-svn
mailing list