[mlpack-svn] r13231 - mlpack/trunk/src/mlpack/tests

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Sun Jul 15 20:58:00 EDT 2012


Author: rcurtin
Date: 2012-07-15 20:57:59 -0400 (Sun, 15 Jul 2012)
New Revision: 13231

Modified:
   mlpack/trunk/src/mlpack/tests/kernel_test.cpp
Log:
Add tests for PSpectrumStringKernel.


Modified: mlpack/trunk/src/mlpack/tests/kernel_test.cpp
===================================================================
--- mlpack/trunk/src/mlpack/tests/kernel_test.cpp	2012-07-16 00:57:43 UTC (rev 13230)
+++ mlpack/trunk/src/mlpack/tests/kernel_test.cpp	2012-07-16 00:57:59 UTC (rev 13231)
@@ -14,6 +14,7 @@
 #include <mlpack/core/kernels/linear_kernel.hpp>
 #include <mlpack/core/kernels/polynomial_kernel.hpp>
 #include <mlpack/core/kernels/spherical_kernel.hpp>
+#include <mlpack/core/kernels/pspectrum_string_kernel.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 #include <mlpack/core/metrics/mahalanobis_distance.hpp>
 
@@ -344,4 +345,218 @@
   BOOST_REQUIRE_CLOSE(lk.Evaluate(b, a), 0.243116734, 5e-5);
 }
 
+// Ensure that the p-spectrum kernel (constructed with p = 3) records every
+// length-3 substring of each input string, including repeat counts.
+BOOST_AUTO_TEST_CASE(PSpectrumSubstringExtractionTest)
+{
+  std::vector<std::vector<std::string> > datasets;
+
+  datasets.push_back(std::vector<std::string>());
+
+  datasets[0].push_back("herpgle");
+  datasets[0].push_back("herpagkle");
+  datasets[0].push_back("klunktor");
+  datasets[0].push_back("flibbynopple");
+
+  datasets.push_back(std::vector<std::string>());
+
+  datasets[1].push_back("floggy3245");
+  datasets[1].push_back("flippydopflip");
+  datasets[1].push_back("stupid fricking cat");
+  datasets[1].push_back("food time isn't until later");
+  datasets[1].push_back("leave me alone until 6:00");
+  datasets[1].push_back("only after that do you get any food.");
+  datasets[1].push_back("obloblobloblobloblobloblob");
+
+  PSpectrumStringKernel p(datasets, 3); // Substring length p = 3.
+
+  // One outer entry per dataset, one inner entry per string in that dataset.
+  BOOST_REQUIRE_EQUAL(p.Counts().size(), 2);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0].size(), 4);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1].size(), 7);
+
+  // herpgle: her, erp, rpg, pgl, gle
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][0].size(), 5);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][0]["her"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][0]["erp"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][0]["rpg"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][0]["pgl"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][0]["gle"], 1);
+
+  // herpagkle: her, erp, rpa, pag, agk, gkl, kle
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][1].size(), 7);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][1]["her"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][1]["erp"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][1]["rpa"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][1]["pag"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][1]["agk"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][1]["gkl"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][1]["kle"], 1);
+
+  // klunktor: klu, lun, unk, nkt, kto, tor
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][2].size(), 6);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][2]["klu"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][2]["lun"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][2]["unk"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][2]["nkt"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][2]["kto"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][2]["tor"], 1);
+
+  // flibbynopple: fli lib ibb bby byn yno nop opp ppl ple
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][3].size(), 10);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][3]["fli"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][3]["lib"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][3]["ibb"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][3]["bby"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][3]["byn"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][3]["yno"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][3]["nop"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][3]["opp"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][3]["ppl"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[0][3]["ple"], 1);
+
+  // floggy3245: flo log ogg ggy gy3 y32 324 245
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][0].size(), 8);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][0]["flo"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][0]["log"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][0]["ogg"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][0]["ggy"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][0]["gy3"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][0]["y32"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][0]["324"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][0]["245"], 1);
+
+  // flippydopflip: fli lip ipp ppy pyd ydo dop opf pfl fli lip
+  // fli(2) lip(2) ipp ppy pyd ydo dop opf pfl
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][1].size(), 9);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][1]["fli"], 2);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][1]["lip"], 2);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][1]["ipp"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][1]["ppy"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][1]["pyd"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][1]["ydo"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][1]["dop"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][1]["opf"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][1]["pfl"], 1);
+
+  // stupid fricking cat: stu tup upi pid fri ric ick cki kin ing cat
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][2].size(), 11); // None span a space.
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][2]["stu"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][2]["tup"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][2]["upi"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][2]["pid"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][2]["fri"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][2]["ric"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][2]["ick"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][2]["cki"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][2]["kin"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][2]["ing"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][2]["cat"], 1);
+
+  // food time isn't until later: foo ood tim ime isn unt nti til lat ate ter
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][3].size(), 11); // "n't" adds none.
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][3]["foo"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][3]["ood"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][3]["tim"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][3]["ime"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][3]["isn"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][3]["unt"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][3]["nti"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][3]["til"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][3]["lat"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][3]["ate"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][3]["ter"], 1);
+
+  // leave me alone until 6:00: lea eav ave alo lon one unt nti til
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][4].size(), 9); // "me", "6:00" add none.
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][4]["lea"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][4]["eav"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][4]["ave"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][4]["alo"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][4]["lon"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][4]["one"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][4]["unt"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][4]["nti"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][4]["til"], 1);
+
+  // only after that do you get any food.:
+  // onl nly aft fte ter tha hat you get any foo ood
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5].size(), 12); // "do", "." add none.
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5]["onl"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5]["nly"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5]["aft"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5]["fte"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5]["ter"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5]["tha"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5]["hat"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5]["you"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5]["get"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5]["any"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5]["foo"], 1);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][5]["ood"], 1);
+
+  // obloblobloblobloblobloblob: obl(8) blo(8) lob(8)
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][6].size(), 3); // Only 3 distinct.
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][6]["obl"], 8);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][6]["blo"], 8);
+  BOOST_REQUIRE_EQUAL(p.Counts()[1][6]["lob"], 8);
+}
+
+BOOST_AUTO_TEST_CASE(PSpectrumStringEvaluateTest)
+{
+  // One dataset of four strings; all 10 unordered pairs are evaluated below.
+  std::vector<std::vector<std::string> > dataset;
+  dataset.push_back(std::vector<std::string>());
+  dataset[0].push_back("hello"); // hel ell llo
+  dataset[0].push_back("jello"); // jel ell llo
+  dataset[0].push_back("mellow"); // mel ell llo low
+  dataset[0].push_back("mellow jello"); // mel ell(2) llo(2) low jel
+
+  PSpectrumStringKernel p(dataset, 3); // Length-3 substrings (presumably).
+
+  arma::vec a("0 0"); // Presumably (dataset, string) indices - confirm.
+  arma::vec b("0 0"); // Expected values: sum of count products (inferred).
+
+  BOOST_REQUIRE_CLOSE(p.Evaluate(a, b), 3.0, 1e-5); // hello, hello
+  BOOST_REQUIRE_CLOSE(p.Evaluate(b, a), 3.0, 1e-5); // Swapped arguments.
+
+  b = "0 1";
+  BOOST_REQUIRE_CLOSE(p.Evaluate(a, b), 2.0, 1e-5); // hello, jello: ell llo
+  BOOST_REQUIRE_CLOSE(p.Evaluate(b, a), 2.0, 1e-5);
+
+  b = "0 2";
+  BOOST_REQUIRE_CLOSE(p.Evaluate(a, b), 2.0, 1e-5); // hello, mellow: ell llo
+  BOOST_REQUIRE_CLOSE(p.Evaluate(b, a), 2.0, 1e-5);
+
+  b = "0 3";
+  BOOST_REQUIRE_CLOSE(p.Evaluate(a, b), 4.0, 1e-5); // ell 1*2 + llo 1*2
+  BOOST_REQUIRE_CLOSE(p.Evaluate(b, a), 4.0, 1e-5);
+
+  a = "0 1";
+  b = "0 1";
+  BOOST_REQUIRE_CLOSE(p.Evaluate(a, b), 3.0, 1e-5); // jello, jello
+  BOOST_REQUIRE_CLOSE(p.Evaluate(b, a), 3.0, 1e-5);
+
+  b = "0 2";
+  BOOST_REQUIRE_CLOSE(p.Evaluate(a, b), 2.0, 1e-5); // jello, mellow: ell llo
+  BOOST_REQUIRE_CLOSE(p.Evaluate(b, a), 2.0, 1e-5);
+
+  b = "0 3";
+  BOOST_REQUIRE_CLOSE(p.Evaluate(a, b), 5.0, 1e-5); // jel 1 + ell 2 + llo 2
+  BOOST_REQUIRE_CLOSE(p.Evaluate(b, a), 5.0, 1e-5);
+
+  a = "0 2";
+  b = "0 2";
+  BOOST_REQUIRE_CLOSE(p.Evaluate(a, b), 4.0, 1e-5); // mellow, mellow
+  BOOST_REQUIRE_CLOSE(p.Evaluate(b, a), 4.0, 1e-5);
+
+  b = "0 3";
+  BOOST_REQUIRE_CLOSE(p.Evaluate(a, b), 6.0, 1e-5); // 1 + 2 + 2 + 1
+  BOOST_REQUIRE_CLOSE(p.Evaluate(b, a), 6.0, 1e-5);
+
+  a = "0 3";
+  BOOST_REQUIRE_CLOSE(p.Evaluate(a, b), 11.0, 1e-5); // 1 + 4 + 4 + 1 + 1
+  BOOST_REQUIRE_CLOSE(p.Evaluate(b, a), 11.0, 1e-5);
+}
+
 BOOST_AUTO_TEST_SUITE_END();




More information about the mlpack-svn mailing list