| Message
| Now for a proper test ...
First I make your Trie class have a Lua interface, as follows (added this to the bottom of the .cpp file):
//----------------------- begin Lua stuff ----------------------------
const char trie_handle[] = "trie_handle";
// make string table item
static void MakeTableItem (lua_State *L, const char * name, const string & str)
{
lua_pushstring (L, name);
lua_pushstring (L, str.c_str ());
lua_rawset(L, -3);
}
// make number table item
static void MakeTableItem (lua_State *L, const char * name, const int n)
{
lua_pushstring (L, name);
lua_pushnumber (L, n);
lua_rawset(L, -3);
}
static LexicalTrie * Ltrie_gettrie (lua_State *L)
{
LexicalTrie **ud = (LexicalTrie **) luaL_checkudata (L, 1, trie_handle);
luaL_argcheck(L, *ud != NULL, 1, "trie userdata expected");
return *ud;
}
static int Ltrie_add(lua_State *L)
{
LexicalTrie *pTrie = Ltrie_gettrie (L);
string word (luaL_checkstring (L, 2));
pTrie->addWord (word);
return 0;
} // end of Ltrie_add
static int Ltrie_remove(lua_State *L)
{
LexicalTrie *pTrie = Ltrie_gettrie (L);
string word (luaL_checkstring (L, 2));
pTrie->removeWord (word);
return 0;
} // end of Ltrie_remove
static int Ltrie_contains(lua_State *L)
{
LexicalTrie *pTrie = Ltrie_gettrie (L);
string word (luaL_checkstring (L, 2));
lua_pushboolean (L, pTrie->containsWord (word));
return 1;
} // end of Ltrie_contains
static int Ltrie_prefix(lua_State *L)
{
LexicalTrie *pTrie = Ltrie_gettrie (L);
string word (luaL_checkstring (L, 2));
lua_pushboolean (L, pTrie->containsPrefix (word));
return 1;
} // end of Ltrie_prefix
static int Ltrie_save(lua_State *L)
{
LexicalTrie *pTrie = Ltrie_gettrie (L);
std::set<std::string> out;
pTrie->writeToSet (out);
lua_createtable (L, out.size (), 0);
int i = 1;
// copy set into table
for (std::set<std::string>::const_iterator it = out.begin ();
it != out.end (); it++)
{
lua_pushstring (L, it->c_str ());
lua_rawseti (L, -2, i++);
}
return 1; // the table
} // end of Ltrie_save
static int Ltrie_load(lua_State *L)
{
LexicalTrie *pTrie = Ltrie_gettrie (L);
luaL_checktype (L, 2, LUA_TTABLE);
lua_pushnil (L); // first key
while (lua_next (L, 2) != 0)
{
string word (luaL_checkstring (L, -1));
pTrie->addWord (word);
lua_pop (L, 1); // remove word, leave key
}
return 0;
} // end of Ltrie_load
static int Ltrie_count(lua_State *L)
{
LexicalTrie *pTrie = Ltrie_gettrie (L);
lua_pushinteger (L, pTrie->numWords ());
return 1; // the count
} // end of Ltrie_count
static int Ltrie_match(lua_State *L)
{
LexicalTrie *pTrie = Ltrie_gettrie (L);
string regexp = (luaL_checkstring (L, 2));
std::set<std::string> results;
pTrie->matchRegExp (regexp, results);
lua_createtable (L, results.size (), 0);
int i = 1;
// copy set into table
for (std::set<std::string>::const_iterator it = results.begin ();
it != results.end (); it++)
{
lua_pushstring (L, it->c_str ());
lua_rawseti (L, -2, i++);
}
return 1; // the table
} // end of Ltrie_match
static int Ltrie_corrections(lua_State *L)
{
LexicalTrie *pTrie = Ltrie_gettrie (L);
string regexp = (luaL_checkstring (L, 2));
int distance = (luaL_checkinteger (L, 3));
std::set<LexicalTrie::Correction> results;
pTrie->suggestCorrections (regexp, distance, results);
lua_createtable (L, results.size (), 0);
int i = 1;
// copy set into table
for (std::set<LexicalTrie::Correction>::const_iterator it = results.begin ();
it != results.end (); it++)
{
lua_createtable (L, 0, 2);
MakeTableItem (L, "word", it->suggestedWord_);
MakeTableItem (L, "distance", it->editDistance_);
lua_rawseti (L, -2, i++);
}
return 1; // the table
} // end of Ltrie_corrections
// done with the trie, delete it
static int Ltrie_gc (lua_State *L) {
LexicalTrie *pTrie = Ltrie_gettrie (L);
delete pTrie;
// set userdata to NULL, so we don't try to use it now
LexicalTrie **ud = (LexicalTrie **) luaL_checkudata (L, 1, trie_handle);
*ud = NULL;
return 0;
} // end of Ltrie_gc
// tostring helper
static int Ltrie_tostring (lua_State *L)
{
LexicalTrie *pTrie = Ltrie_gettrie (L);
lua_pushstring(L, "trie");
return 1;
} // end of Ltrie_tostring
//----------------------- create a new trie object ----------------------------
static int Ltrie_new(lua_State *L)
{
LexicalTrie *pTrie = new LexicalTrie (luaL_optnumber (L, 1, 1)); // case-sensitive flag
LexicalTrie **ud = (LexicalTrie **)lua_newuserdata(L, sizeof (LexicalTrie *));
luaL_getmetatable(L, trie_handle);
lua_setmetatable(L, -2);
*ud = pTrie; // store pointer to this trie in the userdata
return 1;
} // end of Ltrie_new
static const luaL_reg triemeta[] = {
{"add", Ltrie_add}, // add one word
{"remove", Ltrie_remove}, // remove one word
{"contains", Ltrie_contains}, // is word there?
{"prefix", Ltrie_prefix}, // is prefix there?
{"save", Ltrie_save}, // save trie to table
{"load", Ltrie_load}, // load trie from table
{"count", Ltrie_count}, // number of words in the trie
{"match", Ltrie_match}, // match regular expression
{"corrections",Ltrie_corrections}, // return table of corrections
{"__gc", Ltrie_gc},
{"__tostring", Ltrie_tostring},
{NULL, NULL}
};
/* Open the library */
static const luaL_reg trielib[] = {
{"new", Ltrie_new},
{NULL, NULL}
};
static void createmeta(lua_State *L, const char *name)
{
luaL_newmetatable(L, name); /* create new metatable */
lua_pushliteral(L, "__index");
lua_pushvalue(L, -2); /* push metatable */
lua_rawset(L, -3); /* metatable.__index = metatable */
}
LUALIB_API int luaopen_trie(lua_State *L)
{
createmeta(L, trie_handle);
luaL_register (L, NULL, triemeta);
lua_pop(L, 1);
luaL_register (L, "trie", trielib);
return 1;
}
I also added a couple of extra methods, because when outputting the trie I wanted to put it into a table, not a ostream. These are the new methods:
void LexicalTrie::writeToSet(std::set<std::string> & out)
{
root_->writeToSet(out);
}
void LexicalTrie::TrieNode::writeToSet(std::set<std::string> & output, const std::string & soFar)
{
if ( isWord_ )
output.insert(soFar);
for ( std::vector<LetterTriePair>::iterator it = letters_.begin();
it != letters_.end(); it++ )
{
it->trie_->writeToSet(output, soFar + it->letter_);
}
}
These were very similar to your writeToStream methods.
The code above illustrates how you can interface a C++ class to Lua fairly simply.
To use it, you make a new instance of the trie object, like this:
Then you use that new object, and call its methods (implemented by a metatable, and using __index to call the functions in the metatable):
tr:add ("nick")
tr:add ("gammon")
tr:load {"all", "good", "boys", "deserve", "fruit" }
print ("is nick there", tr:contains ("nick")) --> true
print ("is andrew there", tr:contains ("andrew")) --> false
require "tprint"
tprint (tr:save ())
The code above illustrates adding to the trie, loading a table, testing for membership, and saving back as a table.
I'm not quite sure how the "corrections" part is supposed to work. Given the above entries, which save as:
1="all"
2="boys"
3="deserve"
4="fruit"
5="gammon"
6="good"
7="nick"
If I enter:
tprint (tr:corrections("go", 3))
I get:
1:
"distance"=2
"word"="good"
2:
"distance"=3
"word"="all"
3:
"distance"=3
"word"="boys"
4:
"distance"=3
"word"="good"
The word "good" appears twice, I'm not quite sure why.
Speed and memory usage
The other question is, does this save time or space over the straight Lua tables? I'm not sure that it does.
In my implementation of the spell checker test code, I wanted to be able to lookup by metaphone code, so I made a trie per metaphone - that is, 9,251 tries.
The time taken to load the dictionary was about double, compared to straight Lua tables, and the memory usage was over double as well.
This doesn't totally surprise me, because for each letter stored in a trie, you need an instance of TrieNode, namely 16 bytes. Admittedly the trie structure means you don't need a node for each letter in the dictionary, but for short runs of words, the space saved by the trie is probably lost by the extra space for the node structures. |
- Nick Gammon
www.gammon.com.au, www.mushclient.com | | Top |
|