Performance improvements & CLI refactoring. * Closes #19.
@@ -15,7 +15,7 @@ var
file*, address*, version*, appname*: string port*: int operation = opRun - directory = "" + directory:string = nil readonly = false logLevelLabel = "INFO" #logLevel = lvlInfo@@ -58,24 +58,46 @@ usage* = appname & " v" & version & " - Lightweight REST Document Store" & """
(c) 2015 Fabio Cevasco Usage: - LS [-p:<port> -a:<address>] [<file>] [--import:<directory> | --export:<directory> | --delete:<directory>] + LS [command] [option1 option2 ...] + + Commands: + run Start the LiteStore server. + delete Delete the specified previously-imported directory (requires -d). + import Import the specified directory into the datastore (requires -d). + export Export the specified previously-imported directory to the current directory (requires -d). + optimize Optimize search indexes. + vacuum Vacuum datastore. Options: - -a, --address Specify address (default: 127.0.0.1). - -d, --delete Delete the previously-imported specified directory. - --export Export the previously-imported specified directory to the current directory. + -a, --address Specify server address (default: 127.0.0.1). + -d, --directory Specify a directory to import, export, delete, or mount. -h, --help Display this message. - --import Import the specified directory (Store all its contents). -l, --log Specify the log level: debug, info, warn, error, fatal, none (default: info) - -p, --port Specify port number (default: 9500). + -m, --mount Mirror database changes to the specified directory on the filesystem. + -p, --port Specify server port number (default: 9500). -r, --readonly Allow only data retrieval operations. - -m, --mount Run server and mirror database changes to the specified directory on the filesystem. - --reset If --mount is specified, resets (deletes) any previously imported directory data. + -s, --store Specify a datastore file (default: data.db). -v, --version Display the program version. 
""" for kind, key, val in getOpt(): case kind: + of cmdArgument: + case key: + of "run": + operation = opRun + of "import": + operation = opImport + of "export": + operation = opExport + of "delete": + operation = opDelete + of "optimize": + operation = opOptimize + of "vacuum": + operation = opVacuum + else: + discard of cmdLongOption, cmdShortOption: case key: of "address", "a":@@ -86,6 +108,8 @@ of "port", "p":
if val == "": fail(101, "Port not specified.") port = val.parseInt + of "store", "s": + file = val of "log", "l": if val == "": fail(102, "Log level not specified.")@@ -95,27 +119,12 @@ #logLevelLabel = val.toUpper
#logLevel = logging.LevelNames.find(logLevelLabel).Level except: fail(103, "Invalid log level '$1'" % val) - of "import": + of "directory", "d": if val == "": - fail(104, "Directory to import not specified.") - operation = opImport + fail(104, "Directory not specified.") directory = val of "mount", "m": - if val == "": - fail(104, "Directory to mount not specified.") - operation = opRun - directory = val mount = true - of "reset": - reset = true - of "export": - if val == "": - fail(105, "Directory to export not specified.") - operation = opExport - directory = val - of "delete", "d": - operation = opDelete - directory = val of "version", "v": echo version quit(0)@@ -126,10 +135,13 @@ of "readonly", "r":
readonly = true else: discard - of cmdArgument: - file = key else: discard + +# Validation + +if directory == nil and (operation in [opDelete, opImport, opExport] or mount): + fail(105, "Directory option not specified.") var LS* {.threadvar.}: LiteStore
@@ -21,14 +21,30 @@ # Manage Datastores
var LS_TRANSACTION = false +proc createIndexes(db: TDbConn) = + db.exec SQL_CREATE_INDEX_DOCUMENTS_DOCID + db.exec SQL_CREATE_INDEX_DOCUMENTS_ID + db.exec SQL_CREATE_INDEX_TAGS_TAG_ID + db.exec SQL_CREATE_INDEX_TAGS_DOCUMENT_ID + +proc dropIndexes(db: TDbConn) = + db.exec SQL_DROP_INDEX_DOCUMENTS_DOCID + db.exec SQL_DROP_INDEX_DOCUMENTS_ID + db.exec SQL_DROP_INDEX_TAGS_TAG_ID + db.exec SQL_DROP_INDEX_TAGS_DOCUMENT_ID + proc createDatastore*(file:string) = if file.fileExists(): raise newException(EDatastoreExists, "Datastore '$1' already exists." % file) - let store = db.open(file, "", "", "") - store.exec(SQL_CREATE_DOCUMENTS_TABLE) - store.exec(SQL_CREATE_DOCID_INDEX) - store.exec(SQL_CREATE_SEARCHCONTENTS_TABLE) - store.exec(SQL_CREATE_TAGS_TABLE) + debug("Creating datastore '$1'", file) + let data = db.open(file, "", "", "") + debug("Creating tables") + data.exec(SQL_CREATE_DOCUMENTS_TABLE) + data.exec(SQL_CREATE_SEARCHDATA_TABLE) + data.exec(SQL_CREATE_TAGS_TABLE) + debug("Creating indexes") + data.createIndexes() + debug("Database created") proc closeDatastore*(store:Datastore) = try:@@ -49,8 +65,14 @@ if not file.fileExists:
raise newException(EDatastoreDoesNotExist, "Datastore '$1' does not exists." % file) try: result.db = db.open(file, "", "", "") - # Register custom function + # Register custom function & PRAGMAs + debug("Registering custom functions...") discard result.db.create_function("rank", -1, SQLITE_ANY, cast[pointer](SQLITE_DETERMINISTIC), okapi_bm25f_kb, nil, nil) + debug("Executing PRAGMAs...") + discard result.db.tryExec("PRAGMA locking_mode = exclusive".sql) + discard result.db.tryExec("PRAGMA page_size = 4096".sql) + discard result.db.tryExec("PRAGMA cache_size = 10000".sql) + debug("Done.") result.path = file result.mount = "" except:@@ -61,16 +83,19 @@ return store.mount.len > 0
proc begin(store: Datastore) = if not LS_TRANSACTION: + debug("Beginning transaction") LS_TRANSACTION = true store.db.exec("BEGIN".sql) proc commit(store: Datastore) = if LS_TRANSACTION: + debug("Committing transaction") LS_TRANSACTION = false store.db.exec("COMMIT".sql) proc rollback(store: Datastore) = if LS_TRANSACTION: + debug("Rolling back transaction") LS_TRANSACTION = false store.db.exec("ROLLBACK".sql)@@ -269,6 +294,28 @@ raise
if singleOp: store.commit() +proc optimize*(store: Datastore) = + try: + store.begin() + debug("Reindexing columns...") + store.db.exec(SQL_REINDEX) + debug("Rebuilding full-text index...") + store.db.exec(SQL_REBUILD) + debug("Optimizing full-text index...") + store.db.exec(SQL_OPTIMIZE) + store.commit() + debug("Done") + except: + eWarn() + +proc vacuum*(store: Datastore) = + try: + db.close(store.db) + let data = db.open(store.path, "", "", "") + data.exec(SQL_VACUUM) + except: + eWarn() + proc importDir*(store: Datastore, dir: string) = # TODO: Only allow directory names (not paths)? var files = newSeq[string]()@@ -286,23 +333,27 @@ let batchSize = 50
let nBatches = ceil(files.len/batchSize).toInt var cFiles = 0 var cBatches = 0 - info("Importing $1 files in $2 batches", files.len, nBatches) store.begin() + info("Importing $1 files in $2 batches", files.len, nBatches) + debug("Dropping column indexes...") + store.db.dropIndexes() for f in files: try: store.importFile(f, dir) cFiles.inc if (cFiles-1) mod batchSize == 0: cBatches.inc - info("Importing batch $1/$2...", cBatches, nBatches) store.commit() + info("Importing batch $1/$2...", cBatches, nBatches) store.begin() except: warn("Unable to import file: $1", f) eWarn() store.rollback() - info("Imported $1/$2 files", cFiles, files.len) + debug("Recreating column indexes...") + store.db.createIndexes() store.commit() + info("Imported $1/$2 files", cFiles, files.len) proc exportDir*(store: Datastore, dir: string) = let docs = store.db.getAllRows(SQL_SELECT_DOCUMENTS_BY_TAG, "$dir:"&dir)@@ -318,7 +369,7 @@ file.writeFile(data)
proc deleteDir*(store: Datastore, dir: string) = store.db.exec(SQL_DELETE_DOCUMENTS_BY_TAG, "$dir:"&dir) - store.db.exec(SQL_DELETE_SEARCHCONTENTS_BY_TAG, "$dir:"&dir) + store.db.exec(SQL_DELETE_SEARCHDATA_BY_TAG, "$dir:"&dir) store.db.exec(SQL_DELETE_TAGS_BY_TAG, "$dir:"&dir) proc mountDir*(store: var Datastore, dir:string, reset=false) =
@@ -15,12 +15,27 @@ created TEXT,
modified TEXT) """ -const SQL_CREATE_DOCID_INDEX* = sql""" -CREATE INDEX docid_index ON documents(docid) -""" + + +const + SQL_CREATE_INDEX_DOCUMENTS_DOCID* = sql"CREATE INDEX IF NOT EXISTS documents_docid ON documents(docid)" + SQL_CREATE_INDEX_DOCUMENTS_ID* = sql"CREATE INDEX IF NOT EXISTS documents_id ON documents(id)" + SQL_CREATE_INDEX_TAGS_DOCUMENT_ID* = sql"CREATE INDEX IF NOT EXISTS tags_document_id ON tags(document_id)" + SQL_CREATE_INDEX_TAGS_TAG_ID* = sql"CREATE INDEX IF NOT EXISTS tags_tag_id ON tags(tag_id)" + + SQL_DROP_INDEX_DOCUMENTS_DOCID* = sql"DROP INDEX IF EXISTS documents_docid" + SQL_DROP_INDEX_DOCUMENTS_ID* = sql"DROP INDEX IF EXISTS documents_id" + SQL_DROP_INDEX_TAGS_DOCUMENT_ID* = sql"DROP INDEX IF EXISTS tags_document_id" + SQL_DROP_INDEX_TAGS_TAG_ID* = sql"DROP INDEX IF EXISTS tags_tag_id" + + SQL_REINDEX* = sql"REINDEX" + SQL_OPTIMIZE* = sql"INSERT INTO searchdata(searchdata) VALUES('optimize')" + SQL_REBUILD* = sql"INSERT INTO searchdata(searchdata) VALUES('rebuild')" + + SQL_VACUUM* = sql"VACUUM" -const SQL_CREATE_SEARCHCONTENTS_TABLE* = sql""" -CREATE VIRTUAL TABLE searchcontents USING fts4( +const SQL_CREATE_SEARCHDATA_TABLE* = sql""" +CREATE VIRTUAL TABLE searchdata USING fts4( id TEXT, data TEXT, tokenize=porter)@@ -87,18 +102,18 @@ document_id = ? AND tag_id LIKE "$%"
""" const SQL_INSERT_SEARCHCONTENT* = sql""" -INSERT INTO searchcontents +INSERT INTO searchdata (docid, id, data) VALUES (?, ?, ?) """ const SQL_DELETE_SEARCHCONTENT* = sql""" -DELETE FROM searchcontents WHERE +DELETE FROM searchdata WHERE id = ? """ const SQL_UPDATE_SEARCHCONTENT* = sql""" -UPDATE searchcontents +UPDATE searchdata SET data = ? WHERE id = ? """@@ -134,8 +149,8 @@ WHERE documents.id IN
(SELECT document_id FROM tags WHERE tag_id = ?) """ -const SQL_DELETE_SEARCHCONTENTS_BY_TAG* = sql""" -DELETE FROM searchcontents +const SQL_DELETE_SEARCHDATA_BY_TAG* = sql""" +DELETE FROM searchdata WHERE id IN (SELECT document_id FROM tags WHERE tag_id = ?) """
@@ -44,7 +44,7 @@ template route(req, peg: expr, op: stmt): stmt {.immediate.}=
if req.url.path.find(peg, matches) != -1: op try: - var info: ResourceInfo = (version: "", resource: "", id: "") + var info: ResourceInfo req.route peg"^\/?$": info.version = "v1" info.resource = "info"
@@ -27,7 +27,13 @@ tag*: string
startswith*: bool endswith*: bool negated*: bool - Operation* = enum opRun, opImport, opExport, opDelete + Operation* = enum + opRun, + opImport, + opExport, + opDelete, + opVacuum, + opOptimize LiteStore* = object store*: Datastore address*: string
@@ -39,13 +39,13 @@ proc prepareSelectDocumentsQuery*(options: var QueryOptions): string =
result = "SELECT " if options.search.len > 0: if options.select[0] != "COUNT(docid)": - let rank = "rank(matchinfo(searchcontents, 'pcxnal'), 1.20, 0.75, 5.0, 0.5) AS rank" - let snippet = "snippet(searchcontents, \"<strong>\", \"</strong>\", \"<strong>…</strong>\", -1, 30) as highlight" + let rank = "rank(matchinfo(searchdata, 'pcxnal'), 1.20, 0.75, 5.0, 0.5) AS rank" + let snippet = "snippet(searchdata, \"<strong>\", \"</strong>\", \"<strong>…</strong>\", -1, 30) as highlight" options.select.add(snippet) options.select.add("ranktable.rank AS rank") options.orderby = "rank DESC" # Create inner select - var innerSelect = "SELECT docid, " & rank & " FROM searchcontents WHERE searchcontents MATCH '" & options.search.replace("'", "''") & "' " + var innerSelect = "SELECT docid, " & rank & " FROM searchdata WHERE searchdata MATCH '" & options.search.replace("'", "''") & "' " if options.tags.len > 0: innerSelect = innerSelect & options.tags.selectDocumentsByTags() innerSelect = innerSelect & " ORDER BY rank DESC "@@ -54,11 +54,11 @@ innerSelect = innerSelect & "LIMIT " & $options.limit
if options.offset > 0: innerSelect = innerSelect & " OFFSET " & $options.offset result = result & options.select.join(", ") - result = result & " FROM documents JOIN (" & innerSelect & ") as ranktable USING(docid) JOIN searchcontents USING(docid) " + result = result & " FROM documents JOIN (" & innerSelect & ") as ranktable USING(docid) JOIN searchdata USING(docid) " result = result & "WHERE 1=1 " else: result = result & options.select.join(", ") - result = result & " FROM searchcontents " + result = result & " FROM searchdata " result = result & "WHERE 1=1 " options.orderby = "" else:@@ -69,10 +69,10 @@ result = result & "AND id = ?"
if options.tags.len > 0: result = result & options.tags.selectDocumentsByTags() if options.search.len > 0: - result = result & "AND searchcontents MATCH '" & options.search.replace("'", "''") & "' " - if options.orderby.len > 0 and options.select[0] != "COUNT(id)": + result = result & "AND searchdata MATCH '" & options.search.replace("'", "''") & "' " + if options.orderby.len > 0 and options.select[0] != "COUNT(docid)": result = result & "ORDER BY " & options.orderby & " " - if options.search.len == 0 and options.limit > 0: + if options.limit > 0: result = result & "LIMIT " & $options.limit & " " if options.offset > 0: result = result & "OFFSET " & $options.offset & " "
@@ -8,27 +8,39 @@ times,
json, pegs, strtabs, - #logging, base64 import lib/types, lib/utils, lib/core, lib/cli, + lib/queries, lib/server from asyncdispatch import runForever {.compile: "vendor/sqlite/libsqlite3.c".} -{.passC: "-DSQLITE_ENABLE_FTS4=1 -DSQLITE_ENABLE_LOCKING_STYLE=0 -DSQLITE_THREADSAFE=0".} +{.passC: "-DSQLITE_ENABLE_FTS4=1 -DSQLITE_ENABLE_LOCKING_STYLE=1".} when isMainModule: # Initialize Datastore if not LS.file.fileExists: try: + info("Creating datastore: ", LS.file) LS.file.createDatastore() except: fail(200, "Unable to create datastore '$1'" % [LS.file]) + # Manage vacuum operation separately + if LS.operation == opVacuum: + let data = db.open(LS.file, "", "", "") + try: + data.exec(SQL_VACUUM) + db.close(data) + except: + eWarn() + quit(203) + quit(0) + # Open Datastore and execute operation try: LS.store = LS.file.openDatastore() if LS.mount:@@ -40,12 +52,17 @@ fail(202, "Unable to mount directory '$1'" % [LS.directory])
except: fail(201, "Unable to open datastore '$1'" % [LS.file]) case LS.operation: + of opRun: + LS.serve + runForever() of opImport: LS.store.importDir(LS.directory) of opExport: LS.store.exportDir(LS.directory) of opDelete: LS.store.deleteDir(LS.directory) - of opRun: - LS.serve - runForever() + of opOptimize: + LS.store.optimize + else: + discard +