all repos — litestore @ e465690e6e082935801e5c6a10a37663def8bb05

A minimalist nosql document store.

Added initial optimizations for importing data and FTS4.
h3rald h3rald@h3rald.com
Wed, 22 Apr 2015 22:00:34 +0200
commit

e465690e6e082935801e5c6a10a37663def8bb05

parent

55d0737c2bbdefba2f534fc45ce8b5feedd410a3

M admin/index.html

@@ -37,6 +37,7 @@ <script src="js/components/uploader.js"> </script>

<script src="js/components/editor.js"> </script> <script src="js/modules/info.js"> </script> <script src="js/modules/tags.js"> </script> + <script src="js/modules/htmldoc.js"> </script> <script src="js/modules/guide.js"> </script> <script src="js/modules/document.js"> </script> <script src="js/modules/search.js"> </script>
M admin/js/app.js

@@ -11,6 +11,7 @@ app.system = info;

m.route(document.body, "/info", { "/info": app.info, "/tags/:id": app.tags, + "/html/:id...": app.htmldoc, "/document/:action/:id...": app.document, "/guide/:id": app.guide, "/new": app.create,
A admin/js/modules/htmldoc.js

@@ -0,0 +1,28 @@

+(function(){
+  'use strict';
+  var app = window.LS || (window.LS = {});
+  var u = LS.utils;
+
+  // HTMLDoc Module
+  app.htmldoc = {vm: {}};
+  app.htmldoc.vm.init = function() {
+    var vm = this;
+    vm.id = m.prop(m.route.param("id"));
+    vm.content = Doc.get(vm.id()).then(function(content){
+      return $("<div>").html(content.data).html();
+    }, vm.flashError);
+    vm.view = function(){
+      m.route("/document/view/"+vm.id());
+    };
+    vm.links = m.prop([{action: vm.view, title: "View Source", icon: "code"}]);
+  };
+  app.htmldoc.main = function(){
+    return m("article.row", [
+      u.toolbar({links: app.htmldoc.vm.links()}),
+      m.trust(app.htmldoc.vm.content())
+    ]);
+  };
+
+  u.layout(app.htmldoc);
+
+}());
M admin/js/modules/search.js

@@ -14,19 +14,22 @@ vm.page -= 1; // pages are 0-based

vm.offset = vm.page * vm.limit; vm.result = m.prop({total: 0, results: []}); vm.total = 0; + vm.execTime = 0; Doc.search(vm.query, vm.offset, vm.limit).then(function(result){ vm.result(result); vm.total = result.total; + vm.execTime = (result["execution-time"]*1000).toFixed(0); }, vm.flashError); }; app.search.main = function(){ var vm = app.search.vm; var result = vm.result(); var title = m("h2.col-md-12", ["You searched for: ", m("em", vm.query)]); - var total = m("p.col-md-12", [m("strong", result.total), " hits"]); + var total = m("p.col-md-12", [m("strong", result.total), " hits ("+vm.execTime+" ms)"]); var resultPanel = function(res){ var obj = {}; - obj.title = m("a", {href: "/document/view/"+res.id, config: m.route}, [res.id]); + var path = (res.id.match(/\.html?$/)) ? "/html/" : "/document/view/"; + obj.title = m("a", {href: path+res.id, config: m.route}, [res.id]); obj.content = m("div", [ m("p", [m.trust(res.highlight)]), m("p", res.tags.map(function(tag){

@@ -49,4 +52,4 @@ };

 u.layout(app.search);
-}())
+}())
M lib/api_v1.nim

@@ -1,4 +1,4 @@

-import asynchttpserver2, asyncdispatch, strutils, cgi, strtabs, pegs, json, os +import asynchttpserver2, asyncdispatch, strutils, cgi, strtabs, pegs, json, os, times import types, core, utils # Helper procs

@@ -119,6 +119,7 @@ result.content = doc

result.code = Http200 proc getDocument(LS: LiteStore, id: string, options = newQueryOptions()): Response = + let id = id.decodeURL let doc = LS.store.retrieveDocument(id, options) if doc.data == "": result = resDocumentNotFound(id)

@@ -145,6 +146,7 @@ result = resError(Http500, "Unable to delete document '$1'" % id)

proc getRawDocuments(LS: LiteStore, options: QueryOptions = newQueryOptions()): Response = var options = options + let t0 = cpuTime() let docs = LS.store.retrieveRawDocuments(options) let orig_limit = options.limit let orig_offset = options.offset

@@ -169,6 +171,7 @@ content["offset"] = %orig_offset

if options.orderby != "": content["sort"] = %options.orderby content["total"] = %total + content["execution-time"] = %(cputime()-t0) content["results"] = docs result.headers = ctJsonHeader() result.content = content.pretty

@@ -234,7 +237,7 @@ let jbody = body.parseJson

if jbody.kind != JArray: return resError(Http400, "Bad request: PATCH request body is not an array.") var options = newQueryOptions() - options.select = @["id", "content_type", "binary", "searchable", "created", "modified"] + options.select = @["documents.id AS id", "content_type", "binary", "searchable", "created", "modified"] let doc = LS.store.retrieveRawDocument(id, options) if doc == "": return resDocumentNotFound(id)

@@ -295,7 +298,7 @@ discard # never happens really.

proc head(req: Request, LS: LiteStore, resource: string, id = ""): Response = var options = newQueryOptions() - options.select = @["id", "content_type", "binary", "searchable", "created", "modified"] + options.select = @["documents.id AS id", "content_type", "binary", "searchable", "created", "modified"] try: parseQueryOptions(req.url.query, options); if id != "":

@@ -312,7 +315,7 @@ case resource:

of "docs": var options = newQueryOptions() if req.url.query.contains("contents=false"): - options.select = @["id", "content_type", "binary", "searchable", "created", "modified"] + options.select = @["documents.id AS id", "content_type", "binary", "searchable", "created", "modified"] try: parseQueryOptions(req.url.query, options); if id != "":
M lib/core.nim

@@ -8,7 +8,9 @@ json,

pegs, strtabs, strutils, - base64 + base64, + logging, + math import types, contenttypes,

@@ -16,6 +18,8 @@ queries,

utils # Manage Datastores + +var LS_TRANSACTION = false proc createDatastore*(file:string) = if file.fileExists():

@@ -54,6 +58,21 @@

proc hasMirror(store: Datastore): bool = return store.mount.len > 0 +proc begin(store: Datastore) = + if not LS_TRANSACTION: + store.db.exec("BEGIN".sql) + LS_TRANSACTION = true + +proc commit(store: Datastore) = + if LS_TRANSACTION: + store.db.exec("COMMIT".sql) + LS_TRANSACTION = false + +proc rollback(store: Datastore) = + if LS_TRANSACTION: + store.db.exec("ROLLBACK".sql) + LS_TRANSACTION = false + # Manage Tags proc createTag*(store: Datastore, tagid, documentid: string, system=false) =

@@ -108,6 +127,7 @@ else:

return $store.prepareJsonDocument(raw_document, options.select) proc createDocument*(store: Datastore, id="", rawdata = "", contenttype = "text/plain", binary = -1, searchable = 1): string = + let singleOp = not LS_TRANSACTION var id = id var contenttype = contenttype.replace(peg"""\;(.+)$""", "") # Strip charset for now var binary = checkIfBinary(binary, contenttype)

@@ -118,58 +138,83 @@ var data = rawdata

if id == "": id = $genOid() # Store document - var res = store.db.execAffectedRows(SQL_INSERT_DOCUMENT, id, data, contenttype, binary, searchable, currentTime()) - if res > 0: - if binary <= 0 and searchable >= 0: - # Add to search index - store.db.exec(SQL_INSERT_SEARCHCONTENT, id, data.toPlainText) - store.addDocumentSystemTags(id, contenttype) - if store.hasMirror and id.startsWith(store.mount): - # Add dir tag - store.createTag("$dir:"&store.mount, id, true) - var filename = id.unixToNativePath - if not fileExists(filename): - filename.parentDir.createDir - filename.writeFile(rawdata) - else: - raise newException(EFileExists, "File already exists: $1" % filename) - return $store.retrieveRawDocument(id) + try: + store.begin() + var res = store.db.execAffectedRows(SQL_INSERT_DOCUMENT, id, data, contenttype, binary, searchable, currentTime()) + if res > 0: + if binary <= 0 and searchable >= 0: + # Add to search index + store.db.exec(SQL_INSERT_SEARCHCONTENT, id, data.toPlainText) + store.addDocumentSystemTags(id, contenttype) + if store.hasMirror and id.startsWith(store.mount): + # Add dir tag + store.createTag("$dir:"&store.mount, id, true) + var filename = id.unixToNativePath + if not fileExists(filename): + filename.parentDir.createDir + filename.writeFile(rawdata) + else: + raise newException(EFileExists, "File already exists: $1" % filename) + if singleOp: + store.commit() + return $store.retrieveRawDocument(id) + except: + store.rollback() + eWarn() + raise proc updateDocument*(store: Datastore, id: string, rawdata: string, contenttype = "text/plain", binary = -1, searchable = 1): string = + let singleOp = not LS_TRANSACTION var contenttype = contenttype.replace(peg"""\;(.+)$""", "") # Strip charset for now var binary = checkIfBinary(binary, contenttype) var data = rawdata var searchable = searchable if binary == 1: searchable = 0 - var res = store.db.execAffectedRows(SQL_UPDATE_DOCUMENT, data, contenttype, binary, searchable, currentTime(), id) - if res > 
0: - if binary <= 0 and searchable >= 0: - store.db.exec(SQL_UPDATE_SEARCHCONTENT, data.toPlainText, id) - if store.hasMirror and id.startsWith(store.mount): - var filename = id.unixToNativePath - if fileExists(filename): - filename.writeFile(rawdata) - else: - raise newException(EFileNotFound, "File not found: $1" % filename) - return $store.retrieveRawDocument(id) - else: - return "" + try: + store.begin() + var res = store.db.execAffectedRows(SQL_UPDATE_DOCUMENT, data, contenttype, binary, searchable, currentTime(), id) + if res > 0: + if binary <= 0 and searchable >= 0: + store.db.exec(SQL_UPDATE_SEARCHCONTENT, data.toPlainText, id) + if store.hasMirror and id.startsWith(store.mount): + var filename = id.unixToNativePath + if fileExists(filename): + filename.writeFile(rawdata) + else: + raise newException(EFileNotFound, "File not found: $1" % filename) + return $store.retrieveRawDocument(id) + else: + return "" + if singleOp: + store.commit() + except: + eWarn() + store.rollback() + raise proc setDocumentModified*(store: Datastore, id: string): string = store.db.exec(SQL_SET_DOCUMENT_MODIFIED, id, currentTime()) proc destroyDocument*(store: Datastore, id: string): int64 = - result = store.db.execAffectedRows(SQL_DELETE_DOCUMENT, id) - if result > 0: - store.db.exec(SQL_DELETE_SEARCHCONTENT, id) - store.db.exec(SQL_DELETE_DOCUMENT_TAGS, id) - if store.hasMirror and id.startsWith(store.mount): - var filename = id.unixToNativePath - if fileExists(filename): - removeFile(id.unixToNativePath) - else: - raise newException(EFileNotFound, "File not found: $1" % filename) + try: + let singleOp = not LS_TRANSACTION + store.begin() + result = store.db.execAffectedRows(SQL_DELETE_DOCUMENT, id) + if result > 0: + store.db.exec(SQL_DELETE_SEARCHCONTENT, id) + store.db.exec(SQL_DELETE_DOCUMENT_TAGS, id) + if store.hasMirror and id.startsWith(store.mount): + var filename = id.unixToNativePath + if fileExists(filename): + removeFile(id.unixToNativePath) + else: + raise 
newException(EFileNotFound, "File not found: $1" % filename) + if singleOp: + store.commit() + except: + eWarn() + store.rollback() proc retrieveDocument*(store: Datastore, id: string, options: QueryOptions = newQueryOptions()): tuple[data: string, contenttype: string] = var options = options

@@ -195,8 +240,37 @@

proc countDocuments*(store: Datastore): int64 = return store.db.getRow(SQL_COUNT_DOCUMENTS)[0].parseInt +proc importFile*(store: Datastore, f: string, dir = "") = + if not f.fileExists: + raise newException(EFileNotFound, "File '$1' not found." % f) + let ext = f.splitFile.ext + var d_id = f.replace("\\", "/") + var d_contents = f.readFile + var d_ct = "application/octet-stream" + if CONTENT_TYPES.hasKey(ext): + d_ct = CONTENT_TYPES[ext].replace("\"", "") + var d_binary = 0 + var d_searchable = 1 + if d_ct.isBinary: + d_binary = 1 + d_searchable = 0 + d_contents = d_contents.encode(d_contents.len*2) # Encode in Base64. + let singleOp = not LS_TRANSACTION + store.begin() + try: + discard store.createDocument(d_id, d_contents, d_ct, d_binary, d_searchable) + if dir != "": + store.db.exec(SQL_INSERT_TAG, "$dir:"&dir, d_id) + except: + store.rollback() + eWarn() + raise + if singleOp: + store.commit() + proc importDir*(store: Datastore, dir: string) = # TODO: Only allow directory names (not paths)? + var files = newSeq[string]() if not dir.dirExists: raise newException(EDirectoryNotFound, "Directory '$1' not found." % dir) for f in dir.walkDirRec():

@@ -205,20 +279,29 @@ continue

if f.splitFile.name.startsWith("."): # Ignore hidden files continue - let ext = f.splitFile.ext - var d_id = f.replace("\\", "/") - var d_contents = f.readFile - var d_ct = "application/octet-stream" - if CONTENT_TYPES.hasKey(ext): - d_ct = CONTENT_TYPES[ext].replace("\"", "") - var d_binary = 0 - var d_searchable = 1 - if d_ct.isBinary: - d_binary = 1 - d_searchable = 0 - d_contents = d_contents.encode(d_contents.len*2) # Encode in Base64. - discard store.createDocument(d_id, d_contents, d_ct, d_binary, d_searchable) - store.db.exec(SQL_INSERT_TAG, "$dir:"&dir, d_id) + files.add(f) + # Import single files in batch + let batchSize = 50 + let nBatches = ceil(files.len/batchSize).toInt + var cFiles = 0 + var cBatches = 0 + info("Importing $1 files in $2 batches", files.len, nBatches) + store.begin() + for f in files: + try: + store.importFile(f, dir) + cFiles.inc + if (cFiles-1) mod batchSize == 0: + cBatches.inc + info("Importing batch $1/$2...", cBatches, nBatches) + store.commit() + store.begin() + except: + warn("Unable to import file: $1", f) + eWarn() + store.rollback() + info("Imported $1/$2 files", cFiles, files.len) + store.commit() proc exportDir*(store: Datastore, dir: string) = let docs = store.db.getAllRows(SQL_SELECT_DOCUMENTS_BY_TAG, "$dir:"&dir)
M lib/utils.nim

@@ -23,12 +23,27 @@ proc prepareSelectDocumentsQuery*(options: var QueryOptions): string =

result = "SELECT " if options.search.len > 0: if options.select[0] != "COUNT(id)": + var rank = "rank(matchinfo(searchcontents, 'pcxnal'), 1.20, 0.75, 5.0, 0.5) AS rank" options.select.add("snippet(searchcontents) AS highlight") - options.select.add("rank(matchinfo(searchcontents, 'pcxnal'), 1.20, 0.75, 5.0, 0.5) AS rank") + options.select.add("ranktable.rank AS rank") + options.select.add(rank) options.orderby = "rank DESC" - result = result & options.select.join(", ") - result = result & " FROM documents, searchcontents " - result = result & "WHERE documents.id = searchcontents.document_id " + # Create inner select + var innerSelect = "SELECT document_id as id, " & rank & " FROM searchcontents WHERE searchcontents MATCH '" & options.search.replace("'", "''") & "' " + if options.tags.len > 0: + innerSelect = innerSelect & options.tags.selectDocumentsByTags() + innerSelect = innerSelect & " ORDER BY rank DESC " + if options.limit > 0: + innerSelect = innerSelect & "LIMIT " & $options.limit + if options.offset > 0: + innerSelect = innerSelect & " OFFSET " & $options.offset + result = result & options.select.join(", ") + result = result & " FROM documents, searchcontents, (" & innerSelect & ") AS ranktable " + result = result & "WHERE documents.id = ranktable.id AND documents.id = searchcontents.document_id " + else: + result = result & options.select.join(", ") + result = result & " FROM documents, searchcontents " + result = result & "WHERE documents.id = searchcontents.document_id " else: result = result & options.select.join(", ") result = result & " FROM documents WHERE 1=1 "

@@ -45,7 +60,7 @@ result = result & "LIMIT " & $options.limit & " "

if options.offset > 0: result = result & "OFFSET " & $options.offset & " " debug(result.replace("$", "$$")) - + proc prepareSelectTagsQuery*(options: QueryOptions): string = result = "SELECT tag_id, COUNT(document_ID) " result = result & "FROM tags "

@@ -119,6 +134,11 @@ result.headers = ctJsonHeader()

proc resDocumentNotFound*(id): Response = resError(Http404, "Document '$1' not found." % id) + +proc eWarn*() = + var e = getCurrentException() + warn(e.msg) + debug(getStackTrace(e)) # Created by Joshua Wilson on 27/05/14. # Copyright (c) 2014 Joshua Wilson. All rights reserved.