summaryrefslogtreecommitdiff
path: root/math/metadb.c
diff options
context:
space:
mode:
Diffstat (limited to 'math/metadb.c')
-rw-r--r-- math/metadb.c 581
1 files changed, 581 insertions, 0 deletions
diff --git a/math/metadb.c b/math/metadb.c
new file mode 100644
index 0000000..7e50959
--- /dev/null
+++ b/math/metadb.c
@@ -0,0 +1,581 @@
+/* $Id$ */
+
+/*
+ * Copyright (c) 2006 Dimitri A. Sokolyuk <demon@vhost.dyndns.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <assert.h>
+#include <ctype.h>
+#include <db.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "metadb.h"
+
+/*
+ * Default open flags and creation mode for the database files.
+ * (O_RDONLY is the alternative flag noted by the original author.)
+ *
+ * Both expansions are parenthesized so they stay a single operand when
+ * the macro is used inside a larger expression such as (x & DB_FLAGS).
+ */
+#define DB_FLAGS	(O_CREAT|O_RDWR)
+#define DB_MODE		(S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)
+
+/* Debug helper: print two string expressions as "name = 'value'" pairs. */
+#define P(f1,f2) printf( #f1 " = '%s'\t" #f2 " = '%s'\n", (f1), (f2))
+
+/*
+ * Database handles shared by every function in this file:
+ * idx is opened on INDEXDB, meta on METADB and srch on SEARCHDB
+ * (see db_open()).
+ */
+DB *idx, *meta, *srch;
+
+/*
+ * Forward declarations for the record decoder and for the qsort(3)
+ * comparators collected in the cmp[] table further down.
+ * The _i variants sort in increasing order, the _d variants in
+ * decreasing order.
+ */
+struct meta *decode_meta(struct meta *, char *);
+int cmptitle_i(const void *, const void *);
+int cmptitle_d(const void *, const void *);
+int cmpdate_i(const void *, const void *);
+int cmpdate_d(const void *, const void *);
+int cmpauthor_i(const void *, const void *);
+int cmpauthor_d(const void *, const void *);
+
+/*
+ * Open the index, metadata and search databases as hash files.
+ *
+ * flags is handed unchanged to dbopen(3) for each of the three files;
+ * new files are created with mode DB_MODE.
+ *
+ * Returns 0 when all three databases are open.  Returns -1 if any of
+ * them could not be opened; in that case the ones that did open are
+ * closed again and all three global handles are reset to NULL, so a
+ * later db_close() or db_open() never touches a stale handle.
+ */
+int
+db_open(int flags)
+{
+	idx = dbopen(INDEXDB, flags, DB_MODE, DB_HASH, NULL);
+	meta = dbopen(METADB, flags, DB_MODE, DB_HASH, NULL);
+	srch = dbopen(SEARCHDB, flags, DB_MODE, DB_HASH, NULL);
+
+	if (idx == NULL || meta == NULL || srch == NULL) {
+		db_close();
+		/* the handles are gone now; do not leave them dangling */
+		idx = meta = srch = NULL;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Close whichever of the three databases are currently open.
+ *
+ * Each handle is reset to NULL after it has been closed, which makes
+ * the function safe to call more than once and safe to call after a
+ * partially failed db_open().
+ */
+void
+db_close(void)
+{
+	if (idx != NULL) {
+		(idx->close)(idx);
+		idx = NULL;
+	}
+	if (meta != NULL) {
+		(meta->close)(meta);
+		meta = NULL;
+	}
+	if (srch != NULL) {
+		(srch->close)(srch);
+		srch = NULL;
+	}
+}
+
+/*
+ * Linear membership test.
+ * Returns 1 if id occurs among the first n elements starting at p,
+ * 0 otherwise (including when n is 0).
+ */
+static int
+chkid(unsigned int *p, size_t n, unsigned int id)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++) {
+		if (p[i] == id)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Record id in the index database and link it into its parent.
+ *
+ * An index record is an array of int: element 0 is the id of the
+ * node's parent, the remaining elements are the ids of its children.
+ *
+ * Step 1 stores the record [parent] under key id.
+ * Step 2 (skipped for the root, id 0) inserts id as the first child in
+ * the parent's record.  If id is already listed there the record is
+ * left alone, so adding the same node twice no longer duplicates it.
+ *
+ * Returns 0 on success, -1 on a database or allocation error, and the
+ * (non-zero) result of the lookup when the parent has no record.
+ *
+ * NOTE(review): step 1 overwrites an existing record for id, which
+ * drops any children already attached to it.
+ */
+static int
+addindex(int parent, int id)
+{
+	DBT k, d;
+	int ret, *buf;
+	size_t siz, nchild;
+
+	memset(&k, 0, sizeof(DBT));
+	memset(&d, 0, sizeof(DBT));
+
+	/* add new record, payload is the parent's id */
+	k.data = &id;
+	k.size = sizeof(int);
+	d.data = &parent;
+	d.size = sizeof(int);
+
+	ret = (idx->put)(idx, &k, &d, 0);
+
+	/* go further and update parent only if id isn't root */
+	if (id == 0 || ret == -1)
+		return ret;
+
+	memset(&k, 0, sizeof(DBT));
+	memset(&d, 0, sizeof(DBT));
+
+	k.data = &parent;
+	k.size = sizeof(int);
+
+	/* try to get parent */
+	if ((ret = (idx->get)(idx, &k, &d, 0)) != 0)
+		return ret;
+
+	/* every record must at least hold its own parent id */
+	if (d.size < sizeof(int))
+		return -1;
+	nchild = d.size / sizeof(int) - 1;
+
+	/* explicit check: assert() vanishes when built with NDEBUG */
+	siz = d.size + sizeof(int);
+	if ((buf = malloc(siz)) == NULL)
+		return -1;
+	memset(buf, 0, siz);
+
+	/* new layout: [parent's parent][id][previous children ...] */
+	memcpy(buf, d.data, sizeof(int));
+	memcpy(buf + 1, &id, sizeof(int));
+	memcpy(buf + 2, (char *)d.data + sizeof(int), d.size - sizeof(int));
+
+	/* id is already a child of parent: nothing to update */
+	if (chkid((unsigned int *)buf + 2, nchild, (unsigned int)id)) {
+		free(buf);
+		return 0;
+	}
+
+	d.data = buf;
+	d.size = siz;
+
+	ret = (idx->put)(idx, &k, &d, 0);
+	free(buf);
+
+	return ret;
+}
+
+/* Map a NULL string pointer to the empty string; s is evaluated twice. */
+#define EMPTY(s) ((s) == NULL ? "" : (s))
+
+/* Number of bytes needed to store s including its terminating NUL. */
+#define LENGTH(s) (strlen(EMPTY(s)) + 1)
+
+/*
+ * Append string s (NULL is stored as "") with its terminating NUL to
+ * buffer d at byte offset b, then advance b past it.  l is a scratch
+ * lvalue that receives the number of bytes copied.
+ * s is evaluated more than once, so it must be free of side effects.
+ */
+#define PUTSTR(d, s, b, l) do { \
+ (l) = LENGTH(s); \
+ memcpy((d) + (b), EMPTY(s), (l)); \
+ (b) += (l); \
+} while (0)
+
+/*
+ * Serialize *mp into a freshly allocated flat buffer.
+ *
+ * Layout: the raw bytes of type, date and id, followed by the thirteen
+ * string fields (dir, title, abstract, author, email, html, mws, mw,
+ * pdf, code, img, language, tokens), each stored NUL-terminated.  A
+ * NULL string field is stored as an empty string.
+ *
+ * The total size in bytes is written to *siz.  The returned buffer is
+ * owned by the caller and must be released with free(3).
+ */
+static char *
+encode_meta(struct meta *mp, size_t *siz)
+{
+	const char *field[] = {
+		mp->dir, mp->title, mp->abstract, mp->author, mp->email,
+		mp->html, mp->mws, mp->mw, mp->pdf, mp->code, mp->img,
+		mp->language, mp->tokens
+	};
+	const size_t nfield = sizeof(field) / sizeof(field[0]);
+	size_t i, n, off;
+	char *out;
+
+	/* first pass: work out how large the record will be */
+	*siz = sizeof(mp->type) + sizeof(mp->date) + sizeof(mp->id);
+	for (i = 0; i < nfield; i++)
+		*siz += LENGTH(field[i]);
+
+	out = malloc(*siz);
+	assert(out);
+	memset(out, 0, *siz);
+
+	/* second pass: fixed-size header ... */
+	off = 0;
+	memcpy(out + off, &mp->type, sizeof(mp->type));
+	off += sizeof(mp->type);
+	memcpy(out + off, &mp->date, sizeof(mp->date));
+	off += sizeof(mp->date);
+	memcpy(out + off, &mp->id, sizeof(mp->id));
+	off += sizeof(mp->id);
+
+	/* ... followed by the strings, in declaration order above */
+	for (i = 0; i < nfield; i++)
+		PUTSTR(out, field[i], off, n);
+
+	return out;
+}
+
+/*
+ * GETSTR(d, s, l): duplicate the NUL-terminated string found at byte
+ * offset l of buffer s into d with strdup(3), then advance l past the
+ * string and its terminator.
+ *
+ * The advance is measured on the source buffer rather than on d, so a
+ * failed strdup() (d == NULL) is never passed to strlen(); the offset
+ * stays correct for the following fields either way.
+ */
+#define GETSTR(d, s, l) do { \
+	(d) = strdup((s) + (l)); \
+	(l) += strlen((s) + (l)) + 1; \
+} while (0)
+
+/*
+ * Rebuild *mp from a flat record produced by encode_meta().
+ *
+ * buf holds the raw bytes of type, date and id followed by thirteen
+ * NUL-terminated strings.  Every string field of *mp receives its own
+ * strdup(3) copy.
+ *
+ * The fixed-size fields are copied out with memcpy(): they sit at
+ * arbitrary byte offsets of a char buffer, so reading them through a
+ * cast pointer could be a misaligned access.
+ *
+ * Returns mp.
+ */
+struct meta *
+decode_meta(struct meta *mp, char *buf)
+{
+	size_t blen = 0;
+
+	memcpy(&mp->type, buf + blen, sizeof(mp->type));
+	blen += sizeof(mp->type);
+	memcpy(&mp->date, buf + blen, sizeof(mp->date));
+	blen += sizeof(mp->date);
+	memcpy(&mp->id, buf + blen, sizeof(mp->id));
+	blen += sizeof(mp->id);
+
+	GETSTR(mp->dir, buf, blen);
+	GETSTR(mp->title, buf, blen);
+	GETSTR(mp->abstract, buf, blen);
+	GETSTR(mp->author, buf, blen);
+	GETSTR(mp->email, buf, blen);
+	GETSTR(mp->html, buf, blen);
+	GETSTR(mp->mws, buf, blen);
+	GETSTR(mp->mw, buf, blen);
+	GETSTR(mp->pdf, buf, blen);
+	GETSTR(mp->code, buf, blen);
+	GETSTR(mp->img, buf, blen);
+	GETSTR(mp->language, buf, blen);
+	GETSTR(mp->tokens, buf, blen);
+
+	return mp;
+}
+
+/*
+ * Store the metadata record *mp in the meta database under key id.
+ *
+ * mp->id is set to id before the record is serialized with
+ * encode_meta().  Returns the result of the database put operation.
+ */
+static int
+addmeta(struct meta *mp, int id)
+{
+	DBT key, val;
+	size_t len;
+	char *rec;
+	int rc;
+
+	mp->id = id;
+	rec = encode_meta(mp, &len);
+
+	memset(&key, 0, sizeof(DBT));
+	key.data = &id;
+	key.size = sizeof(int);
+
+	memset(&val, 0, sizeof(DBT));
+	val.data = rec;
+	val.size = len;
+
+	rc = (meta->put)(meta, &key, &val, 0);
+
+	/* the serialized copy is no longer needed once put() returned */
+	free(rec);
+
+	return rc;
+}
+
+/*
+ * Index every token of s for id in the search database.
+ *
+ * s is split in place at whitespace and punctuation and lower-cased
+ * in place.  Each token (including the empty tokens produced by
+ * adjacent separators) is a key whose value is an array of the ids
+ * that contain it; id is inserted at the front of that array.
+ *
+ * A token that already lists id is skipped and processing continues
+ * with the next token.  (Returning at that point used to drop every
+ * remaining token as soon as one word repeated.)
+ *
+ * Returns 0 on success, -1 on a database or allocation error.
+ * A NULL s is accepted and indexes nothing.
+ */
+static int
+addsearch(char *s, int id)
+{
+	DBT k, d;
+	int ret, *buf;
+	size_t siz;
+	char *t, *p;
+	char sep[] = " \t\n!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
+
+	memset(&k, 0, sizeof(DBT));
+
+	while ((t = strsep(&s, sep)) != NULL) {
+		/* <ctype.h> needs a value representable as unsigned char */
+		for (p = t; *p != '\0'; p++)
+			*p = tolower((unsigned char)*p);
+
+		k.data = t;
+		k.size = strlen(t) + 1;
+
+		memset(&d, 0, sizeof(DBT));
+		ret = (srch->get)(srch, &k, &d, 0);
+		if (ret == 0) {
+			/* found: nothing to do when id is already listed */
+			if (chkid(d.data, d.size / sizeof(int), id) == 1)
+				continue;
+			siz = d.size + sizeof(int);
+		} else if (ret == 1) {
+			/* not found: the new list holds just this id */
+			siz = sizeof(int);
+		} else {
+			/* error */
+			return -1;
+		}
+
+		if ((buf = malloc(siz)) == NULL)
+			return -1;
+		memset(buf, 0, siz);
+
+		memcpy(buf, &id, sizeof(int));
+		if (ret == 0)
+			memcpy(buf + 1, d.data, d.size);
+
+		d.data = buf;
+		d.size = siz;
+
+		/* release the buffer on every path, including failure */
+		ret = (srch->put)(srch, &k, &d, 0);
+		free(buf);
+		if (ret == -1)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Add the document *mp with identifier id below node parent.
+ *
+ * The node is entered into the index, its metadata is stored, and the
+ * words of its title, abstract, author and tokens fields are added to
+ * the search database.  Those four strings are tokenized and
+ * lower-cased in place.
+ *
+ * Every step is attempted even if an earlier one failed.  Returns 0 if
+ * no step reported a hard error, -1 if at least one did.  A positive
+ * helper result (parent without index record, word already indexed)
+ * is not treated as an error.
+ */
+int
+db_add(struct meta *mp, int parent, int id)
+{
+	int rc = 0;
+
+	if (addindex(parent, id) == -1)
+		rc = -1;
+	if (addmeta(mp, id) == -1)
+		rc = -1;
+
+	if (addsearch(mp->title, id) == -1)
+		rc = -1;
+	if (addsearch(mp->abstract, id) == -1)
+		rc = -1;
+	if (addsearch(mp->author, id) == -1)
+		rc = -1;
+	if (addsearch(mp->tokens, id) == -1)
+		rc = -1;
+
+	/* XXX: also register id under the empty search key */
+	if (addsearch("", id) == -1)
+		rc = -1;
+
+	return rc;
+}
+
+/*
+ * Look up all documents indexed under the word key.
+ *
+ * key is lower-cased in place before the lookup.  On success the
+ * number of hits is stored in *n and a calloc(3)ed array of *n
+ * records is returned; the caller owns the array.  An element whose
+ * metadata cannot be loaded is left zero-filled.
+ *
+ * Returns NULL with *n == 0 when the word is unknown, the stored list
+ * is empty, or memory is exhausted.
+ */
+struct meta *
+db_find(char *key, size_t *n)
+{
+	DBT k, d;
+	struct meta *mp;
+	char *p;
+	size_t i, cnt;
+
+	memset(&k, 0, sizeof(DBT));
+	memset(&d, 0, sizeof(DBT));
+
+	/* <ctype.h> needs a value representable as unsigned char */
+	for (p = key; *p != '\0'; p++)
+		*p = tolower((unsigned char)*p);
+
+	k.data = key;
+	k.size = strlen(key) + 1;
+
+	*n = 0;
+	if ((srch->get)(srch, &k, &d, 0) != 0)
+		return NULL;
+
+	/* calloc(0, ...) may legitimately return NULL: handle it here */
+	cnt = d.size / sizeof(int);
+	if (cnt == 0)
+		return NULL;
+
+	if ((mp = calloc(cnt, sizeof(struct meta))) == NULL)
+		return NULL;
+
+	for (i = 0; i < cnt; i++)
+		db_get(&mp[i], ((unsigned int *)d.data)[i]);
+
+	*n = cnt;
+	return mp;
+}
+
+/*
+ * Load the metadata record stored under id.
+ *
+ * If mp is NULL a new struct meta is allocated for the result,
+ * otherwise the record is decoded into *mp.  Returns the filled
+ * structure, or NULL when the lookup does not succeed (in which case
+ * *mp is left untouched and nothing is allocated).
+ */
+struct meta *
+db_get(struct meta *mp, unsigned int id)
+{
+	DBT key, val;
+
+	memset(&key, 0, sizeof(DBT));
+	memset(&val, 0, sizeof(DBT));
+
+	key.data = &id;
+	key.size = sizeof(int);
+
+	if ((meta->get)(meta, &key, &val, 0) != 0)
+		return NULL;
+
+	/* allocate only once we know there is something to decode */
+	if (mp == NULL) {
+		mp = malloc(sizeof(struct meta));
+		assert(mp);
+	}
+
+	return decode_meta(mp, val.data);
+}
+
+/*
+ * Number of children recorded for node id in the index database.
+ *
+ * The count is the number of ints in the node's index record minus
+ * one (the first int is the node's parent).  Returns 0 when the
+ * lookup does not succeed.
+ */
+size_t
+db_nelem(unsigned int id)
+{
+	DBT key, val;
+	size_t count = 0;
+
+	memset(&key, 0, sizeof(DBT));
+	memset(&val, 0, sizeof(DBT));
+
+	key.data = &id;
+	key.size = sizeof(int);
+
+	if ((idx->get)(idx, &key, &val, 0) == 0)
+		count = val.size / sizeof(int) - 1;
+
+	return count;
+}
+
+/*
+ * Load the metadata of every child of node id.
+ *
+ * *n receives the number of children.  Returns a calloc(3)ed array of
+ * *n records owned by the caller, or NULL (with *n == 0) when the
+ * node has no index record or no children.  An element whose metadata
+ * cannot be loaded is left zero-filled.
+ */
+struct meta *
+db_children(unsigned int id, size_t *n)
+{
+	DBT key, val;
+	struct meta *list;
+	unsigned int *child;
+	size_t i;
+
+	memset(&key, 0, sizeof(DBT));
+	memset(&val, 0, sizeof(DBT));
+
+	key.data = &id;
+	key.size = sizeof(int);
+
+	*n = 0;
+	if ((idx->get)(idx, &key, &val, 0) != 0)
+		return NULL;
+
+	/* element 0 of the record is the parent, the rest are children */
+	*n = val.size / sizeof(int) - 1;
+	if (*n == 0)
+		return NULL;
+
+	list = calloc(*n, sizeof(struct meta));
+	assert(list);
+
+	child = (unsigned int *)val.data + 1;
+	for (i = 0; i < *n; i++)
+		db_get(&list[i], child[i]);
+
+	return list;
+}
+
+/*
+ * Build the chain of nodes leading from the root down to node id.
+ *
+ * The index is followed from id through successive parent ids until
+ * the root (id 0) is reached.  *n receives the number of nodes and a
+ * calloc(3)ed array of *n records, ordered root first and id last, is
+ * returned; the caller owns it.  An element whose metadata cannot be
+ * loaded is left zero-filled.
+ *
+ * At most 32 nodes are collected.  For a deeper (or cyclic) chain the
+ * walk stops there and the result starts at the highest ancestor that
+ * was reached instead of at the root.
+ *
+ * Returns NULL with *n == 0 if a node on the way has no usable index
+ * record or memory is exhausted.
+ */
+struct meta *
+db_path(unsigned int id, size_t *n)
+{
+	DBT k, d;
+	struct meta *mp;
+	unsigned int path[32];
+	const size_t maxnodes = sizeof(path) / sizeof(path[0]);
+	size_t depth, i;
+
+	memset(&k, 0, sizeof(DBT));
+	memset(&d, 0, sizeof(DBT));
+
+	*n = 0;
+
+	depth = 0;
+	path[depth] = id;
+
+	/* k keeps pointing at id, which is advanced to the parent below */
+	k.data = &id;
+	k.size = sizeof(int);
+
+	/* depth + 1 < maxnodes: the next store must stay inside path[] */
+	while (path[depth] != 0 && depth + 1 < maxnodes) {
+		if ((idx->get)(idx, &k, &d, 0) != 0)
+			return NULL;
+		if (d.size < sizeof(id))
+			return NULL;
+		memcpy(&id, d.data, sizeof(id));
+		path[++depth] = id;
+	}
+
+	if ((mp = calloc(depth + 1, sizeof(struct meta))) == NULL)
+		return NULL;
+	*n = depth + 1;
+
+	/* path[] runs leaf to root; hand it out root to leaf */
+	for (i = 0; i < *n; i++)
+		db_get(&mp[i], path[depth - i]);
+
+	return mp;
+}
+
+/*
+ * increasing sort order
+ *
+ * qsort(3) comparator on the title field.  A NULL title, as found in
+ * the zero-filled elements left behind when a record could not be
+ * loaded, compares as the empty string.
+ */
+int
+cmptitle_i(const void *p1, const void *p2)
+{
+	const struct meta *m1 = p1;
+	const struct meta *m2 = p2;
+
+	return strcmp(EMPTY(m1->title), EMPTY(m2->title));
+}
+
+/*
+ * decreasing sort order
+ *
+ * qsort(3) comparator on the title field.  A NULL title, as found in
+ * the zero-filled elements left behind when a record could not be
+ * loaded, compares as the empty string.
+ */
+int
+cmptitle_d(const void *p1, const void *p2)
+{
+	const struct meta *m1 = p1;
+	const struct meta *m2 = p2;
+
+	return strcmp(EMPTY(m2->title), EMPTY(m1->title));
+}
+
+/*
+ * increasing sort order, oldest first
+ *
+ * qsort(3) comparator on the date field: yields 1, -1 or 0 when the
+ * first date is later than, earlier than or equal to the second.
+ */
+int
+cmpdate_i(const void *p1, const void *p2)
+{
+	const struct meta *a = p1;
+	const struct meta *b = p2;
+
+	return (a->date > b->date) - (a->date < b->date);
+}
+
+/*
+ * decreasing sort order, newest first
+ *
+ * qsort(3) comparator on the date field: yields -1, 1 or 0 when the
+ * first date is later than, earlier than or equal to the second.
+ */
+int
+cmpdate_d(const void *p1, const void *p2)
+{
+	const struct meta *a = p1;
+	const struct meta *b = p2;
+
+	return (a->date < b->date) - (a->date > b->date);
+}
+
+/*
+ * increasing sort order
+ *
+ * qsort(3) comparator on the author field.  A NULL author, as found in
+ * the zero-filled elements left behind when a record could not be
+ * loaded, compares as the empty string.
+ */
+int
+cmpauthor_i(const void *p1, const void *p2)
+{
+	const struct meta *m1 = p1;
+	const struct meta *m2 = p2;
+
+	return strcmp(EMPTY(m1->author), EMPTY(m2->author));
+}
+
+/*
+ * decreasing sort order
+ *
+ * qsort(3) comparator on the author field.  A NULL author, as found in
+ * the zero-filled elements left behind when a record could not be
+ * loaded, compares as the empty string.
+ */
+int
+cmpauthor_d(const void *p1, const void *p2)
+{
+	const struct meta *m1 = p1;
+	const struct meta *m2 = p2;
+
+	return strcmp(EMPTY(m2->author), EMPTY(m1->author));
+}
+
+/*
+ * Comparator table used by db_sort(), indexed by its sort-order
+ * argument.
+ * NOTE(review): the entry order presumably mirrors the enumerators of
+ * enum sorder in metadb.h -- confirm before reordering.
+ */
+int (*cmp[])(const void *, const void *) = {
+	cmptitle_i,	/* title, increasing */
+	cmptitle_d,	/* title, decreasing */
+	cmpdate_i,	/* date, oldest first */
+	cmpdate_d,	/* date, newest first */
+	cmpauthor_i,	/* author, increasing */
+	cmpauthor_d	/* author, decreasing */
+};
+
+/*
+ * Sort the n records at mp in place with the comparator selected by o.
+ *
+ * o indexes the cmp[] table.  A value outside the table, a NULL array
+ * or fewer than two elements leaves the array untouched instead of
+ * calling through an out-of-range table slot.
+ */
+void
+db_sort(struct meta *mp, size_t n, enum sorder o)
+{
+	const size_t ncmp = sizeof(cmp) / sizeof(cmp[0]);
+
+	/* the cast also turns a negative o into an out-of-range index */
+	if ((size_t)o >= ncmp)
+		return;
+	if (mp == NULL || n < 2)
+		return;
+
+	qsort(mp, n, sizeof(struct meta), cmp[o]);
+}
+
+/*
+ * Disabled alternative: an accessor function returning the comparator
+ * for a sort order.  It cannot simply be enabled as is: it refers to a
+ * table named fct that is not defined in this file, and its own name
+ * clashes with the cmp[] array defined above.
+ */
+#if 0
+int
+(*cmp(enum sorder o))(const void *, const void *)
+{
+ return fct[o];
+}
+#endif
+
+/*
+ * Delete every record of one database.
+ *
+ * The scan is restarted with R_FIRST after each deletion so that it
+ * never continues from a record that has just been removed.  It ends
+ * when seq() reports no more records (or an error), or when a delete
+ * fails, which keeps the loop from spinning on a record that cannot
+ * be removed.  A NULL handle is ignored.
+ */
+static void
+wipe_db(DB *db)
+{
+	DBT k, d;
+
+	if (db == NULL)
+		return;
+
+	for (;;) {
+		memset(&k, 0, sizeof(DBT));
+		memset(&d, 0, sizeof(DBT));
+
+		/* seq() returns 0 while there is a record to hand out */
+		if ((db->seq)(db, &k, &d, R_FIRST) != 0)
+			break;
+		if ((db->del)(db, &k, 0) != 0)
+			break;
+	}
+}
+
+/*
+ * Remove all records from the index, metadata and search databases.
+ * The databases stay open.
+ */
+void
+db_wipe(void)
+{
+	wipe_db(idx);
+	wipe_db(meta);
+	wipe_db(srch);
+}