SQLite compiling for dos!

This commit is contained in:
Scott Duensing 2022-03-14 20:24:37 -05:00
parent 185391ef2a
commit 9f32ea202a
296 changed files with 209948 additions and 28 deletions

1
.gitignore vendored
View file

@ -10,6 +10,7 @@ obj/
retired/
test/
doors/
client/src/thirdparty/sqlite-3.4.2/build
*/out/

View file

@ -58,6 +58,11 @@ SHA256
https://github.com/ilvn/SHA256
MIT
SQLite 3.4.2
------------
https://www.ibiblio.org/pub/micro/pc-stuff/freedos/files/distributions/1.2/repos/pkg-html/sqlite.html
Public Domain
stb_ds.h
--------
https://github.com/nothings/stb

View file

@ -35,10 +35,10 @@ BINDIR = bin
CFLAGS := $(DEBUG)
#CFLAGS += -DALLEGRONOTAVAIL -DALLEGRONOTPROGS -DDZCOMM_SRC
CFLAGS += -I$(SRCDIR)/client/src -I$(SRCDIR)/client/src/system -I$(SRCDIR)/client/src/dos -I$(SRCDIR)/client/src/gui -I$(SRCDIR)/client/src/thirdparty
CFLAGS += -I$(SRCDIR)/shared -I$(SRCDIR)/shared/thirdparty
CFLAGS += -I$(SRCDIR)/client/src/thirdparty/sqlite-3.4.2/build -I$(SRCDIR)/shared -I$(SRCDIR)/shared/thirdparty
#CFLAGS += -I$(SRCDIR)/client/src/thirdparty/dzcomm -I$(SRCDIR)/client/src/thirdparty/dzcomm/include
CPPFLAGS :=
LDFLAGS :=
LDFLAGS := -L$(SRCDIR)/client/src/thirdparty/sqlite-3.4.2/build/.libs -lsqlite3
#LDFLAGS += -L$(SRCDIR)/client/src/thirdparty/dzcomm -ldzcom
PREFIX := /usr/local
TARGET_ARCH :=
@ -58,7 +58,7 @@ ALL_LDLIBS := -lc
# Source, Binaries, Dependencies
SRC := $(shell find $(SRCDIR) -type f -name '*.c' | grep -v '/linux/' | grep -v '/server/' | grep -v '/primes/' | grep -v '/font/' | grep -v '/precache/' | grep -v '/retired/' | grep -v '/test/')
SRC := $(shell find $(SRCDIR) -type f -name '*.c' | grep -v '/linux/' | grep -v '/server/' | grep -v '/primes/' | grep -v '/font/' | grep -v '/precache/' | grep -v '/retired/' | grep -v '/test/' | grep -v '/sqlite-3.4.2/')
OBJ := $(patsubst $(SRCDIR)/%,$(OBJDIR)/%,$(SRC:.c=.o))
DEP := $(OBJ:.o=.d)
BIN := $(BINDIR)/$(TARGET)
@ -126,4 +126,3 @@ install: $(BIN)
clean:
$(RM) $(OBJ) $(DEP) $(BIN)
$(RMDIR) $(OBJDIR) $(BINDIR) 2> /dev/null; true

View file

@ -78,11 +78,22 @@ function outputLicense() {
}
pushd font/data
createEmbeddedBinary vga8x14 dat ../../client/src/embedded
popd
# Include DJGPP into the path for this script
PATH=/opt/cross/djgpp/bin:$PATH
pushd client/data
createEmbeddedBinary mouse png ../../client/src/embedded
popd
#pushd font/data
#createEmbeddedBinary vga8x14 dat ../../client/src/embedded
#popd
# Build SQLite
pushd client/src/thirdparty/sqlite-3.4.2
[[ -d build ]] && rm -rf build
mkdir -p build
cd build
sh ../configure \
--disable-tcl \
--disable-threadsafe\
--build=x86_64-linux-gnu \
--host=i586-pc-msdosdjgpp
make
popd

View file

@ -29,6 +29,7 @@
#include "listbox.h"
#include "network.h"
#include "util.h"
static WindowT *_winBrowser = NULL;
@ -62,6 +63,8 @@ static void packetHandler(PacketDecodeDataT *packet);
void browserShow(void) {
char **lines = NULL;
TagItemT uiBrowser[] = {
T_START,
T_WINDOW, O(_winBrowser),
@ -200,19 +203,18 @@ void browserShow(void) {
guiDebugAreaShow(W(_btnBoxTab));
guiDebugAreaShow(W(_btnInfoTab));
// 123456789012345678901234567890123456789012345678901234567
listboxItemAdd(_lstDescription, "The Post-Terran Minerals Corporation (PTMC) digs up");
listboxItemAdd(_lstDescription, "minerals on all nine planets of the solar system,");
listboxItemAdd(_lstDescription, "employing humans and robots to do its job. Unfortunately,");
listboxItemAdd(_lstDescription, "the mining robots are now being controlled by a hacker,");
listboxItemAdd(_lstDescription, "and have taken the human workers hostage. The PTMC has");
listboxItemAdd(_lstDescription, "tried to get the robots back under their control, but");
listboxItemAdd(_lstDescription, "communication with the mining stations has been lost. In");
listboxItemAdd(_lstDescription, "desperation, the PTMC modifies a Pyro-GX ship for combat");
listboxItemAdd(_lstDescription, "and hires a mercenary, codenamed 'Material Defender', to");
listboxItemAdd(_lstDescription, "fly it. The mission is to destroy the infected mines and");
listboxItemAdd(_lstDescription, "rescue human hostages, destroying any hostile robot that");
listboxItemAdd(_lstDescription, "gets in the way.");
lines = utilWrapText(
"The Post-Terran Minerals Corporation (PTMC) digs up minerals on all nine planets of the solar system, "
"employing humans and robots to do its job. Unfortunately, the mining robots are now being controlled by a hacker, "
"and have taken the human workers hostage. The PTMC has tried to get the robots back under their control, but "
"communication with the mining stations has been lost. In desperation, the PTMC modifies a Pyro-GX ship for combat "
"and hires a mercenary, codenamed 'Material Defender', to fly it. The mission is to destroy the infected mines and "
"rescue human hostages, destroying any hostile robot that gets in the way.", 57);
while (arrlen(lines) > 0) {
listboxItemAdd(_lstDescription, lines[0]);
DEL(lines[0]);
arrdel(lines, 0);
}
listboxItemAdd(_lstInfo, "123456789012345678901234567890123456789012345678901234567");
listboxItemAdd(_lstInfo, "2");

View file

@ -0,0 +1,629 @@
#!/usr/make
#
# Makefile for SQLITE
#
# This makefile is suppose to be configured automatically using the
# autoconf. But if that does not work for you, you can configure
# the makefile manually. Just set the parameters below to values that
# work well for your system.
#
# If the configure script does not work out-of-the-box, you might
# be able to get it to work by giving it some hints. See the comment
# at the beginning of configure.in for additional information.
#
# The toplevel directory of the source tree. This is the directory
# that contains this "Makefile.in" and the "configure.in" script.
#
TOP = .
# C Compiler and options for use in building executables that
# will run on the platform that is doing the build.
#
BCC = gcc -g -O2
# C Compile and options for use in building executables that
# will run on the target platform. (BCC and TCC are usually the
# same unless your are cross-compiling.)
#
TCC = gcc -g -O2 -I. -I${TOP}/src
# Define -DNDEBUG to compile without debugging (i.e., for production usage)
# Omitting the define will cause extra debugging code to be inserted and
# includes extra comments when "EXPLAIN stmt" is used.
#
TCC += -DNDEBUG
# Compiler options needed for programs that use the TCL library.
#
TCC +=
# The library that programs using TCL must link against.
#
LIBTCL =
# Compiler options needed for programs that use the readline() library.
#
READLINE_FLAGS = -DHAVE_READLINE=0
# The library that programs using readline() must link against.
#
LIBREADLINE =
# Should the database engine be compiled threadsafe
#
TCC += -DSQLITE_THREADSAFE=0
# The pthreads library if needed
#
LIBPTHREAD=
# Do threads override each others locks by default (1), or do we test (-1)
#
TCC += -DSQLITE_THREAD_OVERRIDE_LOCK=-1
# The fdatasync library
TLIBS =
# Flags controlling use of the in memory btree implementation
#
# TEMP_STORE is 0 to force temporary tables to be in a file, 1 to
# default to file, 2 to default to memory, and 3 to force temporary
# tables to always be in memory.
#
TEMP_STORE = -DTEMP_STORE=1
# Version numbers and release number for the SQLite being compiled.
#
VERSION = 3.5
VERSION_NUMBER = 3005006
RELEASE = 3.5.6
# Filename extensions
#
BEXE = .exe
TEXE = .exe
# The following variable is "1" if the configure script was able to locate
# the tclConfig.sh file. It is an empty string otherwise. When this
# variable is "1", the TCL extension library (libtclsqlite3.so) is built
# and installed.
#
HAVE_TCL =
# The suffix used on shared libraries. Ex: ".dll", ".so", ".dylib"
#
SHLIB_SUFFIX = @TCL_SHLIB_SUFFIX@
# The directory into which to store package information for
# Some standard variables and programs
#
prefix = /dev/env/DJDIR
exec_prefix = ${prefix}
libdir = ${exec_prefix}/lib
INSTALL = /dev/env/DJDIR/bin/ginstall -c
LIBTOOL = ./libtool
ALLOWRELEASE =
# libtool compile/link/install
LTCOMPILE = $(LIBTOOL) --mode=compile --tag=CC $(TCC)
LTLINK = $(LIBTOOL) --mode=link $(TCC)
LTINSTALL = $(LIBTOOL) --mode=install $(INSTALL)
# nawk compatible awk.
NAWK = gawk
# You should not have to change anything below this line
###############################################################################
TCC += -DSQLITE_OMIT_LOAD_EXTENSION=1
# Object files for the SQLite library.
#
LIBOBJ = alter.lo analyze.lo attach.lo auth.lo btmutex.lo btree.lo build.lo \
callback.lo complete.lo date.lo \
delete.lo expr.lo fault.lo func.lo \
hash.lo journal.lo insert.lo loadext.lo \
main.lo malloc.lo mem1.lo mem2.lo mem3.lo mem4.lo mutex.lo \
mutex_os2.lo mutex_unix.lo mutex_w32.lo \
opcodes.lo os.lo os_unix.lo os_win.lo os_os2.lo \
pager.lo parse.lo pragma.lo prepare.lo printf.lo random.lo \
select.lo table.lo tokenize.lo trigger.lo update.lo \
util.lo vacuum.lo \
vdbe.lo vdbeapi.lo vdbeaux.lo vdbeblob.lo vdbefifo.lo vdbemem.lo \
where.lo utf.lo legacy.lo vtab.lo
# All of the source code files.
#
SRC = \
$(TOP)/src/alter.c \
$(TOP)/src/analyze.c \
$(TOP)/src/attach.c \
$(TOP)/src/auth.c \
$(TOP)/src/btmutex.c \
$(TOP)/src/btree.c \
$(TOP)/src/btree.h \
$(TOP)/src/build.c \
$(TOP)/src/callback.c \
$(TOP)/src/complete.c \
$(TOP)/src/date.c \
$(TOP)/src/delete.c \
$(TOP)/src/expr.c \
$(TOP)/src/fault.c \
$(TOP)/src/func.c \
$(TOP)/src/hash.c \
$(TOP)/src/hash.h \
$(TOP)/src/insert.c \
$(TOP)/src/journal.c \
$(TOP)/src/legacy.c \
$(TOP)/src/loadext.c \
$(TOP)/src/main.c \
$(TOP)/src/malloc.c \
$(TOP)/src/mem1.c \
$(TOP)/src/mem2.c \
$(TOP)/src/mem3.c \
$(TOP)/src/mem4.c \
$(TOP)/src/mutex.c \
$(TOP)/src/mutex_os2.c \
$(TOP)/src/mutex_unix.c \
$(TOP)/src/mutex_w32.c \
$(TOP)/src/os.c \
$(TOP)/src/os_unix.c \
$(TOP)/src/os_win.c \
$(TOP)/src/os_os2.c \
$(TOP)/src/pager.c \
$(TOP)/src/pager.h \
$(TOP)/src/parse.y \
$(TOP)/src/pragma.c \
$(TOP)/src/prepare.c \
$(TOP)/src/printf.c \
$(TOP)/src/random.c \
$(TOP)/src/select.c \
$(TOP)/src/shell.c \
$(TOP)/src/sqlite.h.in \
$(TOP)/src/sqliteInt.h \
$(TOP)/src/table.c \
$(TOP)/src/tclsqlite.c \
$(TOP)/src/tokenize.c \
$(TOP)/src/trigger.c \
$(TOP)/src/utf.c \
$(TOP)/src/update.c \
$(TOP)/src/util.c \
$(TOP)/src/vacuum.c \
$(TOP)/src/vdbe.c \
$(TOP)/src/vdbe.h \
$(TOP)/src/vdbeapi.c \
$(TOP)/src/vdbeaux.c \
$(TOP)/src/vdbeblob.c \
$(TOP)/src/vdbefifo.c \
$(TOP)/src/vdbemem.c \
$(TOP)/src/vdbeInt.h \
$(TOP)/src/vtab.c \
$(TOP)/src/where.c
# Source code for extensions
#
SRC += \
$(TOP)/ext/fts1/fts1.c \
$(TOP)/ext/fts1/fts1.h \
$(TOP)/ext/fts1/fts1_hash.c \
$(TOP)/ext/fts1/fts1_hash.h \
$(TOP)/ext/fts1/fts1_porter.c \
$(TOP)/ext/fts1/fts1_tokenizer.h \
$(TOP)/ext/fts1/fts1_tokenizer1.c
# Source code to the test files.
#
TESTSRC = \
$(TOP)/src/attach.c \
$(TOP)/src/btree.c \
$(TOP)/src/build.c \
$(TOP)/src/date.c \
$(TOP)/src/expr.c \
$(TOP)/src/func.c \
$(TOP)/src/insert.c \
$(TOP)/src/malloc.c \
$(TOP)/src/os.c \
$(TOP)/src/os_os2.c \
$(TOP)/src/os_unix.c \
$(TOP)/src/os_win.c \
$(TOP)/src/pager.c \
$(TOP)/src/pragma.c \
$(TOP)/src/prepare.c \
$(TOP)/src/printf.c \
$(TOP)/src/random.c \
$(TOP)/src/select.c \
$(TOP)/src/test1.c \
$(TOP)/src/test2.c \
$(TOP)/src/test3.c \
$(TOP)/src/test4.c \
$(TOP)/src/test5.c \
$(TOP)/src/test6.c \
$(TOP)/src/test7.c \
$(TOP)/src/test8.c \
$(TOP)/src/test9.c \
$(TOP)/src/test_autoext.c \
$(TOP)/src/test_async.c \
$(TOP)/src/test_btree.c \
$(TOP)/src/test_config.c \
$(TOP)/src/test_devsym.c \
$(TOP)/src/test_hexio.c \
$(TOP)/src/test_malloc.c \
$(TOP)/src/test_md5.c \
$(TOP)/src/test_onefile.c \
$(TOP)/src/test_schema.c \
$(TOP)/src/test_server.c \
$(TOP)/src/test_tclvar.c \
$(TOP)/src/test_thread.c \
$(TOP)/src/tokenize.c \
$(TOP)/src/utf.c \
$(TOP)/src/util.c \
$(TOP)/src/vdbe.c \
$(TOP)/src/vdbeapi.c \
$(TOP)/src/vdbeaux.c \
$(TOP)/src/vdbemem.c \
$(TOP)/src/where.c \
parse.c
# Header files used by all library source files.
#
HDR = \
sqlite3.h \
$(TOP)/src/btree.h \
$(TOP)/src/btreeInt.h \
$(TOP)/src/hash.h \
$(TOP)/src/sqliteLimit.h \
$(TOP)/src/mutex.h \
opcodes.h \
$(TOP)/src/os.h \
$(TOP)/src/os_common.h \
$(TOP)/src/sqlite3ext.h \
$(TOP)/src/sqliteInt.h \
$(TOP)/src/vdbe.h \
parse.h
# Header files used by extensions
#
HDR += \
$(TOP)/ext/fts1/fts1.h \
$(TOP)/ext/fts1/fts1_hash.h \
$(TOP)/ext/fts1/fts1_tokenizer.h
# Header files used by the VDBE submodule
#
VDBEHDR = \
$(HDR) \
$(TOP)/src/vdbeInt.h
# This is the default Makefile target. The objects listed here
# are what get build when you type just "make" with no arguments.
#
all: sqlite3.h libsqlite3.la sqlite3$(TEXE) $(HAVE_TCL:1=libtclsqlite3.la)
Makefile: $(TOP)/Makefile.in
./config.status
# Generate the file "last_change" which contains the date of change
# of the most recently modified source code file
#
last_change: $(SRC)
cat $(SRC) | grep '$$Id: ' | sort -k 5 | tail -1 \
| $(NAWK) '{print $$5,$$6}' >last_change
libsqlite3.la: $(LIBOBJ)
$(LTLINK) -o libsqlite3.la $(LIBOBJ) $(LIBPTHREAD) \
${ALLOWRELEASE} -rpath $(libdir) -version-info "8:6:8"
libtclsqlite3.la: tclsqlite.lo libsqlite3.la
$(LTLINK) -o libtclsqlite3.la tclsqlite.lo \
$(LIBOBJ) $(LIBPTHREAD) \
-rpath $(libdir)/sqlite \
-version-info "8:6:8"
sqlite3$(TEXE): $(TOP)/src/shell.c libsqlite3.la sqlite3.h
$(LTLINK) $(READLINE_FLAGS) $(LIBPTHREAD) \
-o $@ $(TOP)/src/shell.c libsqlite3.la \
$(LIBREADLINE) $(TLIBS)
# This target creates a directory named "tsrc" and fills it with
# copies of all of the C source code and header files needed to
# build on the target system. Some of the C source code and header
# files are automatically generated. This target takes care of
# all that automatic generation.
#
target_source: $(SRC) parse.c opcodes.c keywordhash.h $(VDBEHDR)
rm -rf tsrc
mkdir -p tsrc
cp $(SRC) $(VDBEHDR) tsrc
rm tsrc/sqlite.h.in tsrc/parse.y
cp parse.c opcodes.c keywordhash.h tsrc
sqlite3.c: target_source $(TOP)/tool/mksqlite3c.tcl
tclsh $(TOP)/tool/mksqlite3c.tcl
# Rules to build the LEMON compiler generator
#
lemon$(BEXE): $(TOP)/tool/lemon.c $(TOP)/tool/lempar.c
$(BCC) -o lemon$(BEXE) $(TOP)/tool/lemon.c
cp $(TOP)/tool/lempar.c .
# Rules to build individual files
#
alter.lo: $(TOP)/src/alter.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/alter.c
analyze.lo: $(TOP)/src/analyze.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/analyze.c
attach.lo: $(TOP)/src/attach.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/attach.c
auth.lo: $(TOP)/src/auth.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/auth.c
btmutex.lo: $(TOP)/src/btmutex.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/btmutex.c
btree.lo: $(TOP)/src/btree.c $(HDR) $(TOP)/src/pager.h
$(LTCOMPILE) -c $(TOP)/src/btree.c
build.lo: $(TOP)/src/build.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/build.c
callback.lo: $(TOP)/src/callback.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/callback.c
complete.lo: $(TOP)/src/complete.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/complete.c
date.lo: $(TOP)/src/date.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/date.c
delete.lo: $(TOP)/src/delete.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/delete.c
expr.lo: $(TOP)/src/expr.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/expr.c
fault.lo: $(TOP)/src/fault.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/fault.c
func.lo: $(TOP)/src/func.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/func.c
hash.lo: $(TOP)/src/hash.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/hash.c
insert.lo: $(TOP)/src/insert.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/insert.c
journal.lo: $(TOP)/src/journal.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/journal.c
legacy.lo: $(TOP)/src/legacy.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/legacy.c
loadext.lo: $(TOP)/src/loadext.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/loadext.c
main.lo: $(TOP)/src/main.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/main.c
malloc.lo: $(TOP)/src/malloc.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/malloc.c
mem1.lo: $(TOP)/src/mem1.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mem1.c
mem2.lo: $(TOP)/src/mem2.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mem2.c
mem3.lo: $(TOP)/src/mem3.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mem3.c
mem4.lo: $(TOP)/src/mem4.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mem4.c
mutex.lo: $(TOP)/src/mutex.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mutex.c
mutex_os2.lo: $(TOP)/src/mutex_os2.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mutex_os2.c
mutex_unix.lo: $(TOP)/src/mutex_unix.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mutex_unix.c
mutex_w32.lo: $(TOP)/src/mutex_w32.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mutex_w32.c
pager.lo: $(TOP)/src/pager.c $(HDR) $(TOP)/src/pager.h
$(LTCOMPILE) -c $(TOP)/src/pager.c
opcodes.lo: opcodes.c
$(LTCOMPILE) -c opcodes.c
opcodes.c: opcodes.h $(TOP)/mkopcodec.awk
sort -n -b -k 3 opcodes.h | $(NAWK) -f $(TOP)/mkopcodec.awk >opcodes.c
opcodes.h: parse.h $(TOP)/src/vdbe.c $(TOP)/mkopcodeh.awk
cat parse.h $(TOP)/src/vdbe.c | $(NAWK) -f $(TOP)/mkopcodeh.awk >opcodes.h
os.lo: $(TOP)/src/os.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/os.c
os_unix.lo: $(TOP)/src/os_unix.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/os_unix.c
os_win.lo: $(TOP)/src/os_win.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/os_win.c
os_os2.lo: $(TOP)/src/os_os2.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/os_os2.c
parse.lo: parse.c $(HDR)
$(LTCOMPILE) -c parse.c
parse.h: parse.c
parse.c: $(TOP)/src/parse.y lemon$(BEXE) $(TOP)/addopcodes.awk
cp $(TOP)/src/parse.y .
./lemon$(BEXE) $(OPTS) parse.y
mv parse.h parse.h.temp
$(NAWK) -f $(TOP)/addopcodes.awk parse.h.temp >parse.h
pragma.lo: $(TOP)/src/pragma.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/pragma.c
prepare.lo: $(TOP)/src/prepare.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/prepare.c
printf.lo: $(TOP)/src/printf.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/printf.c
random.lo: $(TOP)/src/random.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/random.c
select.lo: $(TOP)/src/select.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/select.c
sqlite3.h: $(TOP)/src/sqlite.h.in
sed -e s/--VERS--/$(RELEASE)/ $(TOP)/src/sqlite.h.in | \
sed -e s/--VERSION-NUMBER--/$(VERSION_NUMBER)/ >sqlite3.h
table.lo: $(TOP)/src/table.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/table.c
tclsqlite.lo: $(TOP)/src/tclsqlite.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/tclsqlite.c
tokenize.lo: $(TOP)/src/tokenize.c keywordhash.h $(HDR)
$(LTCOMPILE) -c $(TOP)/src/tokenize.c
keywordhash.h: $(TOP)/tool/mkkeywordhash.c
$(BCC) -o mkkeywordhash$(BEXE) $(OPTS) $(TOP)/tool/mkkeywordhash.c
./mkkeywordhash$(BEXE) >keywordhash.h
trigger.lo: $(TOP)/src/trigger.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/trigger.c
update.lo: $(TOP)/src/update.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/update.c
utf.lo: $(TOP)/src/utf.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/utf.c
util.lo: $(TOP)/src/util.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/util.c
vacuum.lo: $(TOP)/src/vacuum.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/vacuum.c
vdbe.lo: $(TOP)/src/vdbe.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbe.c
vdbeapi.lo: $(TOP)/src/vdbeapi.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbeapi.c
vdbeaux.lo: $(TOP)/src/vdbeaux.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbeaux.c
vdbeblob.lo: $(TOP)/src/vdbeblob.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbeblob.c
vdbefifo.lo: $(TOP)/src/vdbefifo.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbefifo.c
vdbemem.lo: $(TOP)/src/vdbemem.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbemem.c
vtab.lo: $(TOP)/src/vtab.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vtab.c
where.lo: $(TOP)/src/where.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/where.c
tclsqlite-shell.lo: $(TOP)/src/tclsqlite.c $(HDR)
$(LTCOMPILE) -DTCLSH=1 -o $@ -c $(TOP)/src/tclsqlite.c
tclsqlite-stubs.lo: $(TOP)/src/tclsqlite.c $(HDR)
$(LTCOMPILE) -DTCL_USE_STUBS=1 -o $@ -c $(TOP)/src/tclsqlite.c
tclsqlite3: tclsqlite-shell.lo libsqlite3.la
$(LTLINK) -o tclsqlite3 tclsqlite-shell.lo \
libsqlite3.la $(LIBTCL)
testfixture$(TEXE): $(TOP)/src/tclsqlite.c libsqlite3.la $(TESTSRC)
$(LTLINK) -DTCLSH=1 -DSQLITE_TEST=1 -DSQLITE_CRASH_TEST=1 \
-DSQLITE_NO_SYNC=1 $(TEMP_STORE) \
-o testfixture $(TESTSRC) $(TOP)/src/tclsqlite.c \
libsqlite3.la $(LIBTCL)
fulltest: testfixture$(TEXE) sqlite3$(TEXE)
./testfixture $(TOP)/test/all.test
test: testfixture$(TEXE) sqlite3$(TEXE)
./testfixture $(TOP)/test/quick.test
sqlite3_analyzer$(TEXE): $(TOP)/src/tclsqlite.c libtclsqlite3.la \
$(TESTSRC) $(TOP)/tool/spaceanal.tcl
sed \
-e '/^#/d' \
-e 's,\\,\\\\,g' \
-e 's,",\\",g' \
-e 's,^,",' \
-e 's,$$,\\n",' \
$(TOP)/tool/spaceanal.tcl >spaceanal_tcl.h
$(LTLINK) -DTCLSH=2 -DSQLITE_TEST=1 $(TEMP_STORE)\
-o sqlite3_analyzer$(EXE) $(TESTSRC) $(TOP)/src/tclsqlite.c \
libtclsqlite3.la $(LIBTCL)
install: sqlite3$(BEXE) libsqlite3.la sqlite3.h ${HAVE_TCL:1=tcl_install}
$(INSTALL) -d $(DESTDIR)$(libdir)
$(LTINSTALL) libsqlite3.la $(DESTDIR)$(libdir)
$(INSTALL) -d $(DESTDIR)$(exec_prefix)/bin
$(LTINSTALL) sqlite3$(BEXE) $(DESTDIR)$(exec_prefix)/bin
$(INSTALL) -d $(DESTDIR)$(prefix)/include
$(INSTALL) -m 0644 sqlite3.h $(DESTDIR)$(prefix)/include
$(INSTALL) -m 0644 $(TOP)/src/sqlite3ext.h $(DESTDIR)$(prefix)/include
$(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig;
$(INSTALL) -m 0644 sqlite3.pc $(DESTDIR)$(libdir)/pkgconfig;
tcl_install: libtclsqlite3.la
tclsh $(TOP)/tclinstaller.tcl $(VERSION)
clean:
rm -f *.lo *.la *.o sqlite3$(TEXE) libsqlite3.la
rm -f sqlite3.h opcodes.*
rm -rf .libs .deps
rm -f lemon$(BEXE) lempar.c parse.* sqlite*.tar.gz
rm -f mkkeywordhash$(BEXE) keywordhash.h
rm -f $(PUBLISH)
rm -f *.da *.bb *.bbg gmon.out
rm -f testfixture$(TEXE) test.db
rm -f common.tcl
rm -f sqlite3.dll sqlite3.lib sqlite3.def
distclean: clean
rm -f config.log config.status libtool Makefile config.h sqlite3.pc
#
# Windows section
#
dll: sqlite3.dll
REAL_LIBOBJ = $(LIBOBJ:%.lo=.libs/%.o)
$(REAL_LIBOBJ): $(LIBOBJ)
sqlite3.def: $(REAL_LIBOBJ)
echo 'EXPORTS' >sqlite3.def
nm $(REAL_LIBOBJ) | grep ' T ' | grep ' _sqlite3_' \
| sed 's/^.* _//' >>sqlite3.def
sqlite3.dll: $(REAL_LIBOBJ) sqlite3.def
$(TCC) -shared -o sqlite3.dll sqlite3.def \
-Wl,"--strip-all" $(REAL_LIBOBJ)

View file

@ -0,0 +1,138 @@
#!/usr/make
#
# Makefile for SQLITE
#
# This is a template makefile for SQLite. Most people prefer to
# use the autoconf generated "configure" script to generate the
# makefile automatically. But that does not work for everybody
# and in every situation. If you are having problems with the
# "configure" script, you might want to try this makefile as an
# alternative. Create a copy of this file, edit the parameters
# below and type "make".
#
#### The directory where to find the mingw32ce tools
MINGW32CE = /opt/mingw32ce/bin
#### The target prefix of the mingw32ce tools
TARGET = arm-wince-mingw32ce
#### The toplevel directory of the source tree. This is the directory
# that contains this "Makefile.in" and the "configure.in" script.
#
TOP = ../sqlite
#### C Compiler and options for use in building executables that
# will run on the platform that is doing the build.
#
BCC = gcc -g -O2
#BCC = /opt/ancic/bin/c89 -0
#### If the target operating system supports the "usleep()" system
# call, then define the HAVE_USLEEP macro for all C modules.
#
USLEEP =
#USLEEP = -DHAVE_USLEEP=1
#### If you want the SQLite library to be safe for use within a
# multi-threaded program, then define the following macro
# appropriately:
#
THREADSAFE = -DTHREADSAFE=1
#THREADSAFE = -DTHREADSAFE=0
#### Specify any extra linker options needed to make the library
# thread safe
#
#THREADLIB = -lpthread
THREADLIB =
#### Specify any extra libraries needed to access required functions.
#
#TLIBS = -lrt # fdatasync on Solaris 8
TLIBS =
#### Leave SQLITE_DEBUG undefined for maximum speed. Use SQLITE_DEBUG=1
# to check for memory leaks. Use SQLITE_DEBUG=2 to print a log of all
# malloc()s and free()s in order to track down memory leaks.
#
# SQLite uses some expensive assert() statements in the inner loop.
# You can make the library go almost twice as fast if you compile
# with -DNDEBUG=1
#
#OPTS = -DSQLITE_DEBUG=2
#OPTS = -DSQLITE_DEBUG=1
#OPTS =
OPTS = -DNDEBUG=1 -DOS_WIN=1 -D_WIN32_WCE=1
#OPTS += -DHAVE_FDATASYNC=1
#### The suffix to add to executable files. ".exe" for windows.
# Nothing for unix.
#
EXE = .exe
#EXE =
#### C Compile and options for use in building executables that
# will run on the target platform. This is usually the same
# as BCC, unless you are cross-compiling.
#
#TCC = gcc -O6
#TCC = gcc -g -O0 -Wall
#TCC = gcc -g -O0 -Wall -fprofile-arcs -ftest-coverage
#TCC = /opt/mingw/bin/i386-mingw32-gcc -O6
TCC = $(MINGW32CE)/$(TARGET)-gcc -O2
#TCC = /opt/ansic/bin/c89 -O +z -Wl,-a,archive
#### Tools used to build a static library.
#
#AR = ar cr
#AR = /opt/mingw/bin/i386-mingw32-ar cr
AR = $(MINGW32CE)/$(TARGET)-ar cr
#RANLIB = ranlib
#RANLIB = /opt/mingw/bin/i386-mingw32-ranlib
RANLIB = $(MINGW32CE)/$(TARGET)-ranlib
#MKSHLIB = gcc -shared
#SO = so
#SHPREFIX = lib
MKSHLIB = $(MINGW32CE)/$(TARGET)-gcc -shared
SO = dll
SHPREFIX =
#### Extra compiler options needed for programs that use the TCL library.
#
#TCL_FLAGS =
#TCL_FLAGS = -DSTATIC_BUILD=1
TCL_FLAGS = -I/home/drh/tcltk/8.4linux
#TCL_FLAGS = -I/home/drh/tcltk/8.4win -DSTATIC_BUILD=1
#TCL_FLAGS = -I/home/drh/tcltk/8.3hpux
#### Linker options needed to link against the TCL library.
#
#LIBTCL = -ltcl -lm -ldl
LIBTCL = /home/drh/tcltk/8.4linux/libtcl8.4g.a -lm -ldl
#LIBTCL = /home/drh/tcltk/8.4win/libtcl84s.a -lmsvcrt
#LIBTCL = /home/drh/tcltk/8.3hpux/libtcl8.3.a -ldld -lm -lc
#### Additional objects for SQLite library when TCL support is enabled.
TCLOBJ =
#TCLOBJ = tclsqlite.o
#### Compiler options needed for programs that use the readline() library.
#
READLINE_FLAGS =
#READLINE_FLAGS = -DHAVE_READLINE=1 -I/usr/include/readline
#### Linker options needed by programs using readline() must link against.
#
LIBREADLINE =
#LIBREADLINE = -static -lreadline -ltermcap
#### Which "awk" program provides nawk compatibilty
#
# NAWK = nawk
NAWK = awk
# You should not have to change anything below this line
###############################################################################
include $(TOP)/main.mk

View file

@ -0,0 +1,629 @@
#!/usr/make
#
# Makefile for SQLITE
#
# This makefile is suppose to be configured automatically using the
# autoconf. But if that does not work for you, you can configure
# the makefile manually. Just set the parameters below to values that
# work well for your system.
#
# If the configure script does not work out-of-the-box, you might
# be able to get it to work by giving it some hints. See the comment
# at the beginning of configure.in for additional information.
#
# The toplevel directory of the source tree. This is the directory
# that contains this "Makefile.in" and the "configure.in" script.
#
TOP = @srcdir@
# C Compiler and options for use in building executables that
# will run on the platform that is doing the build.
#
BCC = @BUILD_CC@ @BUILD_CFLAGS@
# C Compile and options for use in building executables that
# will run on the target platform. (BCC and TCC are usually the
# same unless your are cross-compiling.)
#
TCC = @CC@ @CFLAGS@ -I. -I${TOP}/src
# Define -DNDEBUG to compile without debugging (i.e., for production usage)
# Omitting the define will cause extra debugging code to be inserted and
# includes extra comments when "EXPLAIN stmt" is used.
#
TCC += @TARGET_DEBUG@ @XTHREADCONNECT@
# Compiler options needed for programs that use the TCL library.
#
TCC += @TCL_INCLUDE_SPEC@
# The library that programs using TCL must link against.
#
LIBTCL = @TCL_LIB_SPEC@ @TCL_LIBS@
# Compiler options needed for programs that use the readline() library.
#
READLINE_FLAGS = -DHAVE_READLINE=@TARGET_HAVE_READLINE@ @TARGET_READLINE_INC@
# The library that programs using readline() must link against.
#
LIBREADLINE = @TARGET_READLINE_LIBS@
# Should the database engine be compiled threadsafe
#
TCC += -DSQLITE_THREADSAFE=@SQLITE_THREADSAFE@
# The pthreads library if needed
#
LIBPTHREAD=@TARGET_THREAD_LIB@
# Do threads override each others locks by default (1), or do we test (-1)
#
TCC += -DSQLITE_THREAD_OVERRIDE_LOCK=@THREADSOVERRIDELOCKS@
# The fdatasync library
TLIBS = @LIBS@
# Flags controlling use of the in memory btree implementation
#
# TEMP_STORE is 0 to force temporary tables to be in a file, 1 to
# default to file, 2 to default to memory, and 3 to force temporary
# tables to always be in memory.
#
TEMP_STORE = -DTEMP_STORE=@TEMP_STORE@
# Version numbers and release number for the SQLite being compiled.
#
VERSION = @VERSION@
VERSION_NUMBER = @VERSION_NUMBER@
RELEASE = @RELEASE@
# Filename extensions
#
BEXE = @BUILD_EXEEXT@
TEXE = @TARGET_EXEEXT@
# The following variable is "1" if the configure script was able to locate
# the tclConfig.sh file. It is an empty string otherwise. When this
# variable is "1", the TCL extension library (libtclsqlite3.so) is built
# and installed.
#
HAVE_TCL = @HAVE_TCL@
# The suffix used on shared libraries. Ex: ".dll", ".so", ".dylib"
#
SHLIB_SUFFIX = @TCL_SHLIB_SUFFIX@
# The directory into which to store package information for
# Some standard variables and programs
#
prefix = @prefix@
exec_prefix = @exec_prefix@
libdir = @libdir@
INSTALL = @INSTALL@
LIBTOOL = ./libtool
ALLOWRELEASE = @ALLOWRELEASE@
# libtool compile/link/install
LTCOMPILE = $(LIBTOOL) --mode=compile --tag=CC $(TCC)
LTLINK = $(LIBTOOL) --mode=link $(TCC) @LDFLAGS@
LTINSTALL = $(LIBTOOL) --mode=install $(INSTALL)
# nawk compatible awk.
NAWK = @AWK@
# You should not have to change anything below this line
###############################################################################
TCC += -DSQLITE_OMIT_LOAD_EXTENSION=1
# Object files for the SQLite library.
#
LIBOBJ = alter.lo analyze.lo attach.lo auth.lo btmutex.lo btree.lo build.lo \
callback.lo complete.lo date.lo \
delete.lo expr.lo fault.lo func.lo \
hash.lo journal.lo insert.lo loadext.lo \
main.lo malloc.lo mem1.lo mem2.lo mem3.lo mem4.lo mutex.lo \
mutex_os2.lo mutex_unix.lo mutex_w32.lo \
opcodes.lo os.lo os_unix.lo os_win.lo os_os2.lo \
pager.lo parse.lo pragma.lo prepare.lo printf.lo random.lo \
select.lo table.lo tokenize.lo trigger.lo update.lo \
util.lo vacuum.lo \
vdbe.lo vdbeapi.lo vdbeaux.lo vdbeblob.lo vdbefifo.lo vdbemem.lo \
where.lo utf.lo legacy.lo vtab.lo
# All of the source code files.
#
SRC = \
$(TOP)/src/alter.c \
$(TOP)/src/analyze.c \
$(TOP)/src/attach.c \
$(TOP)/src/auth.c \
$(TOP)/src/btmutex.c \
$(TOP)/src/btree.c \
$(TOP)/src/btree.h \
$(TOP)/src/build.c \
$(TOP)/src/callback.c \
$(TOP)/src/complete.c \
$(TOP)/src/date.c \
$(TOP)/src/delete.c \
$(TOP)/src/expr.c \
$(TOP)/src/fault.c \
$(TOP)/src/func.c \
$(TOP)/src/hash.c \
$(TOP)/src/hash.h \
$(TOP)/src/insert.c \
$(TOP)/src/journal.c \
$(TOP)/src/legacy.c \
$(TOP)/src/loadext.c \
$(TOP)/src/main.c \
$(TOP)/src/malloc.c \
$(TOP)/src/mem1.c \
$(TOP)/src/mem2.c \
$(TOP)/src/mem3.c \
$(TOP)/src/mem4.c \
$(TOP)/src/mutex.c \
$(TOP)/src/mutex_os2.c \
$(TOP)/src/mutex_unix.c \
$(TOP)/src/mutex_w32.c \
$(TOP)/src/os.c \
$(TOP)/src/os_unix.c \
$(TOP)/src/os_win.c \
$(TOP)/src/os_os2.c \
$(TOP)/src/pager.c \
$(TOP)/src/pager.h \
$(TOP)/src/parse.y \
$(TOP)/src/pragma.c \
$(TOP)/src/prepare.c \
$(TOP)/src/printf.c \
$(TOP)/src/random.c \
$(TOP)/src/select.c \
$(TOP)/src/shell.c \
$(TOP)/src/sqlite.h.in \
$(TOP)/src/sqliteInt.h \
$(TOP)/src/table.c \
$(TOP)/src/tclsqlite.c \
$(TOP)/src/tokenize.c \
$(TOP)/src/trigger.c \
$(TOP)/src/utf.c \
$(TOP)/src/update.c \
$(TOP)/src/util.c \
$(TOP)/src/vacuum.c \
$(TOP)/src/vdbe.c \
$(TOP)/src/vdbe.h \
$(TOP)/src/vdbeapi.c \
$(TOP)/src/vdbeaux.c \
$(TOP)/src/vdbeblob.c \
$(TOP)/src/vdbefifo.c \
$(TOP)/src/vdbemem.c \
$(TOP)/src/vdbeInt.h \
$(TOP)/src/vtab.c \
$(TOP)/src/where.c
# Source code for extensions
#
SRC += \
$(TOP)/ext/fts1/fts1.c \
$(TOP)/ext/fts1/fts1.h \
$(TOP)/ext/fts1/fts1_hash.c \
$(TOP)/ext/fts1/fts1_hash.h \
$(TOP)/ext/fts1/fts1_porter.c \
$(TOP)/ext/fts1/fts1_tokenizer.h \
$(TOP)/ext/fts1/fts1_tokenizer1.c
# Source code to the test files.
#
TESTSRC = \
$(TOP)/src/attach.c \
$(TOP)/src/btree.c \
$(TOP)/src/build.c \
$(TOP)/src/date.c \
$(TOP)/src/expr.c \
$(TOP)/src/func.c \
$(TOP)/src/insert.c \
$(TOP)/src/malloc.c \
$(TOP)/src/os.c \
$(TOP)/src/os_os2.c \
$(TOP)/src/os_unix.c \
$(TOP)/src/os_win.c \
$(TOP)/src/pager.c \
$(TOP)/src/pragma.c \
$(TOP)/src/prepare.c \
$(TOP)/src/printf.c \
$(TOP)/src/random.c \
$(TOP)/src/select.c \
$(TOP)/src/test1.c \
$(TOP)/src/test2.c \
$(TOP)/src/test3.c \
$(TOP)/src/test4.c \
$(TOP)/src/test5.c \
$(TOP)/src/test6.c \
$(TOP)/src/test7.c \
$(TOP)/src/test8.c \
$(TOP)/src/test9.c \
$(TOP)/src/test_autoext.c \
$(TOP)/src/test_async.c \
$(TOP)/src/test_btree.c \
$(TOP)/src/test_config.c \
$(TOP)/src/test_devsym.c \
$(TOP)/src/test_hexio.c \
$(TOP)/src/test_malloc.c \
$(TOP)/src/test_md5.c \
$(TOP)/src/test_onefile.c \
$(TOP)/src/test_schema.c \
$(TOP)/src/test_server.c \
$(TOP)/src/test_tclvar.c \
$(TOP)/src/test_thread.c \
$(TOP)/src/tokenize.c \
$(TOP)/src/utf.c \
$(TOP)/src/util.c \
$(TOP)/src/vdbe.c \
$(TOP)/src/vdbeapi.c \
$(TOP)/src/vdbeaux.c \
$(TOP)/src/vdbemem.c \
$(TOP)/src/where.c \
parse.c
# Header files used by all library source files.
#
HDR = \
sqlite3.h \
$(TOP)/src/btree.h \
$(TOP)/src/btreeInt.h \
$(TOP)/src/hash.h \
$(TOP)/src/sqliteLimit.h \
$(TOP)/src/mutex.h \
opcodes.h \
$(TOP)/src/os.h \
$(TOP)/src/os_common.h \
$(TOP)/src/sqlite3ext.h \
$(TOP)/src/sqliteInt.h \
$(TOP)/src/vdbe.h \
parse.h
# Header files used by extensions
#
HDR += \
$(TOP)/ext/fts1/fts1.h \
$(TOP)/ext/fts1/fts1_hash.h \
$(TOP)/ext/fts1/fts1_tokenizer.h
# Header files used by the VDBE submodule
#
VDBEHDR = \
$(HDR) \
$(TOP)/src/vdbeInt.h
# This is the default Makefile target. The objects listed here
# are what get build when you type just "make" with no arguments.
#
all: sqlite3.h libsqlite3.la sqlite3$(TEXE) $(HAVE_TCL:1=libtclsqlite3.la)
Makefile: $(TOP)/Makefile.in
./config.status
# Generate the file "last_change" which contains the date of change
# of the most recently modified source code file
#
last_change: $(SRC)
cat $(SRC) | grep '$$Id: ' | sort -k 5 | tail -1 \
| $(NAWK) '{print $$5,$$6}' >last_change
libsqlite3.la: $(LIBOBJ)
$(LTLINK) -o libsqlite3.la $(LIBOBJ) $(LIBPTHREAD) \
${ALLOWRELEASE} -rpath $(libdir) -version-info "8:6:8"
libtclsqlite3.la: tclsqlite.lo libsqlite3.la
$(LTLINK) -o libtclsqlite3.la tclsqlite.lo \
$(LIBOBJ) @TCL_STUB_LIB_SPEC@ $(LIBPTHREAD) \
-rpath $(libdir)/sqlite \
-version-info "8:6:8"
sqlite3$(TEXE): $(TOP)/src/shell.c libsqlite3.la sqlite3.h
$(LTLINK) $(READLINE_FLAGS) $(LIBPTHREAD) \
-o $@ $(TOP)/src/shell.c libsqlite3.la \
$(LIBREADLINE) $(TLIBS)
# This target creates a directory named "tsrc" and fills it with
# copies of all of the C source code and header files needed to
# build on the target system. Some of the C source code and header
# files are automatically generated. This target takes care of
# all that automatic generation.
#
target_source: $(SRC) parse.c opcodes.c keywordhash.h $(VDBEHDR)
rm -rf tsrc
mkdir -p tsrc
cp $(SRC) $(VDBEHDR) tsrc
rm tsrc/sqlite.h.in tsrc/parse.y
cp parse.c opcodes.c keywordhash.h tsrc
sqlite3.c: target_source $(TOP)/tool/mksqlite3c.tcl
tclsh $(TOP)/tool/mksqlite3c.tcl
# Rules to build the LEMON compiler generator
#
lemon$(BEXE): $(TOP)/tool/lemon.c $(TOP)/tool/lempar.c
$(BCC) -o lemon$(BEXE) $(TOP)/tool/lemon.c
cp $(TOP)/tool/lempar.c .
# Rules to build individual files
#
alter.lo: $(TOP)/src/alter.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/alter.c
analyze.lo: $(TOP)/src/analyze.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/analyze.c
attach.lo: $(TOP)/src/attach.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/attach.c
auth.lo: $(TOP)/src/auth.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/auth.c
btmutex.lo: $(TOP)/src/btmutex.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/btmutex.c
btree.lo: $(TOP)/src/btree.c $(HDR) $(TOP)/src/pager.h
$(LTCOMPILE) -c $(TOP)/src/btree.c
build.lo: $(TOP)/src/build.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/build.c
callback.lo: $(TOP)/src/callback.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/callback.c
complete.lo: $(TOP)/src/complete.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/complete.c
date.lo: $(TOP)/src/date.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/date.c
delete.lo: $(TOP)/src/delete.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/delete.c
expr.lo: $(TOP)/src/expr.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/expr.c
fault.lo: $(TOP)/src/fault.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/fault.c
func.lo: $(TOP)/src/func.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/func.c
hash.lo: $(TOP)/src/hash.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/hash.c
insert.lo: $(TOP)/src/insert.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/insert.c
journal.lo: $(TOP)/src/journal.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/journal.c
legacy.lo: $(TOP)/src/legacy.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/legacy.c
loadext.lo: $(TOP)/src/loadext.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/loadext.c
main.lo: $(TOP)/src/main.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/main.c
malloc.lo: $(TOP)/src/malloc.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/malloc.c
mem1.lo: $(TOP)/src/mem1.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mem1.c
mem2.lo: $(TOP)/src/mem2.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mem2.c
mem3.lo: $(TOP)/src/mem3.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mem3.c
mem4.lo: $(TOP)/src/mem4.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mem4.c
mutex.lo: $(TOP)/src/mutex.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mutex.c
mutex_os2.lo: $(TOP)/src/mutex_os2.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mutex_os2.c
mutex_unix.lo: $(TOP)/src/mutex_unix.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mutex_unix.c
mutex_w32.lo: $(TOP)/src/mutex_w32.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mutex_w32.c
pager.lo: $(TOP)/src/pager.c $(HDR) $(TOP)/src/pager.h
$(LTCOMPILE) -c $(TOP)/src/pager.c
opcodes.lo: opcodes.c
$(LTCOMPILE) -c opcodes.c
opcodes.c: opcodes.h $(TOP)/mkopcodec.awk
sort -n -b -k 3 opcodes.h | $(NAWK) -f $(TOP)/mkopcodec.awk >opcodes.c
opcodes.h: parse.h $(TOP)/src/vdbe.c $(TOP)/mkopcodeh.awk
cat parse.h $(TOP)/src/vdbe.c | $(NAWK) -f $(TOP)/mkopcodeh.awk >opcodes.h
os.lo: $(TOP)/src/os.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/os.c
os_unix.lo: $(TOP)/src/os_unix.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/os_unix.c
os_win.lo: $(TOP)/src/os_win.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/os_win.c
os_os2.lo: $(TOP)/src/os_os2.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/os_os2.c
parse.lo: parse.c $(HDR)
$(LTCOMPILE) -c parse.c
parse.h: parse.c
parse.c: $(TOP)/src/parse.y lemon$(BEXE) $(TOP)/addopcodes.awk
cp $(TOP)/src/parse.y .
./lemon$(BEXE) $(OPTS) parse.y
mv parse.h parse.h.temp
$(NAWK) -f $(TOP)/addopcodes.awk parse.h.temp >parse.h
pragma.lo: $(TOP)/src/pragma.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/pragma.c
prepare.lo: $(TOP)/src/prepare.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/prepare.c
printf.lo: $(TOP)/src/printf.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/printf.c
random.lo: $(TOP)/src/random.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/random.c
select.lo: $(TOP)/src/select.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/select.c
sqlite3.h: $(TOP)/src/sqlite.h.in
sed -e s/--VERS--/$(RELEASE)/ $(TOP)/src/sqlite.h.in | \
sed -e s/--VERSION-NUMBER--/$(VERSION_NUMBER)/ >sqlite3.h
table.lo: $(TOP)/src/table.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/table.c
tclsqlite.lo: $(TOP)/src/tclsqlite.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/tclsqlite.c
tokenize.lo: $(TOP)/src/tokenize.c keywordhash.h $(HDR)
$(LTCOMPILE) -c $(TOP)/src/tokenize.c
keywordhash.h: $(TOP)/tool/mkkeywordhash.c
$(BCC) -o mkkeywordhash$(BEXE) $(OPTS) $(TOP)/tool/mkkeywordhash.c
./mkkeywordhash$(BEXE) >keywordhash.h
trigger.lo: $(TOP)/src/trigger.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/trigger.c
update.lo: $(TOP)/src/update.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/update.c
utf.lo: $(TOP)/src/utf.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/utf.c
util.lo: $(TOP)/src/util.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/util.c
vacuum.lo: $(TOP)/src/vacuum.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/vacuum.c
vdbe.lo: $(TOP)/src/vdbe.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbe.c
vdbeapi.lo: $(TOP)/src/vdbeapi.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbeapi.c
vdbeaux.lo: $(TOP)/src/vdbeaux.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbeaux.c
vdbeblob.lo: $(TOP)/src/vdbeblob.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbeblob.c
vdbefifo.lo: $(TOP)/src/vdbefifo.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbefifo.c
vdbemem.lo: $(TOP)/src/vdbemem.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbemem.c
vtab.lo: $(TOP)/src/vtab.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vtab.c
where.lo: $(TOP)/src/where.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/where.c
tclsqlite-shell.lo: $(TOP)/src/tclsqlite.c $(HDR)
$(LTCOMPILE) -DTCLSH=1 -o $@ -c $(TOP)/src/tclsqlite.c
tclsqlite-stubs.lo: $(TOP)/src/tclsqlite.c $(HDR)
$(LTCOMPILE) -DTCL_USE_STUBS=1 -o $@ -c $(TOP)/src/tclsqlite.c
tclsqlite3: tclsqlite-shell.lo libsqlite3.la
$(LTLINK) -o tclsqlite3 tclsqlite-shell.lo \
libsqlite3.la $(LIBTCL)
testfixture$(TEXE): $(TOP)/src/tclsqlite.c libsqlite3.la $(TESTSRC)
$(LTLINK) -DTCLSH=1 -DSQLITE_TEST=1 -DSQLITE_CRASH_TEST=1 \
-DSQLITE_NO_SYNC=1 $(TEMP_STORE) \
-o testfixture $(TESTSRC) $(TOP)/src/tclsqlite.c \
libsqlite3.la $(LIBTCL)
fulltest: testfixture$(TEXE) sqlite3$(TEXE)
./testfixture $(TOP)/test/all.test
test: testfixture$(TEXE) sqlite3$(TEXE)
./testfixture $(TOP)/test/quick.test
sqlite3_analyzer$(TEXE): $(TOP)/src/tclsqlite.c libtclsqlite3.la \
$(TESTSRC) $(TOP)/tool/spaceanal.tcl
sed \
-e '/^#/d' \
-e 's,\\,\\\\,g' \
-e 's,",\\",g' \
-e 's,^,",' \
-e 's,$$,\\n",' \
$(TOP)/tool/spaceanal.tcl >spaceanal_tcl.h
$(LTLINK) -DTCLSH=2 -DSQLITE_TEST=1 $(TEMP_STORE)\
-o sqlite3_analyzer$(EXE) $(TESTSRC) $(TOP)/src/tclsqlite.c \
libtclsqlite3.la $(LIBTCL)
install: sqlite3$(BEXE) libsqlite3.la sqlite3.h ${HAVE_TCL:1=tcl_install}
$(INSTALL) -d $(DESTDIR)$(libdir)
$(LTINSTALL) libsqlite3.la $(DESTDIR)$(libdir)
$(INSTALL) -d $(DESTDIR)$(exec_prefix)/bin
$(LTINSTALL) sqlite3$(BEXE) $(DESTDIR)$(exec_prefix)/bin
$(INSTALL) -d $(DESTDIR)$(prefix)/include
$(INSTALL) -m 0644 sqlite3.h $(DESTDIR)$(prefix)/include
$(INSTALL) -m 0644 $(TOP)/src/sqlite3ext.h $(DESTDIR)$(prefix)/include
$(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig;
$(INSTALL) -m 0644 sqlite3.pc $(DESTDIR)$(libdir)/pkgconfig;
tcl_install: libtclsqlite3.la
tclsh $(TOP)/tclinstaller.tcl $(VERSION)
clean:
rm -f *.lo *.la *.o sqlite3$(TEXE) libsqlite3.la
rm -f sqlite3.h opcodes.*
rm -rf .libs .deps
rm -f lemon$(BEXE) lempar.c parse.* sqlite*.tar.gz
rm -f mkkeywordhash$(BEXE) keywordhash.h
rm -f $(PUBLISH)
rm -f *.da *.bb *.bbg gmon.out
rm -f testfixture$(TEXE) test.db
rm -f common.tcl
rm -f sqlite3.dll sqlite3.lib sqlite3.def
distclean: clean
rm -f config.log config.status libtool Makefile config.h sqlite3.pc
#
# Windows section
#
dll: sqlite3.dll
REAL_LIBOBJ = $(LIBOBJ:%.lo=.libs/%.o)
$(REAL_LIBOBJ): $(LIBOBJ)
sqlite3.def: $(REAL_LIBOBJ)
echo 'EXPORTS' >sqlite3.def
nm $(REAL_LIBOBJ) | grep ' T ' | grep ' _sqlite3_' \
| sed 's/^.* _//' >>sqlite3.def
sqlite3.dll: $(REAL_LIBOBJ) sqlite3.def
$(TCC) -shared -o sqlite3.dll sqlite3.def \
-Wl,"--strip-all" $(REAL_LIBOBJ)

View file

@ -0,0 +1,128 @@
#!/usr/make
#
# Makefile for SQLITE
#
# This is a template makefile for SQLite. Most people prefer to
# use the autoconf generated "configure" script to generate the
# makefile automatically. But that does not work for everybody
# and in every situation. If you are having problems with the
# "configure" script, you might want to try this makefile as an
# alternative. Create a copy of this file, edit the parameters
# below and type "make".
#
#### The toplevel directory of the source tree. This is the directory
# that contains this "Makefile.in" and the "configure.in" script.
#
TOP = ../sqlite
#### C Compiler and options for use in building executables that
# will run on the platform that is doing the build.
#
BCC = gcc -g -O2
#BCC = /opt/ancic/bin/c89 -0
#### If the target operating system supports the "usleep()" system
# call, then define the HAVE_USLEEP macro for all C modules.
#
#USLEEP =
USLEEP = -DHAVE_USLEEP=1
#### If you want the SQLite library to be safe for use within a
# multi-threaded program, then define the following macro
# appropriately:
#
#THREADSAFE = -DTHREADSAFE=1
THREADSAFE = -DTHREADSAFE=0
#### Specify any extra linker options needed to make the library
# thread safe
#
#THREADLIB = -lpthread
THREADLIB =
#### Specify any extra libraries needed to access required functions.
#
#TLIBS = -lrt # fdatasync on Solaris 8
TLIBS =
#### Leave SQLITE_DEBUG undefined for maximum speed. Use SQLITE_DEBUG=1
# to check for memory leaks. Use SQLITE_DEBUG=2 to print a log of all
# malloc()s and free()s in order to track down memory leaks.
#
# SQLite uses some expensive assert() statements in the inner loop.
# You can make the library go almost twice as fast if you compile
# with -DNDEBUG=1
#
#OPTS = -DSQLITE_DEBUG=2
#OPTS = -DSQLITE_DEBUG=1
#OPTS =
OPTS = -DNDEBUG=1
OPTS += -DHAVE_FDATASYNC=1
#### The suffix to add to executable files. ".exe" for windows.
# Nothing for unix.
#
#EXE = .exe
EXE =
#### C Compile and options for use in building executables that
# will run on the target platform. This is usually the same
# as BCC, unless you are cross-compiling.
#
TCC = gcc -O6
#TCC = gcc -g -O0 -Wall
#TCC = gcc -g -O0 -Wall -fprofile-arcs -ftest-coverage
#TCC = /opt/mingw/bin/i386-mingw32-gcc -O6
#TCC = /opt/ansic/bin/c89 -O +z -Wl,-a,archive
#### Tools used to build a static library.
#
AR = ar cr
#AR = /opt/mingw/bin/i386-mingw32-ar cr
RANLIB = ranlib
#RANLIB = /opt/mingw/bin/i386-mingw32-ranlib
MKSHLIB = gcc -shared
SO = so
SHPREFIX = lib
# SO = dll
# SHPREFIX =
#### Extra compiler options needed for programs that use the TCL library.
#
#TCL_FLAGS =
#TCL_FLAGS = -DSTATIC_BUILD=1
TCL_FLAGS = -I/home/drh/tcltk/8.4linux
#TCL_FLAGS = -I/home/drh/tcltk/8.4win -DSTATIC_BUILD=1
#TCL_FLAGS = -I/home/drh/tcltk/8.3hpux
#### Linker options needed to link against the TCL library.
#
#LIBTCL = -ltcl -lm -ldl
LIBTCL = /home/drh/tcltk/8.4linux/libtcl8.4g.a -lm -ldl
#LIBTCL = /home/drh/tcltk/8.4win/libtcl84s.a -lmsvcrt
#LIBTCL = /home/drh/tcltk/8.3hpux/libtcl8.3.a -ldld -lm -lc
#### Additional objects for SQLite library when TCL support is enabled.
#TCLOBJ =
TCLOBJ = tclsqlite.o
#### Compiler options needed for programs that use the readline() library.
#
READLINE_FLAGS =
#READLINE_FLAGS = -DHAVE_READLINE=1 -I/usr/include/readline
#### Linker options needed by programs using readline() must link against.
#
LIBREADLINE =
#LIBREADLINE = -static -lreadline -ltermcap
#### Which "awk" program provides nawk compatibilty
#
# NAWK = nawk
NAWK = awk
# You should not have to change anything below this line
###############################################################################
include $(TOP)/main.mk

View file

@ -0,0 +1,622 @@
#!/usr/make
#
# Makefile for SQLITE
#
# This makefile is suppose to be configured automatically using the
# autoconf. But if that does not work for you, you can configure
# the makefile manually. Just set the parameters below to values that
# work well for your system.
#
# If the configure script does not work out-of-the-box, you might
# be able to get it to work by giving it some hints. See the comment
# at the beginning of configure.in for additional information.
#
# The toplevel directory of the source tree. This is the directory
# that contains this "Makefile.in" and the "configure.in" script.
#
TOP = @srcdir@
# C Compiler and options for use in building executables that
# will run on the platform that is doing the build.
#
BCC = @BUILD_CC@ @BUILD_CFLAGS@
# C Compile and options for use in building executables that
# will run on the target platform. (BCC and TCC are usually the
# same unless your are cross-compiling.)
#
TCC = @CC@ @CFLAGS@ -I. -I${TOP}/src
# Define -DNDEBUG to compile without debugging (i.e., for production usage)
# Omitting the define will cause extra debugging code to be inserted and
# includes extra comments when "EXPLAIN stmt" is used.
#
TCC += @TARGET_DEBUG@ @XTHREADCONNECT@
# Compiler options needed for programs that use the TCL library.
#
TCC += @TCL_INCLUDE_SPEC@
# The library that programs using TCL must link against.
#
LIBTCL = @TCL_LIB_SPEC@ @TCL_LIBS@
# Compiler options needed for programs that use the readline() library.
#
READLINE_FLAGS = -DHAVE_READLINE=@TARGET_HAVE_READLINE@ @TARGET_READLINE_INC@
# The library that programs using readline() must link against.
#
LIBREADLINE = @TARGET_READLINE_LIBS@
# Should the database engine be compiled threadsafe
#
TCC += -DSQLITE_THREADSAFE=@SQLITE_THREADSAFE@
# The pthreads library if needed
#
LIBPTHREAD=@TARGET_THREAD_LIB@
# Do threads override each others locks by default (1), or do we test (-1)
#
TCC += -DSQLITE_THREAD_OVERRIDE_LOCK=@THREADSOVERRIDELOCKS@
# The fdatasync library
TLIBS = @LIBS@
# Flags controlling use of the in memory btree implementation
#
# TEMP_STORE is 0 to force temporary tables to be in a file, 1 to
# default to file, 2 to default to memory, and 3 to force temporary
# tables to always be in memory.
#
TEMP_STORE = -DTEMP_STORE=@TEMP_STORE@
# Version numbers and release number for the SQLite being compiled.
#
VERSION = @VERSION@
VERSION_NUMBER = @VERSION_NUMBER@
RELEASE = @RELEASE@
# Filename extensions
#
BEXE = @BUILD_EXEEXT@
TEXE = @TARGET_EXEEXT@
# The following variable is "1" if the configure script was able to locate
# the tclConfig.sh file. It is an empty string otherwise. When this
# variable is "1", the TCL extension library (libtclsqlite3.so) is built
# and installed.
#
HAVE_TCL = @HAVE_TCL@
# The suffix used on shared libraries. Ex: ".dll", ".so", ".dylib"
#
SHLIB_SUFFIX = @TCL_SHLIB_SUFFIX@
# The directory into which to store package information for
# Some standard variables and programs
#
prefix = @prefix@
exec_prefix = @exec_prefix@
libdir = @libdir@
INSTALL = @INSTALL@
LIBTOOL = ./libtool
ALLOWRELEASE = @ALLOWRELEASE@
# libtool compile/link/install
LTCOMPILE = $(LIBTOOL) --mode=compile --tag=CC $(TCC)
LTLINK = $(LIBTOOL) --mode=link $(TCC) @LDFLAGS@
LTINSTALL = $(LIBTOOL) --mode=install $(INSTALL)
# nawk compatible awk.
NAWK = @AWK@
# You should not have to change anything below this line
###############################################################################
TCC += -DSQLITE_OMIT_LOAD_EXTENSION=1
# Object files for the SQLite library.
#
LIBOBJ = alter.lo analyze.lo attach.lo auth.lo btmutex.lo btree.lo build.lo \
callback.lo complete.lo date.lo \
delete.lo expr.lo func.lo hash.lo journal.lo insert.lo loadext.lo \
main.lo malloc.lo mem1.lo mem2.lo mem3.lo mem4.lo mutex.lo \
mutex_os2.lo mutex_unix.lo mutex_w32.lo \
opcodes.lo os.lo os_unix.lo os_win.lo os_os2.lo \
pager.lo parse.lo pragma.lo prepare.lo printf.lo random.lo \
select.lo table.lo tokenize.lo trigger.lo update.lo \
util.lo vacuum.lo \
vdbe.lo vdbeapi.lo vdbeaux.lo vdbeblob.lo vdbefifo.lo vdbemem.lo \
where.lo utf.lo legacy.lo vtab.lo
# All of the source code files.
#
SRC = \
$(TOP)/src/alter.c \
$(TOP)/src/analyze.c \
$(TOP)/src/attach.c \
$(TOP)/src/auth.c \
$(TOP)/src/btmutex.c \
$(TOP)/src/btree.c \
$(TOP)/src/btree.h \
$(TOP)/src/build.c \
$(TOP)/src/callback.c \
$(TOP)/src/complete.c \
$(TOP)/src/date.c \
$(TOP)/src/delete.c \
$(TOP)/src/expr.c \
$(TOP)/src/func.c \
$(TOP)/src/hash.c \
$(TOP)/src/hash.h \
$(TOP)/src/insert.c \
$(TOP)/src/journal.c \
$(TOP)/src/legacy.c \
$(TOP)/src/loadext.c \
$(TOP)/src/main.c \
$(TOP)/src/malloc.c \
$(TOP)/src/mem1.c \
$(TOP)/src/mem2.c \
$(TOP)/src/mem3.c \
$(TOP)/src/mem4.c \
$(TOP)/src/mutex.c \
$(TOP)/src/mutex_os2.c \
$(TOP)/src/mutex_unix.c \
$(TOP)/src/mutex_w32.c \
$(TOP)/src/os.c \
$(TOP)/src/os_unix.c \
$(TOP)/src/os_win.c \
$(TOP)/src/os_os2.c \
$(TOP)/src/pager.c \
$(TOP)/src/pager.h \
$(TOP)/src/parse.y \
$(TOP)/src/pragma.c \
$(TOP)/src/prepare.c \
$(TOP)/src/printf.c \
$(TOP)/src/random.c \
$(TOP)/src/select.c \
$(TOP)/src/shell.c \
$(TOP)/src/sqlite.h.in \
$(TOP)/src/sqliteInt.h \
$(TOP)/src/table.c \
$(TOP)/src/tclsqlite.c \
$(TOP)/src/tokenize.c \
$(TOP)/src/trigger.c \
$(TOP)/src/utf.c \
$(TOP)/src/update.c \
$(TOP)/src/util.c \
$(TOP)/src/vacuum.c \
$(TOP)/src/vdbe.c \
$(TOP)/src/vdbe.h \
$(TOP)/src/vdbeapi.c \
$(TOP)/src/vdbeaux.c \
$(TOP)/src/vdbeblob.c \
$(TOP)/src/vdbefifo.c \
$(TOP)/src/vdbemem.c \
$(TOP)/src/vdbeInt.h \
$(TOP)/src/vtab.c \
$(TOP)/src/where.c
# Source code for extensions
#
SRC += \
$(TOP)/ext/fts1/fts1.c \
$(TOP)/ext/fts1/fts1.h \
$(TOP)/ext/fts1/fts1_hash.c \
$(TOP)/ext/fts1/fts1_hash.h \
$(TOP)/ext/fts1/fts1_porter.c \
$(TOP)/ext/fts1/fts1_tokenizer.h \
$(TOP)/ext/fts1/fts1_tokenizer1.c
# Source code to the test files.
#
TESTSRC = \
$(TOP)/src/attach.c \
$(TOP)/src/btree.c \
$(TOP)/src/build.c \
$(TOP)/src/date.c \
$(TOP)/src/expr.c \
$(TOP)/src/func.c \
$(TOP)/src/insert.c \
$(TOP)/src/malloc.c \
$(TOP)/src/os.c \
$(TOP)/src/os_os2.c \
$(TOP)/src/os_unix.c \
$(TOP)/src/os_win.c \
$(TOP)/src/pager.c \
$(TOP)/src/pragma.c \
$(TOP)/src/prepare.c \
$(TOP)/src/printf.c \
$(TOP)/src/select.c \
$(TOP)/src/test1.c \
$(TOP)/src/test2.c \
$(TOP)/src/test3.c \
$(TOP)/src/test4.c \
$(TOP)/src/test5.c \
$(TOP)/src/test6.c \
$(TOP)/src/test7.c \
$(TOP)/src/test8.c \
$(TOP)/src/test9.c \
$(TOP)/src/test_autoext.c \
$(TOP)/src/test_async.c \
$(TOP)/src/test_btree.c \
$(TOP)/src/test_config.c \
$(TOP)/src/test_hexio.c \
$(TOP)/src/test_malloc.c \
$(TOP)/src/test_md5.c \
$(TOP)/src/test_onefile.c \
$(TOP)/src/test_schema.c \
$(TOP)/src/test_server.c \
$(TOP)/src/test_tclvar.c \
$(TOP)/src/test_thread.c \
$(TOP)/src/tokenize.c \
$(TOP)/src/utf.c \
$(TOP)/src/util.c \
$(TOP)/src/vdbe.c \
$(TOP)/src/vdbeapi.c \
$(TOP)/src/vdbeaux.c \
$(TOP)/src/vdbemem.c \
$(TOP)/src/where.c \
parse.c
# Header files used by all library source files.
#
HDR = \
sqlite3.h \
$(TOP)/src/btree.h \
$(TOP)/src/btreeInt.h \
$(TOP)/src/hash.h \
$(TOP)/src/sqliteLimit.h \
$(TOP)/src/mutex.h \
opcodes.h \
$(TOP)/src/os.h \
$(TOP)/src/os_common.h \
$(TOP)/src/sqlite3ext.h \
$(TOP)/src/sqliteInt.h \
$(TOP)/src/vdbe.h \
parse.h
# Header files used by extensions
#
HDR += \
$(TOP)/ext/fts1/fts1.h \
$(TOP)/ext/fts1/fts1_hash.h \
$(TOP)/ext/fts1/fts1_tokenizer.h
# Header files used by the VDBE submodule
#
VDBEHDR = \
$(HDR) \
$(TOP)/src/vdbeInt.h
# This is the default Makefile target. The objects listed here
# are what get build when you type just "make" with no arguments.
#
all: sqlite3.h libsqlite3.la sqlite3$(TEXE) $(HAVE_TCL:1=libtclsqlite3.la)
Makefile: $(TOP)/Makefile.in
./config.status
# Generate the file "last_change" which contains the date of change
# of the most recently modified source code file
#
last_change: $(SRC)
cat $(SRC) | grep '$$Id: ' | sort -k 5 | tail -1 \
| $(NAWK) '{print $$5,$$6}' >last_change
libsqlite3.la: $(LIBOBJ)
$(LTLINK) -o libsqlite3.la $(LIBOBJ) $(LIBPTHREAD) \
${ALLOWRELEASE} -rpath $(libdir) -version-info "8:6:8"
libtclsqlite3.la: tclsqlite.lo libsqlite3.la
$(LTLINK) -o libtclsqlite3.la tclsqlite.lo \
$(LIBOBJ) @TCL_STUB_LIB_SPEC@ $(LIBPTHREAD) \
-rpath $(libdir)/sqlite \
-version-info "8:6:8"
sqlite3$(TEXE): $(TOP)/src/shell.c libsqlite3.la sqlite3.h
$(LTLINK) $(READLINE_FLAGS) $(LIBPTHREAD) \
-o $@ $(TOP)/src/shell.c libsqlite3.la \
$(LIBREADLINE) $(TLIBS)
# This target creates a directory named "tsrc" and fills it with
# copies of all of the C source code and header files needed to
# build on the target system. Some of the C source code and header
# files are automatically generated. This target takes care of
# all that automatic generation.
#
target_source: $(SRC) parse.c opcodes.c keywordhash.h $(VDBEHDR)
rm -rf tsrc
mkdir -p tsrc
cp $(SRC) $(VDBEHDR) tsrc
rm tsrc/sqlite.h.in tsrc/parse.y
cp parse.c opcodes.c keywordhash.h tsrc
sqlite3.c: target_source $(TOP)/tool/mksqlite3c.tcl
tclsh $(TOP)/tool/mksqlite3c.tcl
# Rules to build the LEMON compiler generator
#
lemon$(BEXE): $(TOP)/tool/lemon.c $(TOP)/tool/lempar.c
$(BCC) -o lemon$(BEXE) $(TOP)/tool/lemon.c
cp $(TOP)/tool/lempar.c .
# Rules to build individual files
#
alter.lo: $(TOP)/src/alter.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/alter.c
analyze.lo: $(TOP)/src/analyze.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/analyze.c
attach.lo: $(TOP)/src/attach.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/attach.c
auth.lo: $(TOP)/src/auth.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/auth.c
btmutex.lo: $(TOP)/src/btmutex.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/btmutex.c
btree.lo: $(TOP)/src/btree.c $(HDR) $(TOP)/src/pager.h
$(LTCOMPILE) -c $(TOP)/src/btree.c
build.lo: $(TOP)/src/build.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/build.c
callback.lo: $(TOP)/src/callback.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/callback.c
complete.lo: $(TOP)/src/complete.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/complete.c
date.lo: $(TOP)/src/date.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/date.c
delete.lo: $(TOP)/src/delete.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/delete.c
expr.lo: $(TOP)/src/expr.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/expr.c
func.lo: $(TOP)/src/func.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/func.c
hash.lo: $(TOP)/src/hash.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/hash.c
insert.lo: $(TOP)/src/insert.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/insert.c
journal.lo: $(TOP)/src/journal.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/journal.c
legacy.lo: $(TOP)/src/legacy.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/legacy.c
loadext.lo: $(TOP)/src/loadext.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/loadext.c
main.lo: $(TOP)/src/main.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/main.c
malloc.lo: $(TOP)/src/malloc.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/malloc.c
mem1.lo: $(TOP)/src/mem1.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mem1.c
mem2.lo: $(TOP)/src/mem2.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mem2.c
mem3.lo: $(TOP)/src/mem3.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mem3.c
mem4.lo: $(TOP)/src/mem4.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mem4.c
mutex.lo: $(TOP)/src/mutex.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mutex.c
mutex_os2.lo: $(TOP)/src/mutex_os2.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mutex_os2.c
mutex_unix.lo: $(TOP)/src/mutex_unix.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mutex_unix.c
mutex_w32.lo: $(TOP)/src/mutex_w32.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/mutex_w32.c
pager.lo: $(TOP)/src/pager.c $(HDR) $(TOP)/src/pager.h
$(LTCOMPILE) -c $(TOP)/src/pager.c
opcodes.lo: opcodes.c
$(LTCOMPILE) -c opcodes.c
opcodes.c: opcodes.h $(TOP)/mkopcodec.awk
sort -n -b -k 3 opcodes.h | $(NAWK) -f $(TOP)/mkopcodec.awk >opcodes.c
opcodes.h: parse.h $(TOP)/src/vdbe.c $(TOP)/mkopcodeh.awk
cat parse.h $(TOP)/src/vdbe.c | $(NAWK) -f $(TOP)/mkopcodeh.awk >opcodes.h
os.lo: $(TOP)/src/os.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/os.c
os_unix.lo: $(TOP)/src/os_unix.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/os_unix.c
os_win.lo: $(TOP)/src/os_win.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/os_win.c
os_os2.lo: $(TOP)/src/os_os2.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/os_os2.c
parse.lo: parse.c $(HDR)
$(LTCOMPILE) -c parse.c
parse.h: parse.c
parse.c: $(TOP)/src/parse.y lemon$(BEXE) $(TOP)/addopcodes.awk
cp $(TOP)/src/parse.y .
./lemon$(BEXE) $(OPTS) parse.y
mv parse.h parse.h.temp
$(NAWK) -f $(TOP)/addopcodes.awk parse.h.temp >parse.h
pragma.lo: $(TOP)/src/pragma.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/pragma.c
prepare.lo: $(TOP)/src/prepare.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/prepare.c
printf.lo: $(TOP)/src/printf.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/printf.c
random.lo: $(TOP)/src/random.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/random.c
select.lo: $(TOP)/src/select.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/select.c
sqlite3.h: $(TOP)/src/sqlite.h.in
sed -e s/--VERS--/$(RELEASE)/ $(TOP)/src/sqlite.h.in | \
sed -e s/--VERSION-NUMBER--/$(VERSION_NUMBER)/ >sqlite3.h
table.lo: $(TOP)/src/table.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/table.c
tclsqlite.lo: $(TOP)/src/tclsqlite.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/tclsqlite.c
tokenize.lo: $(TOP)/src/tokenize.c keywordhash.h $(HDR)
$(LTCOMPILE) -c $(TOP)/src/tokenize.c
keywordhash.h: $(TOP)/tool/mkkeywordhash.c
$(BCC) -o mkkeywordhash$(BEXE) $(OPTS) $(TOP)/tool/mkkeywordhash.c
./mkkeywordhash$(BEXE) >keywordhash.h
trigger.lo: $(TOP)/src/trigger.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/trigger.c
update.lo: $(TOP)/src/update.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/update.c
utf.lo: $(TOP)/src/utf.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/utf.c
util.lo: $(TOP)/src/util.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/util.c
vacuum.lo: $(TOP)/src/vacuum.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/vacuum.c
vdbe.lo: $(TOP)/src/vdbe.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbe.c
vdbeapi.lo: $(TOP)/src/vdbeapi.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbeapi.c
vdbeaux.lo: $(TOP)/src/vdbeaux.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbeaux.c
vdbeblob.lo: $(TOP)/src/vdbeblob.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbeblob.c
vdbefifo.lo: $(TOP)/src/vdbefifo.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbefifo.c
vdbemem.lo: $(TOP)/src/vdbemem.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vdbemem.c
vtab.lo: $(TOP)/src/vtab.c $(VDBEHDR)
$(LTCOMPILE) -c $(TOP)/src/vtab.c
where.lo: $(TOP)/src/where.c $(HDR)
$(LTCOMPILE) -c $(TOP)/src/where.c
tclsqlite-shell.lo: $(TOP)/src/tclsqlite.c $(HDR)
$(LTCOMPILE) -DTCLSH=1 -o $@ -c $(TOP)/src/tclsqlite.c
tclsqlite-stubs.lo: $(TOP)/src/tclsqlite.c $(HDR)
$(LTCOMPILE) -DTCL_USE_STUBS=1 -o $@ -c $(TOP)/src/tclsqlite.c
tclsqlite3: tclsqlite-shell.lo libsqlite3.la
$(LTLINK) -o tclsqlite3 tclsqlite-shell.lo \
libsqlite3.la $(LIBTCL)
testfixture$(TEXE): $(TOP)/src/tclsqlite.c libsqlite3.la $(TESTSRC)
$(LTLINK) -DTCLSH=1 -DSQLITE_TEST=1 -DSQLITE_CRASH_TEST=1 \
-DSQLITE_NO_SYNC=1 $(TEMP_STORE) \
-o testfixture $(TESTSRC) $(TOP)/src/tclsqlite.c \
libsqlite3.la $(LIBTCL)
fulltest: testfixture$(TEXE) sqlite3$(TEXE)
./testfixture $(TOP)/test/all.test
test: testfixture$(TEXE) sqlite3$(TEXE)
./testfixture $(TOP)/test/quick.test
sqlite3_analyzer$(TEXE): $(TOP)/src/tclsqlite.c libtclsqlite3.la \
$(TESTSRC) $(TOP)/tool/spaceanal.tcl
sed \
-e '/^#/d' \
-e 's,\\,\\\\,g' \
-e 's,",\\",g' \
-e 's,^,",' \
-e 's,$$,\\n",' \
$(TOP)/tool/spaceanal.tcl >spaceanal_tcl.h
$(LTLINK) -DTCLSH=2 -DSQLITE_TEST=1 $(TEMP_STORE)\
-o sqlite3_analyzer$(EXE) $(TESTSRC) $(TOP)/src/tclsqlite.c \
libtclsqlite3.la $(LIBTCL)
install: sqlite3$(BEXE) libsqlite3.la sqlite3.h ${HAVE_TCL:1=tcl_install}
$(INSTALL) -d $(DESTDIR)$(libdir)
$(LTINSTALL) libsqlite3.la $(DESTDIR)$(libdir)
$(INSTALL) -d $(DESTDIR)$(exec_prefix)/bin
$(LTINSTALL) sqlite3$(BEXE) $(DESTDIR)$(exec_prefix)/bin
$(INSTALL) -d $(DESTDIR)$(prefix)/include
$(INSTALL) -m 0644 sqlite3.h $(DESTDIR)$(prefix)/include
$(INSTALL) -m 0644 $(TOP)/src/sqlite3ext.h $(DESTDIR)$(prefix)/include
$(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig;
$(INSTALL) -m 0644 sqlite3.pc $(DESTDIR)$(libdir)/pkgconfig;
tcl_install: libtclsqlite3.la
tclsh $(TOP)/tclinstaller.tcl $(VERSION)
clean:
rm -f *.lo *.la *.o sqlite3$(TEXE) libsqlite3.la
rm -f sqlite3.h opcodes.*
rm -rf .libs .deps
rm -f lemon$(BEXE) lempar.c parse.* sqlite*.tar.gz
rm -f mkkeywordhash$(BEXE) keywordhash.h
rm -f $(PUBLISH)
rm -f *.da *.bb *.bbg gmon.out
rm -f testfixture$(TEXE) test.db
rm -f common.tcl
rm -f sqlite3.dll sqlite3.lib sqlite3.def
distclean: clean
rm -f config.log config.status libtool Makefile config.h sqlite3.pc
#
# Windows section
#
dll: sqlite3.dll
REAL_LIBOBJ = $(LIBOBJ:%.lo=.libs/%.o)
$(REAL_LIBOBJ): $(LIBOBJ)
sqlite3.def: $(REAL_LIBOBJ)
echo 'EXPORTS' >sqlite3.def
nm $(REAL_LIBOBJ) | grep ' T ' | grep ' _sqlite3_' \
| sed 's/^.* _//' >>sqlite3.def
sqlite3.dll: $(REAL_LIBOBJ) sqlite3.def
$(TCC) -shared -o sqlite3.dll sqlite3.def \
-Wl,"--strip-all" $(REAL_LIBOBJ)

View file

@ -0,0 +1,35 @@
This directory contains source code to
SQLite: An Embeddable SQL Database Engine
To compile the project, first create a directory in which to place
the build products. It is recommended, but not required, that the
build directory be separate from the source directory. Cd into the
build directory and then from the build directory run the configure
script found at the root of the source tree. Then run "make".
For example:
tar xzf sqlite.tar.gz ;# Unpack the source tree into "sqlite"
mkdir bld ;# Build will occur in a sibling directory
cd bld ;# Change to the build directory
../sqlite/configure ;# Run the configure script
make ;# Run the makefile.
make install ;# (Optional) Install the build products
The configure script uses autoconf 2.50 and libtool. If the configure
script does not work out for you, there is a generic makefile named
"Makefile.linux-gcc" in the top directory of the source tree that you
can copy and edit to suite your needs. Comments on the generic makefile
show what changes are needed.
The linux binaries on the website are created using the generic makefile,
not the configure script. The configure script is unmaintained. (You
can volunteer to take over maintenance of the configure script, if you want!)
The windows binaries on the website are created using MinGW32 configured
as a cross-compiler running under Linux. For details, see the ./publish.sh
script at the top-level of the source tree.
Contacts:
http://www.sqlite.org/

View file

@ -0,0 +1 @@
3.5.6

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,32 @@
#!/usr/bin/awk
#
# This script appends additional token codes to the end of the
# parse.h file that lemon generates. These extra token codes are
# not used by the parser. But they are used by the tokenizer and/or
# the code generator.
#
#
BEGIN {
max = 0
}
/^#define TK_/ {
print $0
if( max<$3 ) max = $3
}
END {
printf "#define TK_%-29s %4d\n", "TO_TEXT", max+1
printf "#define TK_%-29s %4d\n", "TO_BLOB", max+2
printf "#define TK_%-29s %4d\n", "TO_NUMERIC", max+3
printf "#define TK_%-29s %4d\n", "TO_INT", max+4
printf "#define TK_%-29s %4d\n", "TO_REAL", max+5
printf "#define TK_%-29s %4d\n", "END_OF_FILE", max+6
printf "#define TK_%-29s %4d\n", "ILLEGAL", max+7
printf "#define TK_%-29s %4d\n", "SPACE", max+8
printf "#define TK_%-29s %4d\n", "UNCLOSED_STRING", max+9
printf "#define TK_%-29s %4d\n", "COMMENT", max+10
printf "#define TK_%-29s %4d\n", "FUNCTION", max+11
printf "#define TK_%-29s %4d\n", "COLUMN", max+12
printf "#define TK_%-29s %4d\n", "AGG_FUNCTION", max+13
printf "#define TK_%-29s %4d\n", "AGG_COLUMN", max+14
printf "#define TK_%-29s %4d\n", "CONST_FUNC", max+15
}

BIN
client/src/thirdparty/sqlite-3.4.2/art/2005osaward.gif (Stored with Git LFS) vendored Normal file

Binary file not shown.

Binary file not shown.

BIN
client/src/thirdparty/sqlite-3.4.2/art/SQLite.gif (Stored with Git LFS) vendored Normal file

Binary file not shown.

Binary file not shown.

BIN
client/src/thirdparty/sqlite-3.4.2/art/SQLite_big.gif (Stored with Git LFS) vendored Normal file

Binary file not shown.

BIN
client/src/thirdparty/sqlite-3.4.2/art/nocopy.gif (Stored with Git LFS) vendored Normal file

Binary file not shown.

BIN
client/src/thirdparty/sqlite-3.4.2/art/powered_by_sqlite.gif (Stored with Git LFS) vendored Normal file

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,774 @@
#! /dev/env/DJDIR/bin/sh.exe
# Generated by configure.
# Run this file to recreate the current configuration.
# Compiler output produced by configure, useful for debugging
# configure, is in config.log if it exists.
debug=false
ac_cs_recheck=false
ac_cs_silent=false
SHELL=${CONFIG_SHELL-/dev/env/DJDIR/bin/sh.exe}
## --------------------- ##
## M4sh Initialization. ##
## --------------------- ##
# Be Bourne compatible
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
emulate sh
NULLCMD=:
# Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
# is contrary to our usage. Disable this feature.
alias -g '${1+"$@"}'='"$@"'
elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
set -o posix
fi
DUALCASE=1; export DUALCASE # for MKS sh
# Support unset when possible.
if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
as_unset=unset
else
as_unset=false
fi
# Work around bugs in pre-3.0 UWIN ksh.
$as_unset ENV MAIL MAILPATH
PS1='$ '
PS2='> '
PS4='+ '
# NLS nuisances.
for as_var in \
LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
LC_TELEPHONE LC_TIME
do
if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
eval $as_var=C; export $as_var
else
$as_unset $as_var
fi
done
# Required to use basename.
if expr a : '\(a\)' >/dev/null 2>&1; then
as_expr=expr
else
as_expr=false
fi
if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
as_basename=basename
else
as_basename=false
fi
# Name of the executable.
as_me=`$as_basename "$0" ||
$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
X"$0" : 'X\(//\)$' \| \
X"$0" : 'X\(/\)$' \| \
. : '\(.\)' 2>/dev/null ||
echo X/"$0" |
sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
/^X\/\(\/\/\)$/{ s//\1/; q; }
/^X\/\(\/\).*/{ s//\1/; q; }
s/.*/./; q'`
# PATH needs CR, and LINENO needs CR and PATH.
# Avoid depending upon Character Ranges.
as_cr_letters='abcdefghijklmnopqrstuvwxyz'
as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
as_cr_Letters=$as_cr_letters$as_cr_LETTERS
as_cr_digits='0123456789'
as_cr_alnum=$as_cr_Letters$as_cr_digits
# The user is always right.
if test "${PATH_SEPARATOR+set}" != set; then
echo "#! /bin/sh" >conf$$.sh
echo "exit 0" >>conf$$.sh
chmod +x conf$$.sh
if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
PATH_SEPARATOR=';'
else
PATH_SEPARATOR=:
fi
rm -f conf$$.sh
fi
as_lineno_1=$LINENO
as_lineno_2=$LINENO
as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
test "x$as_lineno_1" != "x$as_lineno_2" &&
test "x$as_lineno_3" = "x$as_lineno_2" || {
# Find who we are. Look in the path if we contain no path at all
# relative or not.
case $0 in
*[\\/]* ) as_myself=$0 ;;
*) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
done
;;
esac
# We did not find ourselves, most probably we were run as `sh COMMAND'
# in which case we are not to be found in the path.
if test "x$as_myself" = x; then
as_myself=$0
fi
if test ! -f "$as_myself"; then
{ { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5
echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;}
{ (exit 1); exit 1; }; }
fi
case $CONFIG_SHELL in
'')
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for as_base in sh bash ksh sh5; do
case $as_dir in
/*)
if ("$as_dir/$as_base" -c '
as_lineno_1=$LINENO
as_lineno_2=$LINENO
as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
test "x$as_lineno_1" != "x$as_lineno_2" &&
test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
$as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
$as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
CONFIG_SHELL=$as_dir/$as_base
export CONFIG_SHELL
exec "$CONFIG_SHELL" "$0" ${1+"$@"}
fi;;
esac
done
done
;;
esac
# Create $as_me.lineno as a copy of $as_myself, but with $LINENO
# uniformly replaced by the line number. The first 'sed' inserts a
# line-number line before each line; the second 'sed' does the real
# work. The second script uses 'N' to pair each line-number line
# with the numbered line, and appends trailing '-' during
# substitution so that $LINENO is not a special case at line end.
# (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
# second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
sed '=' <$as_myself |
sed '
N
s,$,-,
: loop
s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
t loop
s,-$,,
s,^['$as_cr_digits']*\n,,
' >$as_me.lineno &&
chmod +x $as_me.lineno ||
{ { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5
echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;}
{ (exit 1); exit 1; }; }
# Don't try to exec as it changes $[0], causing all sort of problems
# (the dirname of $[0] is not the place where we might find the
# original and so on. Autoconf is especially sensible to this).
. ./$as_me.lineno
# Exit status is that of the last command.
exit
}
case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
*c*,-n*) ECHO_N= ECHO_C='
' ECHO_T=' ' ;;
*c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
*) ECHO_N= ECHO_C='\c' ECHO_T= ;;
esac
if expr a : '\(a\)' >/dev/null 2>&1; then
as_expr=expr
else
as_expr=false
fi
rm -f conf$$ conf$$.exe conf$$.file
echo >conf$$.file
if ln -s conf$$.file conf$$ 2>/dev/null; then
# We could just check for DJGPP; but this test a) works b) is more generic
# and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
if test -f conf$$.exe; then
# Don't use ln at all; we don't have any links
as_ln_s='cp -p'
else
as_ln_s='ln -s'
fi
elif ln conf$$.file conf$$ 2>/dev/null; then
as_ln_s=ln
else
as_ln_s='cp -p'
fi
rm -f conf$$ conf$$.exe conf$$.file
if mkdir -p . 2>/dev/null; then
as_mkdir_p=:
else
test -d ./-p && rmdir ./-p
as_mkdir_p=false
fi
as_executable_p="test -f"
# Sed expression to map a string onto a valid CPP name.
as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
# Sed expression to map a string onto a valid variable name.
as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
# IFS
# We need space, tab and new line, in precisely that order.
as_nl='
'
IFS=" $as_nl"
# CDPATH.
$as_unset CDPATH
exec 6>&1
# Open the log real soon, to keep \$[0] and so on meaningful, and to
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. Logging --version etc. is OK.
exec 5>>config.log
{
echo
sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
## Running $as_me. ##
_ASBOX
} >&5
cat >&5 <<_CSEOF
This file was extended by $as_me, which was
generated by GNU Autoconf 2.59. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
CONFIG_HEADERS = $CONFIG_HEADERS
CONFIG_LINKS = $CONFIG_LINKS
CONFIG_COMMANDS = $CONFIG_COMMANDS
$ $0 $@
_CSEOF
echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5
echo >&5
config_files=" Makefile sqlite3.pc"
ac_cs_usage="\
\`$as_me' instantiates files from templates according to the
current configuration.
Usage: $0 [OPTIONS] [FILE]...
-h, --help print this help, then exit
-V, --version print version number, then exit
-q, --quiet do not print progress messages
-d, --debug don't remove temporary files
--recheck update $as_me by reconfiguring in the same conditions
--file=FILE[:TEMPLATE]
instantiate the configuration file FILE
Configuration files:
$config_files
Report bugs to <bug-autoconf@gnu.org>."
ac_cs_version="\
config.status
configured by configure, generated by GNU Autoconf 2.59,
with options \"'--disable-tcl' '--disable-threadsafe'\"
Copyright (C) 2003 Free Software Foundation, Inc.
This config.status script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it."
srcdir=.
INSTALL="/dev/env/DJDIR/bin/ginstall -c"
# If no file are specified by the user, then we need to provide default
# value. By we need to know if files were specified by the user.
ac_need_defaults=:
while test $# != 0
do
case $1 in
--*=*)
ac_option=`expr "x$1" : 'x\([^=]*\)='`
ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'`
ac_shift=:
;;
-*)
ac_option=$1
ac_optarg=$2
ac_shift=shift
;;
*) # This is not an option, so the user has probably given explicit
# arguments.
ac_option=$1
ac_need_defaults=false;;
esac
case $ac_option in
# Handling of the options.
-recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
ac_cs_recheck=: ;;
--version | --vers* | -V )
echo "$ac_cs_version"; exit 0 ;;
--he | --h)
# Conflict between --help and --header
{ { echo "$as_me:$LINENO: error: ambiguous option: $1
Try \`$0 --help' for more information." >&5
echo "$as_me: error: ambiguous option: $1
Try \`$0 --help' for more information." >&2;}
{ (exit 1); exit 1; }; };;
--help | --hel | -h )
echo "$ac_cs_usage"; exit 0 ;;
--debug | --d* | -d )
debug=: ;;
--file | --fil | --fi | --f )
$ac_shift
CONFIG_FILES="$CONFIG_FILES $ac_optarg"
ac_need_defaults=false;;
--header | --heade | --head | --hea )
$ac_shift
CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg"
ac_need_defaults=false;;
-q | -quiet | --quiet | --quie | --qui | --qu | --q \
| -silent | --silent | --silen | --sile | --sil | --si | --s)
ac_cs_silent=: ;;
# This is an error.
-*) { { echo "$as_me:$LINENO: error: unrecognized option: $1
Try \`$0 --help' for more information." >&5
echo "$as_me: error: unrecognized option: $1
Try \`$0 --help' for more information." >&2;}
{ (exit 1); exit 1; }; } ;;
*) ac_config_targets="$ac_config_targets $1" ;;
esac
shift
done
ac_configure_extra_args=
if $ac_cs_silent; then
exec 6>/dev/null
ac_configure_extra_args="$ac_configure_extra_args --silent"
fi
if $ac_cs_recheck; then
echo "running /dev/env/DJDIR/bin/sh.exe configure " '--disable-tcl' '--disable-threadsafe' $ac_configure_extra_args " --no-create --no-recursion" >&6
exec /dev/env/DJDIR/bin/sh.exe configure '--disable-tcl' '--disable-threadsafe' $ac_configure_extra_args --no-create --no-recursion
fi
for ac_config_target in $ac_config_targets
do
case "$ac_config_target" in
# Handling of arguments.
"Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
"sqlite3.pc" ) CONFIG_FILES="$CONFIG_FILES sqlite3.pc" ;;
*) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
{ (exit 1); exit 1; }; };;
esac
done
# If the user did not use the arguments to specify the items to instantiate,
# then the envvar interface is used. Set only those that are not.
# We use the long form for the default assignment because of an extremely
# bizarre bug on SunOS 4.1.3.
if $ac_need_defaults; then
test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
fi
# Have a temporary directory for convenience. Make it in the build tree
# simply because there is no reason to put it here, and in addition,
# creating and moving files from /tmp can sometimes cause problems.
# Create a temporary directory, and hook for its removal unless debugging.
$debug ||
{
trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
trap '{ (exit 1); exit 1; }' 1 2 13 15
}
# Create a (secure) tmp directory for tmp files.
{
tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` &&
test -n "$tmp" && test -d "$tmp"
} ||
{
tmp=./confstat$$-$RANDOM
(umask 077 && mkdir $tmp)
} ||
{
echo "$me: cannot create a temporary directory in ." >&2
{ (exit 1); exit 1; }
}
#
# CONFIG_FILES section.
#
# No need to generate the scripts if there are no CONFIG_FILES.
# This happens for instance when ./config.status config.h
if test -n "$CONFIG_FILES"; then
# Protect against being on the right side of a sed subst in config.status.
sed 's/,@/@@/; s/@,/@@/; s/,;t t$/@;t t/; /@;t t$/s/[\\&,]/\\&/g;
s/@@/,@/; s/@@/@,/; s/@;t t$/,;t t/' >$tmp/subs.sed <<\CEOF
s,@SHELL@,/dev/env/DJDIR/bin/sh.exe,;t t
s,@PATH_SEPARATOR@,:,;t t
s,@PACKAGE_NAME@,,;t t
s,@PACKAGE_TARNAME@,,;t t
s,@PACKAGE_VERSION@,,;t t
s,@PACKAGE_STRING@,,;t t
s,@PACKAGE_BUGREPORT@,,;t t
s,@exec_prefix@,${prefix},;t t
s,@prefix@,/dev/env/DJDIR,;t t
s,@program_transform_name@,s,x,x,,;t t
s,@bindir@,${exec_prefix}/bin,;t t
s,@sbindir@,${exec_prefix}/sbin,;t t
s,@libexecdir@,${exec_prefix}/libexec,;t t
s,@datadir@,${prefix}/share,;t t
s,@sysconfdir@,${prefix}/etc,;t t
s,@sharedstatedir@,${prefix}/com,;t t
s,@localstatedir@,${prefix}/var,;t t
s,@libdir@,${exec_prefix}/lib,;t t
s,@includedir@,${prefix}/include,;t t
s,@oldincludedir@,/usr/include,;t t
s,@infodir@,${prefix}/info,;t t
s,@mandir@,${prefix}/man,;t t
s,@build_alias@,,;t t
s,@host_alias@,,;t t
s,@target_alias@,,;t t
s,@DEFS@,-DPACKAGE_NAME=\"\" -DPACKAGE_TARNAME=\"\" -DPACKAGE_VERSION=\"\" -DPACKAGE_STRING=\"\" -DPACKAGE_BUGREPORT=\"\" -DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAVE_STDLIB_H=1 -DHAVE_STRING_H=1 -DHAVE_MEMORY_H=1 -DHAVE_STRINGS_H=1 -DHAVE_INTTYPES_H=1 -DHAVE_STDINT_H=1 -DHAVE_UNISTD_H=1 -DHAVE_DLFCN_H=1 ,;t t
s,@ECHO_C@,,;t t
s,@ECHO_N@,-n,;t t
s,@ECHO_T@,,;t t
s,@LIBS@,,;t t
s,@build@,i386-pc-msdosdjgpp,;t t
s,@build_cpu@,i386,;t t
s,@build_vendor@,pc,;t t
s,@build_os@,msdosdjgpp,;t t
s,@host@,SCHREIBTISCH_GP,;t t
s,@host_cpu@,i386,;t t
s,@host_vendor@,pc,;t t
s,@host_os@,msdosdjgpp,;t t
s,@CC@,gcc,;t t
s,@CFLAGS@,-g -O2,;t t
s,@LDFLAGS@,,;t t
s,@CPPFLAGS@,,;t t
s,@ac_ct_CC@,gcc,;t t
s,@EXEEXT@,.exe,;t t
s,@OBJEXT@,o,;t t
s,@EGREP@,grep -E,;t t
s,@LN_S@,ln,;t t
s,@ECHO@,echo,;t t
s,@AR@,ar,;t t
s,@ac_ct_AR@,ar,;t t
s,@RANLIB@,ranlib,;t t
s,@ac_ct_RANLIB@,ranlib,;t t
s,@STRIP@,strip,;t t
s,@ac_ct_STRIP@,strip,;t t
s,@CPP@,gcc -E,;t t
s,@CXX@,gpp,;t t
s,@CXXFLAGS@,-g -O2,;t t
s,@ac_ct_CXX@,gpp,;t t
s,@CXXCPP@,gpp -E,;t t
s,@F77@,,;t t
s,@FFLAGS@,,;t t
s,@ac_ct_F77@,,;t t
s,@LIBTOOL@,$(SHELL) $(top_builddir)/libtool,;t t
s,@INSTALL_PROGRAM@,${INSTALL},;t t
s,@INSTALL_SCRIPT@,${INSTALL},;t t
s,@INSTALL_DATA@,${INSTALL} -m 644,;t t
s,@AWK@,gawk,;t t
s,@program_prefix@,,;t t
s,@VERSION@,3.5,;t t
s,@RELEASE@,3.5.6,;t t
s,@VERSION_NUMBER@,3005006,;t t
s,@BUILD_CC@,gcc,;t t
s,@BUILD_CFLAGS@,-g -O2,;t t
s,@SQLITE_THREADSAFE@,0,;t t
s,@TARGET_THREAD_LIB@,,;t t
s,@XTHREADCONNECT@,,;t t
s,@THREADSOVERRIDELOCKS@,-1,;t t
s,@ALLOWRELEASE@,,;t t
s,@TEMP_STORE@,1,;t t
s,@BUILD_EXEEXT@,.exe,;t t
s,@OS_UNIX@,0,;t t
s,@OS_WIN@,1,;t t
s,@OS_OS2@,0,;t t
s,@TARGET_EXEEXT@,.exe,;t t
s,@TCL_VERSION@,,;t t
s,@TCL_BIN_DIR@,,;t t
s,@TCL_SRC_DIR@,,;t t
s,@TCL_LIBS@,,;t t
s,@TCL_INCLUDE_SPEC@,,;t t
s,@TCL_LIB_FILE@,,;t t
s,@TCL_LIB_FLAG@,,;t t
s,@TCL_LIB_SPEC@,,;t t
s,@TCL_STUB_LIB_FILE@,,;t t
s,@TCL_STUB_LIB_FLAG@,,;t t
s,@TCL_STUB_LIB_SPEC@,,;t t
s,@HAVE_TCL@,,;t t
s,@TARGET_READLINE_LIBS@,,;t t
s,@TARGET_READLINE_INC@,,;t t
s,@TARGET_HAVE_READLINE@,0,;t t
s,@TARGET_DEBUG@,-DNDEBUG,;t t
s,@LIBOBJS@,,;t t
s,@LTLIBOBJS@,,;t t
CEOF
# Split the substitutions into bite-sized pieces for seds with
# small command number limits, like on Digital OSF/1 and HP-UX.
ac_max_sed_lines=48
ac_sed_frag=1 # Number of current file.
ac_beg=1 # First line for current file.
ac_end=$ac_max_sed_lines # Line after last line for current file.
ac_more_lines=:
ac_sed_cmds=
while $ac_more_lines; do
if test $ac_beg -gt 1; then
sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
else
sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
fi
if test ! -s $tmp/subs.frag; then
ac_more_lines=false
else
# The purpose of the label and of the branching condition is to
# speed up the sed processing (if there are no `@' at all, there
# is no need to browse any of the substitutions).
# These are the two extra sed commands mentioned above.
(echo ':t
/@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed
if test -z "$ac_sed_cmds"; then
ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed"
else
ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed"
fi
ac_sed_frag=`expr $ac_sed_frag + 1`
ac_beg=$ac_end
ac_end=`expr $ac_end + $ac_max_sed_lines`
fi
done
if test -z "$ac_sed_cmds"; then
ac_sed_cmds=cat
fi
fi # test -n "$CONFIG_FILES"
for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue
# Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
case $ac_file in
- | *:- | *:-:* ) # input from stdin
cat >$tmp/stdin
ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
*:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
* ) ac_file_in=$ac_file.in ;;
esac
# Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories.
ac_dir=`(dirname "$ac_file") 2>/dev/null ||
$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
X"$ac_file" : 'X\(//\)[^/]' \| \
X"$ac_file" : 'X\(//\)$' \| \
X"$ac_file" : 'X\(/\)' \| \
. : '\(.\)' 2>/dev/null ||
echo X"$ac_file" |
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
/^X\(\/\/\)[^/].*/{ s//\1/; q; }
/^X\(\/\/\)$/{ s//\1/; q; }
/^X\(\/\).*/{ s//\1/; q; }
s/.*/./; q'`
{ if $as_mkdir_p; then
mkdir -p "$ac_dir"
else
as_dir="$ac_dir"
as_dirs=
while test ! -d "$as_dir"; do
as_dirs="$as_dir $as_dirs"
as_dir=`(dirname "$as_dir") 2>/dev/null ||
$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
X"$as_dir" : 'X\(//\)[^/]' \| \
X"$as_dir" : 'X\(//\)$' \| \
X"$as_dir" : 'X\(/\)' \| \
. : '\(.\)' 2>/dev/null ||
echo X"$as_dir" |
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
/^X\(\/\/\)[^/].*/{ s//\1/; q; }
/^X\(\/\/\)$/{ s//\1/; q; }
/^X\(\/\).*/{ s//\1/; q; }
s/.*/./; q'`
done
test ! -n "$as_dirs" || mkdir $as_dirs
fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
{ (exit 1); exit 1; }; }; }
ac_builddir=.
if test "$ac_dir" != .; then
ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
# A "../" for each directory in $ac_dir_suffix.
ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
else
ac_dir_suffix= ac_top_builddir=
fi
case $srcdir in
.) # No --srcdir option. We are building in place.
ac_srcdir=.
if test -z "$ac_top_builddir"; then
ac_top_srcdir=.
else
ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
fi ;;
[\\/]* | ?:[\\/]* ) # Absolute path.
ac_srcdir=$srcdir$ac_dir_suffix;
ac_top_srcdir=$srcdir ;;
*) # Relative path.
ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
ac_top_srcdir=$ac_top_builddir$srcdir ;;
esac
# Do not use `cd foo && pwd` to compute absolute paths, because
# the directories may not exist.
case `pwd` in
.) ac_abs_builddir="$ac_dir";;
*)
case "$ac_dir" in
.) ac_abs_builddir=`pwd`;;
[\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
*) ac_abs_builddir=`pwd`/"$ac_dir";;
esac;;
esac
case $ac_abs_builddir in
.) ac_abs_top_builddir=${ac_top_builddir}.;;
*)
case ${ac_top_builddir}. in
.) ac_abs_top_builddir=$ac_abs_builddir;;
[\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
*) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
esac;;
esac
case $ac_abs_builddir in
.) ac_abs_srcdir=$ac_srcdir;;
*)
case $ac_srcdir in
.) ac_abs_srcdir=$ac_abs_builddir;;
[\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
*) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
esac;;
esac
case $ac_abs_builddir in
.) ac_abs_top_srcdir=$ac_top_srcdir;;
*)
case $ac_top_srcdir in
.) ac_abs_top_srcdir=$ac_abs_builddir;;
[\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
*) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
esac;;
esac
case $INSTALL in
[\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
*) ac_INSTALL=$ac_top_builddir$INSTALL ;;
esac
if test x"$ac_file" != x-; then
{ echo "$as_me:$LINENO: creating $ac_file" >&5
echo "$as_me: creating $ac_file" >&6;}
rm -f "$ac_file"
fi
# Let's still pretend it is `configure' which instantiates (i.e., don't
# use $as_me), people would be surprised to read:
# /* config.h. Generated by config.status. */
if test x"$ac_file" = x-; then
configure_input=
else
configure_input="$ac_file. "
fi
configure_input=$configure_input"Generated from `echo $ac_file_in |
sed 's,.*/,,'` by configure."
# First look for the input files in the build tree, otherwise in the
# src tree.
ac_file_inputs=`IFS=:
for f in $ac_file_in; do
case $f in
-) echo $tmp/stdin ;;
[\\/$]*)
# Absolute (can't be DOS-style, as IFS=:)
test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
echo "$as_me: error: cannot find input file: $f" >&2;}
{ (exit 1); exit 1; }; }
echo "$f";;
*) # Relative
if test -f "$f"; then
# Build tree
echo "$f"
elif test -f "$srcdir/$f"; then
# Source tree
echo "$srcdir/$f"
else
# /dev/null tree
{ { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
echo "$as_me: error: cannot find input file: $f" >&2;}
{ (exit 1); exit 1; }; }
fi;;
esac
done` || { (exit 1); exit 1; }
sed "/^[ ]*VPATH[ ]*=/{
s/:*\$(srcdir):*/:/;
s/:*\${srcdir}:*/:/;
s/:*@srcdir@:*/:/;
s/^\([^=]*=[ ]*\):*/\1/;
s/:*$//;
s/^[^=]*=[ ]*$//;
}
:t
/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
s,@configure_input@,$configure_input,;t t
s,@srcdir@,$ac_srcdir,;t t
s,@abs_srcdir@,$ac_abs_srcdir,;t t
s,@top_srcdir@,$ac_top_srcdir,;t t
s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t
s,@builddir@,$ac_builddir,;t t
s,@abs_builddir@,$ac_abs_builddir,;t t
s,@top_builddir@,$ac_top_builddir,;t t
s,@abs_top_builddir@,$ac_abs_top_builddir,;t t
s,@INSTALL@,$ac_INSTALL,;t t
" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out
rm -f $tmp/stdin
if test x"$ac_file" != x-; then
mv $tmp/out $ac_file
else
cat $tmp/out
rm -f $tmp/out
fi
done
{ (exit 0); exit 0; }

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,582 @@
#
# The build process allows for using a cross-compiler. But the default
# action is to target the same platform that we are running on. The
# configure script needs to discover the following properties of the
# build and target systems:
#
# srcdir
#
# The is the name of the directory that contains the
# "configure" shell script. All source files are
# located relative to this directory.
#
# bindir
#
# The name of the directory where executables should be
# written by the "install" target of the makefile.
#
# program_prefix
#
# Add this prefix to the names of all executables that run
# on the target machine. Default: ""
#
# ENABLE_SHARED
#
# True if shared libraries should be generated.
#
# BUILD_CC
#
# The name of a command that is used to convert C
# source files into executables that run on the build
# platform.
#
# BUILD_CFLAGS
#
# Switches that the build compiler needs in order to construct
# command-line programs.
#
# BUILD_LIBS
#
# Libraries that the build compiler needs in order to construct
# command-line programs.
#
# BUILD_EXEEXT
#
# The filename extension for executables on the build
# platform. "" for Unix and ".exe" for Windows.
#
# TCL_*
#
# Lots of values are read in from the tclConfig.sh script,
# if that script is available. This values are used for
# constructing and installing the TCL extension.
#
# TARGET_READLINE_LIBS
#
# This is the library directives passed to the target linker
# that cause the executable to link against the readline library.
# This might be a switch like "-lreadline" or pathnames of library
# file like "../../src/libreadline.a".
#
# TARGET_READLINE_INC
#
# This variables define the directory that contain header
# files for the readline library. If the compiler is able
# to find <readline.h> on its own, then this can be blank.
#
# TARGET_EXEEXT
#
# The filename extension for executables on the
# target platform. "" for Unix and ".exe" for windows.
#
# The generated configure script will make an attempt to guess
# at all of the above parameters. You can override any of
# the guesses by setting the environment variable named
# "config_AAAA" where "AAAA" is the name of the parameter
# described above. (Exception: srcdir cannot be set this way.)
# If you have a file that sets one or more of these environment
# variables, you can invoke configure as follows:
#
# configure --with-hints=FILE
#
# where FILE is the name of the file that sets the environment
# variables. FILE should be an absolute pathname.
#
# This configure.in file is easy to reuse on other projects. Just
# change the argument to AC_INIT(). And disable any features that
# you don't need (for example BLT) by erasing or commenting out
# the corresponding code.
#
AC_INIT(src/sqlite.h.in)
dnl Put the RCS revision string after AC_INIT so that it will also
dnl show in in configure.
# The following RCS revision string applies to configure.in
# $Revision: 1.31 $
#########
# Programs needed
#
AC_PROG_LIBTOOL
AC_PROG_INSTALL
AC_PROG_AWK
#########
# Set up an appropriate program prefix
#
if test "$program_prefix" = "NONE"; then
program_prefix=""
fi
AC_SUBST(program_prefix)
VERSION=[`cat $srcdir/VERSION | sed 's/^\([0-9]*\.*[0-9]*\).*/\1/'`]
echo "Version set to $VERSION"
AC_SUBST(VERSION)
RELEASE=`cat $srcdir/VERSION`
echo "Release set to $RELEASE"
AC_SUBST(RELEASE)
VERSION_NUMBER=[`cat $srcdir/VERSION \
| sed 's/[^0-9]/ /g' \
| awk '{printf "%d%03d%03d",$1,$2,$3}'`]
echo "Version number set to $VERSION_NUMBER"
AC_SUBST(VERSION_NUMBER)
#########
# Check to see if the --with-hints=FILE option is used. If there is none,
# then check for a files named "$host.hints" and ../$hosts.hints where
# $host is the hostname of the build system. If still no hints are
# found, try looking in $system.hints and ../$system.hints where
# $system is the result of uname -s.
#
AC_ARG_WITH(hints,
AC_HELP_STRING([--with-hints=FILE],[Read configuration options from FILE]),
hints=$withval)
if test "$hints" = ""; then
host=`hostname | sed 's/\..*//'`
if test -r $host.hints; then
hints=$host.hints
else
if test -r ../$host.hints; then
hints=../$host.hints
fi
fi
fi
if test "$hints" = ""; then
sys=`uname -s`
if test -r $sys.hints; then
hints=$sys.hints
else
if test -r ../$sys.hints; then
hints=../$sys.hints
fi
fi
fi
if test "$hints" != ""; then
AC_MSG_RESULT(reading hints from $hints)
. $hints
fi
#########
# Locate a compiler for the build machine. This compiler should
# generate command-line programs that run on the build machine.
#
if test x"$cross_compiling" = xno; then
BUILD_CC=$CC
BUILD_CFLAGS=$CFLAGS
else
if test "${BUILD_CC+set}" != set; then
AC_CHECK_PROGS(BUILD_CC, gcc cc cl)
fi
if test "${BUILD_CFLAGS+set}" != set; then
BUILD_CFLAGS="-g"
fi
fi
AC_SUBST(BUILD_CC)
AC_SUBST(BUILD_CFLAGS)
##########
# Do we want to support multithreaded use of sqlite
#
AC_ARG_ENABLE(threadsafe,
AC_HELP_STRING([--enable-threadsafe],[Support threadsafe operation]),,enable_threadsafe=yes)
AC_MSG_CHECKING([whether to support threadsafe operation])
if test "$enable_threadsafe" = "no"; then
SQLITE_THREADSAFE=0
AC_MSG_RESULT([no])
else
SQLITE_THREADSAFE=1
AC_MSG_RESULT([yes])
fi
AC_SUBST(SQLITE_THREADSAFE)
if test "$SQLITE_THREADSAFE" = "1"; then
LIBS=""
AC_CHECK_LIB(pthread, pthread_create)
TARGET_THREAD_LIB="$LIBS"
LIBS=""
else
TARGET_THREAD_LIB=""
fi
AC_SUBST(TARGET_THREAD_LIB)
##########
# Do we want to allow a connection created in one thread to be used
# in another thread. This does not work on many Linux systems (ex: RedHat 9)
# due to bugs in the threading implementations. This is thus off by default.
#
AC_ARG_ENABLE(cross-thread-connections,
AC_HELP_STRING([--enable-cross-thread-connections],[Allow connection sharing across threads]),,enable_xthreadconnect=no)
AC_MSG_CHECKING([whether to allow connections to be shared across threads])
if test "$enable_xthreadconnect" = "no"; then
XTHREADCONNECT=''
AC_MSG_RESULT([no])
else
XTHREADCONNECT='-DSQLITE_ALLOW_XTHREAD_CONNECT=1'
AC_MSG_RESULT([yes])
fi
AC_SUBST(XTHREADCONNECT)
##########
# Do we want to set threadsOverrideEachOthersLocks variable to be 1 (true) by
# default. Normally, a test at runtime is performed to determine the
# appropriate value of this variable. Use this option only if you're sure that
# threads can safely override each others locks in all runtime situations.
#
AC_ARG_ENABLE(threads-override-locks,
AC_HELP_STRING([--enable-threads-override-locks],[Threads can override each others locks]),,enable_threads_override_locks=no)
AC_MSG_CHECKING([whether threads can override each others locks])
if test "$enable_threads_override_locks" = "no"; then
THREADSOVERRIDELOCKS='-1'
AC_MSG_RESULT([no])
else
THREADSOVERRIDELOCKS='1'
AC_MSG_RESULT([yes])
fi
AC_SUBST(THREADSOVERRIDELOCKS)
##########
# Do we want to support release
#
AC_ARG_ENABLE(releasemode,
AC_HELP_STRING([--enable-releasemode],[Support libtool link to release mode]),,enable_releasemode=no)
AC_MSG_CHECKING([whether to support shared library linked as release mode or not])
if test "$enable_releasemode" = "no"; then
ALLOWRELEASE=""
AC_MSG_RESULT([no])
else
ALLOWRELEASE="-release `cat $srcdir/VERSION`"
AC_MSG_RESULT([yes])
fi
AC_SUBST(ALLOWRELEASE)
##########
# Do we want temporary databases in memory
#
AC_ARG_ENABLE(tempstore,
AC_HELP_STRING([--enable-tempstore],[Use an in-ram database for temporary tables (never,no,yes,always)]),,enable_tempstore=no)
AC_MSG_CHECKING([whether to use an in-ram database for temporary tables])
case "$enable_tempstore" in
never )
TEMP_STORE=0
AC_MSG_RESULT([never])
;;
no )
TEMP_STORE=1
AC_MSG_RESULT([no])
;;
always )
TEMP_STORE=3
AC_MSG_RESULT([always])
;;
yes )
TEMP_STORE=3
AC_MSG_RESULT([always])
;;
* )
TEMP_STORE=1
AC_MSG_RESULT([yes])
;;
esac
AC_SUBST(TEMP_STORE)
###########
# Lots of things are different if we are compiling for Windows using
# the CYGWIN environment. So check for that special case and handle
# things accordingly.
#
AC_MSG_CHECKING([if executables have the .exe suffix])
if test "$config_BUILD_EXEEXT" = ".exe"; then
CYGWIN=yes
AC_MSG_RESULT(yes)
else
AC_MSG_RESULT(unknown)
fi
if test "$CYGWIN" != "yes"; then
AC_CYGWIN
fi
if test "$CYGWIN" = "yes"; then
BUILD_EXEEXT=.exe
else
BUILD_EXEEXT=$EXEEXT
fi
if test x"$cross_compiling" = xno; then
TARGET_EXEEXT=$BUILD_EXEEXT
else
TARGET_EXEEXT=$config_TARGET_EXEEXT
fi
if test "$TARGET_EXEEXT" = ".exe"; then
if test $OS2_SHELL ; then
OS_UNIX=0
OS_WIN=0
OS_OS2=1
TARGET_CFLAGS="$TARGET_CFLAGS -DOS_OS2=1"
if test "$ac_compiler_gnu" == "yes" ; then
TARGET_CFLAGS="$TARGET_CFLAGS -Zomf -Zexe -Zmap"
BUILD_CFLAGS="$BUILD_CFLAGS -Zomf -Zexe"
fi
else
OS_UNIX=0
OS_WIN=1
OS_OS2=0
tclsubdir=win
TARGET_CFLAGS="$TARGET_CFLAGS -DOS_WIN=1"
fi
else
OS_UNIX=1
OS_WIN=0
OS_OS2=0
tclsubdir=unix
TARGET_CFLAGS="$TARGET_CFLAGS -DOS_UNIX=1"
fi
AC_SUBST(BUILD_EXEEXT)
AC_SUBST(OS_UNIX)
AC_SUBST(OS_WIN)
AC_SUBST(OS_OS2)
AC_SUBST(TARGET_EXEEXT)
##########
# Figure out all the parameters needed to compile against Tcl.
#
# This code is derived from the SC_PATH_TCLCONFIG and SC_LOAD_TCLCONFIG
# macros in the in the tcl.m4 file of the standard TCL distribution.
# Those macros could not be used directly since we have to make some
# minor changes to accomodate systems that do not have TCL installed.
#
AC_ARG_ENABLE(tcl, AC_HELP_STRING([--disable-tcl],[do not build TCL extension]),
[use_tcl=$enableval],[use_tcl=yes])
if test "${use_tcl}" = "yes" ; then
AC_ARG_WITH(tcl, AC_HELP_STRING([--with-tcl=DIR],[directory containing tcl configuration (tclConfig.sh)]), with_tclconfig=${withval})
AC_MSG_CHECKING([for Tcl configuration])
AC_CACHE_VAL(ac_cv_c_tclconfig,[
# First check to see if --with-tcl was specified.
if test x"${with_tclconfig}" != x ; then
if test -f "${with_tclconfig}/tclConfig.sh" ; then
ac_cv_c_tclconfig=`(cd ${with_tclconfig}; pwd)`
else
AC_MSG_ERROR([${with_tclconfig} directory doesn't contain tclConfig.sh])
fi
fi
# then check for a private Tcl installation
if test x"${ac_cv_c_tclconfig}" = x ; then
for i in \
../tcl \
`ls -dr ../tcl[[8-9]].[[0-9]].[[0-9]]* 2>/dev/null` \
`ls -dr ../tcl[[8-9]].[[0-9]] 2>/dev/null` \
`ls -dr ../tcl[[8-9]].[[0-9]]* 2>/dev/null` \
../../tcl \
`ls -dr ../../tcl[[8-9]].[[0-9]].[[0-9]]* 2>/dev/null` \
`ls -dr ../../tcl[[8-9]].[[0-9]] 2>/dev/null` \
`ls -dr ../../tcl[[8-9]].[[0-9]]* 2>/dev/null` \
../../../tcl \
`ls -dr ../../../tcl[[8-9]].[[0-9]].[[0-9]]* 2>/dev/null` \
`ls -dr ../../../tcl[[8-9]].[[0-9]] 2>/dev/null` \
`ls -dr ../../../tcl[[8-9]].[[0-9]]* 2>/dev/null`
do
if test -f "$i/unix/tclConfig.sh" ; then
ac_cv_c_tclconfig=`(cd $i/unix; pwd)`
break
fi
done
fi
# check in a few common install locations
if test x"${ac_cv_c_tclconfig}" = x ; then
for i in \
`ls -d ${libdir} 2>/dev/null` \
`ls -d /usr/local/lib 2>/dev/null` \
`ls -d /usr/contrib/lib 2>/dev/null` \
`ls -d /usr/lib 2>/dev/null`
do
if test -f "$i/tclConfig.sh" ; then
ac_cv_c_tclconfig=`(cd $i; pwd)`
break
fi
done
fi
# check in a few other private locations
if test x"${ac_cv_c_tclconfig}" = x ; then
for i in \
${srcdir}/../tcl \
`ls -dr ${srcdir}/../tcl[[8-9]].[[0-9]].[[0-9]]* 2>/dev/null` \
`ls -dr ${srcdir}/../tcl[[8-9]].[[0-9]] 2>/dev/null` \
`ls -dr ${srcdir}/../tcl[[8-9]].[[0-9]]* 2>/dev/null`
do
if test -f "$i/unix/tclConfig.sh" ; then
ac_cv_c_tclconfig=`(cd $i/unix; pwd)`
break
fi
done
fi
])
if test x"${ac_cv_c_tclconfig}" = x ; then
use_tcl=no
AC_MSG_WARN(Can't find Tcl configuration definitions)
AC_MSG_WARN(*** Without Tcl the regression tests cannot be executed ***)
AC_MSG_WARN(*** Consider using --with-tcl=... to define location of Tcl ***)
else
TCL_BIN_DIR=${ac_cv_c_tclconfig}
AC_MSG_RESULT(found $TCL_BIN_DIR/tclConfig.sh)
AC_MSG_CHECKING([for existence of $TCL_BIN_DIR/tclConfig.sh])
if test -f "$TCL_BIN_DIR/tclConfig.sh" ; then
AC_MSG_RESULT([loading])
. $TCL_BIN_DIR/tclConfig.sh
else
AC_MSG_RESULT([file not found])
fi
#
# If the TCL_BIN_DIR is the build directory (not the install directory),
# then set the common variable name to the value of the build variables.
# For example, the variable TCL_LIB_SPEC will be set to the value
# of TCL_BUILD_LIB_SPEC. An extension should make use of TCL_LIB_SPEC
# instead of TCL_BUILD_LIB_SPEC since it will work with both an
# installed and uninstalled version of Tcl.
#
if test -f $TCL_BIN_DIR/Makefile ; then
TCL_LIB_SPEC=${TCL_BUILD_LIB_SPEC}
TCL_STUB_LIB_SPEC=${TCL_BUILD_STUB_LIB_SPEC}
TCL_STUB_LIB_PATH=${TCL_BUILD_STUB_LIB_PATH}
fi
#
# eval is required to do the TCL_DBGX substitution
#
eval "TCL_LIB_FILE=\"${TCL_LIB_FILE}\""
eval "TCL_LIB_FLAG=\"${TCL_LIB_FLAG}\""
eval "TCL_LIB_SPEC=\"${TCL_LIB_SPEC}\""
eval "TCL_STUB_LIB_FILE=\"${TCL_STUB_LIB_FILE}\""
eval "TCL_STUB_LIB_FLAG=\"${TCL_STUB_LIB_FLAG}\""
eval "TCL_STUB_LIB_SPEC=\"${TCL_STUB_LIB_SPEC}\""
AC_SUBST(TCL_VERSION)
AC_SUBST(TCL_BIN_DIR)
AC_SUBST(TCL_SRC_DIR)
AC_SUBST(TCL_LIBS)
AC_SUBST(TCL_INCLUDE_SPEC)
AC_SUBST(TCL_LIB_FILE)
AC_SUBST(TCL_LIB_FLAG)
AC_SUBST(TCL_LIB_SPEC)
AC_SUBST(TCL_STUB_LIB_FILE)
AC_SUBST(TCL_STUB_LIB_FLAG)
AC_SUBST(TCL_STUB_LIB_SPEC)
fi
fi
if test "${use_tcl}" = "no" ; then
HAVE_TCL=""
else
HAVE_TCL=1
fi
AC_SUBST(HAVE_TCL)
##########
# Figure out what C libraries are required to compile programs
# that use "readline()" library.
#
TARGET_READLINE_LIBS=""
TARGET_READLINE_INC=""
TARGET_HAVE_READLINE=0
AC_ARG_ENABLE([readline],
[AC_HELP_STRING([--disable-readline],[disable readline support [default=detect]])],
[with_readline=$enableval],
[with_readline=auto])
if test x"$with_readline" != xno; then
found="yes"
AC_ARG_WITH([readline-lib],
[AC_HELP_STRING([--with-readline-lib],[specify readline library])],
[with_readline_lib=$withval],
[with_readline_lib="auto"])
if test "x$with_readline_lib" = xauto; then
save_LIBS="$LIBS"
LIBS=""
AC_SEARCH_LIBS(tgetent, [readline ncurses curses termcap], [term_LIBS="$LIBS"], [term_LIBS=""])
AC_CHECK_LIB([readline], [readline], [TARGET_READLINE_LIBS="-lreadline"], [found="no"])
TARGET_READLINE_LIBS="$TARGET_READLINE_LIBS $term_LIBS"
LIBS="$save_LIBS"
else
TARGET_READLINE_LIBS="$with_readline_lib"
fi
AC_ARG_WITH([readline-inc],
[AC_HELP_STRING([--with-readline-inc],[specify readline include paths])],
[with_readline_inc=$withval],
[with_readline_inc="auto"])
if test "x$with_readline_inc" = xauto; then
AC_CHECK_HEADER(readline.h, [found="yes"], [
found="no"
if test "$cross_compiling" != yes; then
for dir in /usr /usr/local /usr/local/readline /usr/contrib /mingw; do
for subdir in include include/readline; do
AC_CHECK_FILE($dir/$subdir/readline.h, found=yes)
if test "$found" = "yes"; then
TARGET_READLINE_INC="-I$dir/$subdir"
break
fi
done
test "$found" = "yes" && break
done
fi
])
else
TARGET_READLINE_INC="$with_readline_inc"
fi
if test x"$found" = xno; then
TARGET_READLINE_LIBS=""
TARGET_READLINE_INC=""
TARGET_HAVE_READLINE=0
else
TARGET_HAVE_READLINE=1
fi
fi
AC_SUBST(TARGET_READLINE_LIBS)
AC_SUBST(TARGET_READLINE_INC)
AC_SUBST(TARGET_HAVE_READLINE)
##########
# Figure out what C libraries are required to compile programs
# that use "fdatasync()" function.
#
AC_SEARCH_LIBS(fdatasync, [rt])
#########
# check for debug enabled
AC_ARG_ENABLE(debug, AC_HELP_STRING([--enable-debug],[enable debugging & verbose explain]),
[use_debug=$enableval],[use_debug=no])
if test "${use_debug}" = "yes" ; then
TARGET_DEBUG="-DSQLITE_DEBUG=1"
else
TARGET_DEBUG="-DNDEBUG"
fi
AC_SUBST(TARGET_DEBUG)
#########
# Figure out whether or not we have a "usleep()" function.
#
AC_CHECK_FUNC(usleep, [TARGET_CFLAGS="$TARGET_CFLAGS -DHAVE_USLEEP=1"])
#--------------------------------------------------------------------
# Redefine fdatasync as fsync on systems that lack fdatasync
#--------------------------------------------------------------------
AC_CHECK_FUNC(fdatasync, [TARGET_CFLAGS="$TARGET_CFLAGS -DHAVE_FDATASYNC=1"])
#########
# Generate the output files.
#
AC_OUTPUT([
Makefile
sqlite3.pc
])

View file

@ -0,0 +1,679 @@
# A Tk console widget for SQLite. Invoke sqlitecon::create with a window name,
# a prompt string, a title to set a new top-level window, and the SQLite
# database handle. For example:
#
# sqlitecon::create .sqlcon {sql:- } {SQL Console} db
#
# A toplevel window is created that allows you to type in SQL commands to
# be processed on the spot.
#
# A limited set of dot-commands are supported:
#
# .table
# .schema ?TABLE?
# .mode list|column|multicolumn|line
# .exit
#
# In addition, a new SQL function named "edit()" is created. This function
# takes a single text argument and returns a text result. Whenever the
# the function is called, it pops up a new toplevel window containing a
# text editor screen initialized to the argument. When the "OK" button
# is pressed, whatever revised text is in the text editor is returned as
# the result of the edit() function. This allows text fields of SQL tables
# to be edited quickly and easily as follows:
#
# UPDATE table1 SET dscr = edit(dscr) WHERE rowid=15;
#
# Create a namespace to work in
#
namespace eval ::sqlitecon {
# do nothing
}
# Create a console widget named $w. The prompt string is $prompt.
# The title at the top of the window is $title. The database connection
# object is $db
#
proc sqlitecon::create {w prompt title db} {
upvar #0 $w.t v
if {[winfo exists $w]} {destroy $w}
if {[info exists v]} {unset v}
toplevel $w
wm title $w $title
wm iconname $w $title
frame $w.mb -bd 2 -relief raised
pack $w.mb -side top -fill x
menubutton $w.mb.file -text File -menu $w.mb.file.m
menubutton $w.mb.edit -text Edit -menu $w.mb.edit.m
pack $w.mb.file $w.mb.edit -side left -padx 8 -pady 1
set m [menu $w.mb.file.m -tearoff 0]
$m add command -label {Close} -command "destroy $w"
sqlitecon::create_child $w $prompt $w.mb.edit.m
set v(db) $db
$db function edit ::sqlitecon::_edit
}
# This routine creates a console as a child window within a larger
# window. It also creates an edit menu named "$editmenu" if $editmenu!="".
# The calling function is responsible for posting the edit menu.
#
proc sqlitecon::create_child {w prompt editmenu} {
upvar #0 $w.t v
if {$editmenu!=""} {
set m [menu $editmenu -tearoff 0]
$m add command -label Cut -command "sqlitecon::Cut $w.t"
$m add command -label Copy -command "sqlitecon::Copy $w.t"
$m add command -label Paste -command "sqlitecon::Paste $w.t"
$m add command -label {Clear Screen} -command "sqlitecon::Clear $w.t"
$m add separator
$m add command -label {Save As...} -command "sqlitecon::SaveFile $w.t"
catch {$editmenu config -postcommand "sqlitecon::EnableEditMenu $w"}
}
scrollbar $w.sb -orient vertical -command "$w.t yview"
pack $w.sb -side right -fill y
text $w.t -font fixed -yscrollcommand "$w.sb set"
pack $w.t -side right -fill both -expand 1
bindtags $w.t Sqlitecon
set v(editmenu) $editmenu
set v(history) 0
set v(historycnt) 0
set v(current) -1
set v(prompt) $prompt
set v(prior) {}
set v(plength) [string length $v(prompt)]
set v(x) 0
set v(y) 0
set v(mode) column
set v(header) on
$w.t mark set insert end
$w.t tag config ok -foreground blue
$w.t tag config err -foreground red
$w.t insert end $v(prompt)
$w.t mark set out 1.0
after idle "focus $w.t"
}
bind Sqlitecon <1> {sqlitecon::Button1 %W %x %y}
bind Sqlitecon <B1-Motion> {sqlitecon::B1Motion %W %x %y}
bind Sqlitecon <B1-Leave> {sqlitecon::B1Leave %W %x %y}
bind Sqlitecon <B1-Enter> {sqlitecon::cancelMotor %W}
bind Sqlitecon <ButtonRelease-1> {sqlitecon::cancelMotor %W}
bind Sqlitecon <KeyPress> {sqlitecon::Insert %W %A}
bind Sqlitecon <Left> {sqlitecon::Left %W}
bind Sqlitecon <Control-b> {sqlitecon::Left %W}
bind Sqlitecon <Right> {sqlitecon::Right %W}
bind Sqlitecon <Control-f> {sqlitecon::Right %W}
bind Sqlitecon <BackSpace> {sqlitecon::Backspace %W}
bind Sqlitecon <Control-h> {sqlitecon::Backspace %W}
bind Sqlitecon <Delete> {sqlitecon::Delete %W}
bind Sqlitecon <Control-d> {sqlitecon::Delete %W}
bind Sqlitecon <Home> {sqlitecon::Home %W}
bind Sqlitecon <Control-a> {sqlitecon::Home %W}
bind Sqlitecon <End> {sqlitecon::End %W}
bind Sqlitecon <Control-e> {sqlitecon::End %W}
bind Sqlitecon <Return> {sqlitecon::Enter %W}
bind Sqlitecon <KP_Enter> {sqlitecon::Enter %W}
bind Sqlitecon <Up> {sqlitecon::Prior %W}
bind Sqlitecon <Control-p> {sqlitecon::Prior %W}
bind Sqlitecon <Down> {sqlitecon::Next %W}
bind Sqlitecon <Control-n> {sqlitecon::Next %W}
bind Sqlitecon <Control-k> {sqlitecon::EraseEOL %W}
bind Sqlitecon <<Cut>> {sqlitecon::Cut %W}
bind Sqlitecon <<Copy>> {sqlitecon::Copy %W}
bind Sqlitecon <<Paste>> {sqlitecon::Paste %W}
bind Sqlitecon <<Clear>> {sqlitecon::Clear %W}
# Insert a single character at the insertion cursor
#
proc sqlitecon::Insert {w a} {
$w insert insert $a
$w yview insert
}
# Move the cursor one character to the left
#
proc sqlitecon::Left {w} {
upvar #0 $w v
scan [$w index insert] %d.%d row col
if {$col>$v(plength)} {
$w mark set insert "insert -1c"
}
}
# Erase the character to the left of the cursor
#
proc sqlitecon::Backspace {w} {
upvar #0 $w v
scan [$w index insert] %d.%d row col
if {$col>$v(plength)} {
$w delete {insert -1c}
}
}
# Erase to the end of the line
#
proc sqlitecon::EraseEOL {w} {
upvar #0 $w v
scan [$w index insert] %d.%d row col
if {$col>=$v(plength)} {
$w delete insert {insert lineend}
}
}
# Move the cursor one character to the right
#
proc sqlitecon::Right {w} {
$w mark set insert "insert +1c"
}
# Erase the character to the right of the cursor
#
proc sqlitecon::Delete w {
$w delete insert
}
# Move the cursor to the beginning of the current line
#
proc sqlitecon::Home w {
upvar #0 $w v
scan [$w index insert] %d.%d row col
$w mark set insert $row.$v(plength)
}
# Move the cursor to the end of the current line
#
proc sqlitecon::End w {
$w mark set insert {insert lineend}
}
# Add a line to the history
#
proc sqlitecon::addHistory {w line} {
upvar #0 $w v
if {$v(historycnt)>0} {
set last [lindex $v(history) [expr $v(historycnt)-1]]
if {[string compare $last $line]} {
lappend v(history) $line
incr v(historycnt)
}
} else {
set v(history) [list $line]
set v(historycnt) 1
}
set v(current) $v(historycnt)
}
# Called when "Enter" is pressed. Do something with the line
# of text that was entered.
#
proc sqlitecon::Enter w {
upvar #0 $w v
scan [$w index insert] %d.%d row col
set start $row.$v(plength)
set line [$w get $start "$start lineend"]
$w insert end \n
$w mark set out end
if {$v(prior)==""} {
set cmd $line
} else {
set cmd $v(prior)\n$line
}
if {[string index $cmd 0]=="." || [$v(db) complete $cmd]} {
regsub -all {\n} [string trim $cmd] { } cmd2
addHistory $w $cmd2
set rc [catch {DoCommand $w $cmd} res]
if {![winfo exists $w]} return
if {$rc} {
$w insert end $res\n err
} elseif {[string length $res]>0} {
$w insert end $res\n ok
}
set v(prior) {}
$w insert end $v(prompt)
} else {
set v(prior) $cmd
regsub -all {[^ ]} $v(prompt) . x
$w insert end $x
}
$w mark set insert end
$w mark set out {insert linestart}
$w yview insert
}
# Execute a single SQL command. Pay special attention to control
# directives that begin with "."
#
# The return value is the text output from the command, properly
# formatted.
#
proc sqlitecon::DoCommand {w cmd} {
upvar #0 $w v
set mode $v(mode)
set header $v(header)
if {[regexp {^(\.[a-z]+)} $cmd all word]} {
if {$word==".mode"} {
regexp {^.[a-z]+ +([a-z]+)} $cmd all v(mode)
return {}
} elseif {$word==".exit"} {
destroy [winfo toplevel $w]
return {}
} elseif {$word==".header"} {
regexp {^.[a-z]+ +([a-z]+)} $cmd all v(header)
return {}
} elseif {$word==".tables"} {
set mode multicolumn
set cmd {SELECT name FROM sqlite_master WHERE type='table'
UNION ALL
SELECT name FROM sqlite_temp_master WHERE type='table'}
$v(db) eval {PRAGMA database_list} {
if {$name!="temp" && $name!="main"} {
append cmd "UNION ALL SELECT name FROM $name.sqlite_master\
WHERE type='table'"
}
}
append cmd { ORDER BY 1}
} elseif {$word==".fullschema"} {
set pattern %
regexp {^.[a-z]+ +([^ ]+)} $cmd all pattern
set mode list
set header 0
set cmd "SELECT sql FROM sqlite_master WHERE tbl_name LIKE '$pattern'
AND sql NOT NULL UNION ALL SELECT sql FROM sqlite_temp_master
WHERE tbl_name LIKE '$pattern' AND sql NOT NULL"
$v(db) eval {PRAGMA database_list} {
if {$name!="temp" && $name!="main"} {
append cmd " UNION ALL SELECT sql FROM $name.sqlite_master\
WHERE tbl_name LIKE '$pattern' AND sql NOT NULL"
}
}
} elseif {$word==".schema"} {
set pattern %
regexp {^.[a-z]+ +([^ ]+)} $cmd all pattern
set mode list
set header 0
set cmd "SELECT sql FROM sqlite_master WHERE name LIKE '$pattern'
AND sql NOT NULL UNION ALL SELECT sql FROM sqlite_temp_master
WHERE name LIKE '$pattern' AND sql NOT NULL"
$v(db) eval {PRAGMA database_list} {
if {$name!="temp" && $name!="main"} {
append cmd " UNION ALL SELECT sql FROM $name.sqlite_master\
WHERE name LIKE '$pattern' AND sql NOT NULL"
}
}
} else {
return \
".exit\n.mode line|list|column\n.schema ?TABLENAME?\n.tables"
}
}
set res {}
if {$mode=="list"} {
$v(db) eval $cmd x {
set sep {}
foreach col $x(*) {
append res $sep$x($col)
set sep |
}
append res \n
}
if {[info exists x(*)] && $header} {
set sep {}
set hdr {}
foreach col $x(*) {
append hdr $sep$col
set sep |
}
set res $hdr\n$res
}
} elseif {[string range $mode 0 2]=="col"} {
set y {}
$v(db) eval $cmd x {
foreach col $x(*) {
if {![info exists cw($col)] || $cw($col)<[string length $x($col)]} {
set cw($col) [string length $x($col)]
}
lappend y $x($col)
}
}
if {[info exists x(*)] && $header} {
set hdr {}
set ln {}
set dash ---------------------------------------------------------------
append dash ------------------------------------------------------------
foreach col $x(*) {
if {![info exists cw($col)] || $cw($col)<[string length $col]} {
set cw($col) [string length $col]
}
lappend hdr $col
lappend ln [string range $dash 1 $cw($col)]
}
set y [concat $hdr $ln $y]
}
if {[info exists x(*)]} {
set format {}
set arglist {}
set arglist2 {}
set i 0
foreach col $x(*) {
lappend arglist x$i
append arglist2 " \$x$i"
incr i
append format " %-$cw($col)s"
}
set format [string trimleft $format]\n
if {[llength $arglist]>0} {
foreach $arglist $y "append res \[format [list $format] $arglist2\]"
}
}
} elseif {$mode=="multicolumn"} {
set y [$v(db) eval $cmd]
set max 0
foreach e $y {
if {$max<[string length $e]} {set max [string length $e]}
}
set ncol [expr {int(80/($max+2))}]
if {$ncol<1} {set ncol 1}
set nelem [llength $y]
set nrow [expr {($nelem+$ncol-1)/$ncol}]
set format "%-${max}s"
for {set i 0} {$i<$nrow} {incr i} {
set j $i
while 1 {
append res [format $format [lindex $y $j]]
incr j $nrow
if {$j>=$nelem} break
append res { }
}
append res \n
}
} else {
$v(db) eval $cmd x {
foreach col $x(*) {append res "$col = $x($col)\n"}
append res \n
}
}
return [string trimright $res]
}
# Change the line to the previous line
#
proc sqlitecon::Prior w {
upvar #0 $w v
if {$v(current)<=0} return
incr v(current) -1
set line [lindex $v(history) $v(current)]
sqlitecon::SetLine $w $line
}
# Change the line to the next line
#
proc sqlitecon::Next w {
upvar #0 $w v
if {$v(current)>=$v(historycnt)} return
incr v(current) 1
set line [lindex $v(history) $v(current)]
sqlitecon::SetLine $w $line
}
# Change the contents of the entry line
#
proc sqlitecon::SetLine {w line} {
upvar #0 $w v
scan [$w index insert] %d.%d row col
set start $row.$v(plength)
$w delete $start end
$w insert end $line
$w mark set insert end
$w yview insert
}
# Called when the mouse button is pressed at position $x,$y on
# the console widget.
#
proc sqlitecon::Button1 {w x y} {
global tkPriv
upvar #0 $w v
set v(mouseMoved) 0
set v(pressX) $x
set p [sqlitecon::nearestBoundry $w $x $y]
scan [$w index insert] %d.%d ix iy
scan $p %d.%d px py
if {$px==$ix} {
$w mark set insert $p
}
$w mark set anchor $p
focus $w
}
# Find the boundry between characters that is nearest
# to $x,$y
#
proc sqlitecon::nearestBoundry {w x y} {
set p [$w index @$x,$y]
set bb [$w bbox $p]
if {![string compare $bb ""]} {return $p}
if {($x-[lindex $bb 0])<([lindex $bb 2]/2)} {return $p}
$w index "$p + 1 char"
}
# This routine extends the selection to the point specified by $x,$y
#
proc sqlitecon::SelectTo {w x y} {
upvar #0 $w v
set cur [sqlitecon::nearestBoundry $w $x $y]
if {[catch {$w index anchor}]} {
$w mark set anchor $cur
}
set anchor [$w index anchor]
if {[$w compare $cur != $anchor] || (abs($v(pressX) - $x) >= 3)} {
if {$v(mouseMoved)==0} {
$w tag remove sel 0.0 end
}
set v(mouseMoved) 1
}
if {[$w compare $cur < anchor]} {
set first $cur
set last anchor
} else {
set first anchor
set last $cur
}
if {$v(mouseMoved)} {
$w tag remove sel 0.0 $first
$w tag add sel $first $last
$w tag remove sel $last end
update idletasks
}
}
# Called whenever the mouse moves while button-1 is held down.
#
proc sqlitecon::B1Motion {w x y} {
upvar #0 $w v
set v(y) $y
set v(x) $x
sqlitecon::SelectTo $w $x $y
}
# Called whenever the mouse leaves the boundries of the widget
# while button 1 is held down.
#
proc sqlitecon::B1Leave {w x y} {
upvar #0 $w v
set v(y) $y
set v(x) $x
sqlitecon::motor $w
}
# This routine is called to automatically scroll the window when
# the mouse drags offscreen.
#
proc sqlitecon::motor w {
upvar #0 $w v
if {![winfo exists $w]} return
if {$v(y)>=[winfo height $w]} {
$w yview scroll 1 units
} elseif {$v(y)<0} {
$w yview scroll -1 units
} else {
return
}
sqlitecon::SelectTo $w $v(x) $v(y)
set v(timer) [after 50 sqlitecon::motor $w]
}
# This routine cancels the scrolling motor if it is active
#
proc sqlitecon::cancelMotor w {
upvar #0 $w v
catch {after cancel $v(timer)}
catch {unset v(timer)}
}
# Do a Copy operation on the stuff currently selected.
#
proc sqlitecon::Copy w {
if {![catch {set text [$w get sel.first sel.last]}]} {
clipboard clear -displayof $w
clipboard append -displayof $w $text
}
}
# Return 1 if the selection exists and is contained
# entirely on the input line. Return 2 if the selection
# exists but is not entirely on the input line. Return 0
# if the selection does not exist.
#
proc sqlitecon::canCut w {
set r [catch {
scan [$w index sel.first] %d.%d s1x s1y
scan [$w index sel.last] %d.%d s2x s2y
scan [$w index insert] %d.%d ix iy
}]
if {$r==1} {return 0}
if {$s1x==$ix && $s2x==$ix} {return 1}
return 2
}
# Do a Cut operation if possible. Cuts are only allowed
# if the current selection is entirely contained on the
# current input line.
#
proc sqlitecon::Cut w {
if {[sqlitecon::canCut $w]==1} {
sqlitecon::Copy $w
$w delete sel.first sel.last
}
}
# Do a paste opeation.
#
proc sqlitecon::Paste w {
if {[sqlitecon::canCut $w]==1} {
$w delete sel.first sel.last
}
if {[catch {selection get -displayof $w -selection CLIPBOARD} topaste]
&& [catch {selection get -displayof $w -selection PRIMARY} topaste]} {
return
}
if {[info exists ::$w]} {
set prior 0
foreach line [split $topaste \n] {
if {$prior} {
sqlitecon::Enter $w
update
}
set prior 1
$w insert insert $line
}
} else {
$w insert insert $topaste
}
}
# Enable or disable entries in the Edit menu
#
proc sqlitecon::EnableEditMenu w {
upvar #0 $w.t v
set m $v(editmenu)
if {$m=="" || ![winfo exists $m]} return
switch [sqlitecon::canCut $w.t] {
0 {
$m entryconf Copy -state disabled
$m entryconf Cut -state disabled
}
1 {
$m entryconf Copy -state normal
$m entryconf Cut -state normal
}
2 {
$m entryconf Copy -state normal
$m entryconf Cut -state disabled
}
}
}
# Prompt the user for the name of a writable file. Then write the
# entire contents of the console screen to that file.
#
proc sqlitecon::SaveFile w {
set types {
{{Text Files} {.txt}}
{{All Files} *}
}
set f [tk_getSaveFile -filetypes $types -title "Write Screen To..."]
if {$f!=""} {
if {[catch {open $f w} fd]} {
tk_messageBox -type ok -icon error -message $fd
} else {
puts $fd [string trimright [$w get 1.0 end] \n]
close $fd
}
}
}
# Erase everything from the console above the insertion line.
#
proc sqlitecon::Clear w {
$w delete 1.0 {insert linestart}
}
# An in-line editor for SQL
#
proc sqlitecon::_edit {origtxt {title {}}} {
for {set i 0} {[winfo exists .ed$i]} {incr i} continue
set w .ed$i
toplevel $w
wm protocol $w WM_DELETE_WINDOW "$w.b.can invoke"
wm title $w {Inline SQL Editor}
frame $w.b
pack $w.b -side bottom -fill x
button $w.b.can -text Cancel -width 6 -command [list set ::$w 0]
button $w.b.ok -text OK -width 6 -command [list set ::$w 1]
button $w.b.cut -text Cut -width 6 -command [list ::sqlitecon::Cut $w.t]
button $w.b.copy -text Copy -width 6 -command [list ::sqlitecon::Copy $w.t]
button $w.b.paste -text Paste -width 6 -command [list ::sqlitecon::Paste $w.t]
set ::$w {}
pack $w.b.cut $w.b.copy $w.b.paste $w.b.can $w.b.ok\
-side left -padx 5 -pady 5 -expand 1
if {$title!=""} {
label $w.title -text $title
pack $w.title -side top -padx 5 -pady 5
}
text $w.t -bg white -fg black -yscrollcommand [list $w.sb set]
pack $w.t -side left -fill both -expand 1
scrollbar $w.sb -orient vertical -command [list $w.t yview]
pack $w.sb -side left -fill y
$w.t insert end $origtxt
vwait ::$w
if {[set ::$w]} {
set txt [string trimright [$w.t get 1.0 end]]
} else {
set txt $origtxt
}
destroy $w
return $txt
}

View file

@ -0,0 +1,892 @@
<html>
<head>
<title>The Lemon Parser Generator</title>
</head>
<body bgcolor=white>
<h1 align=center>The Lemon Parser Generator</h1>
<p>Lemon is an LALR(1) parser generator for C or C++.
It does the same job as ``bison'' and ``yacc''.
But lemon is not another bison or yacc clone. It
uses a different grammar syntax which is designed to
reduce the number of coding errors. Lemon also uses a more
sophisticated parsing engine that is faster than yacc and
bison and which is both reentrant and thread-safe.
Furthermore, Lemon implements features that can be used
to eliminate resource leaks, making is suitable for use
in long-running programs such as graphical user interfaces
or embedded controllers.</p>
<p>This document is an introduction to the Lemon
parser generator.</p>
<h2>Theory of Operation</h2>
<p>The main goal of Lemon is to translate a context free grammar (CFG)
for a particular language into C code that implements a parser for
that language.
The program has two inputs:
<ul>
<li>The grammar specification.
<li>A parser template file.
</ul>
Typically, only the grammar specification is supplied by the programmer.
Lemon comes with a default parser template which works fine for most
applications. But the user is free to substitute a different parser
template if desired.</p>
<p>Depending on command-line options, Lemon will generate between
one and three files of outputs.
<ul>
<li>C code to implement the parser.
<li>A header file defining an integer ID for each terminal symbol.
<li>An information file that describes the states of the generated parser
automaton.
</ul>
By default, all three of these output files are generated.
The header file is suppressed if the ``-m'' command-line option is
used and the report file is omitted when ``-q'' is selected.</p>
<p>The grammar specification file uses a ``.y'' suffix, by convention.
In the examples used in this document, we'll assume the name of the
grammar file is ``gram.y''. A typical use of Lemon would be the
following command:
<pre>
lemon gram.y
</pre>
This command will generate three output files named ``gram.c'',
``gram.h'' and ``gram.out''.
The first is C code to implement the parser. The second
is the header file that defines numerical values for all
terminal symbols, and the last is the report that explains
the states used by the parser automaton.</p>
<h3>Command Line Options</h3>
<p>The behavior of Lemon can be modified using command-line options.
You can obtain a list of the available command-line options together
with a brief explanation of what each does by typing
<pre>
lemon -?
</pre>
As of this writing, the following command-line options are supported:
<ul>
<li><tt>-b</tt>
<li><tt>-c</tt>
<li><tt>-g</tt>
<li><tt>-m</tt>
<li><tt>-q</tt>
<li><tt>-s</tt>
<li><tt>-x</tt>
</ul>
The ``-b'' option reduces the amount of text in the report file by
printing only the basis of each parser state, rather than the full
configuration.
The ``-c'' option suppresses action table compression. Using -c
will make the parser a little larger and slower but it will detect
syntax errors sooner.
The ``-g'' option causes no output files to be generated at all.
Instead, the input grammar file is printed on standard output but
with all comments, actions and other extraneous text deleted. This
is a useful way to get a quick summary of a grammar.
The ``-m'' option causes the output C source file to be compatible
with the ``makeheaders'' program.
Makeheaders is a program that automatically generates header files
from C source code. When the ``-m'' option is used, the header
file is not output since the makeheaders program will take care
of generated all header files automatically.
The ``-q'' option suppresses the report file.
Using ``-s'' causes a brief summary of parser statistics to be
printed. Like this:
<pre>
Parser statistics: 74 terminals, 70 nonterminals, 179 rules
340 states, 2026 parser table entries, 0 conflicts
</pre>
Finally, the ``-x'' option causes Lemon to print its version number
and then stops without attempting to read the grammar or generate a parser.</p>
<h3>The Parser Interface</h3>
<p>Lemon doesn't generate a complete, working program. It only generates
a few subroutines that implement a parser. This section describes
the interface to those subroutines. It is up to the programmer to
call these subroutines in an appropriate way in order to produce a
complete system.</p>
<p>Before a program begins using a Lemon-generated parser, the program
must first create the parser.
A new parser is created as follows:
<pre>
void *pParser = ParseAlloc( malloc );
</pre>
The ParseAlloc() routine allocates and initializes a new parser and
returns a pointer to it.
The actual data structure used to represent a parser is opaque --
its internal structure is not visible or usable by the calling routine.
For this reason, the ParseAlloc() routine returns a pointer to void
rather than a pointer to some particular structure.
The sole argument to the ParseAlloc() routine is a pointer to the
subroutine used to allocate memory. Typically this means ``malloc()''.</p>
<p>After a program is finished using a parser, it can reclaim all
memory allocated by that parser by calling
<pre>
ParseFree(pParser, free);
</pre>
The first argument is the same pointer returned by ParseAlloc(). The
second argument is a pointer to the function used to release bulk
memory back to the system.</p>
<p>After a parser has been allocated using ParseAlloc(), the programmer
must supply the parser with a sequence of tokens (terminal symbols) to
be parsed. This is accomplished by calling the following function
once for each token:
<pre>
Parse(pParser, hTokenID, sTokenData, pArg);
</pre>
The first argument to the Parse() routine is the pointer returned by
ParseAlloc().
The second argument is a small positive integer that tells the parse the
type of the next token in the data stream.
There is one token type for each terminal symbol in the grammar.
The gram.h file generated by Lemon contains #define statements that
map symbolic terminal symbol names into appropriate integer values.
(A value of 0 for the second argument is a special flag to the
parser to indicate that the end of input has been reached.)
The third argument is the value of the given token. By default,
the type of the third argument is integer, but the grammar will
usually redefine this type to be some kind of structure.
Typically the second argument will be a broad category of tokens
such as ``identifier'' or ``number'' and the third argument will
be the name of the identifier or the value of the number.</p>
<p>The Parse() function may have either three or four arguments,
depending on the grammar. If the grammar specification file request
it, the Parse() function will have a fourth parameter that can be
of any type chosen by the programmer. The parser doesn't do anything
with this argument except to pass it through to action routines.
This is a convenient mechanism for passing state information down
to the action routines without having to use global variables.</p>
<p>A typical use of a Lemon parser might look something like the
following:
<pre>
01 ParseTree *ParseFile(const char *zFilename){
02 Tokenizer *pTokenizer;
03 void *pParser;
04 Token sToken;
05 int hTokenId;
06 ParserState sState;
07
08 pTokenizer = TokenizerCreate(zFilename);
09 pParser = ParseAlloc( malloc );
10 InitParserState(&sState);
11 while( GetNextToken(pTokenizer, &hTokenId, &sToken) ){
12 Parse(pParser, hTokenId, sToken, &sState);
13 }
14 Parse(pParser, 0, sToken, &sState);
15 ParseFree(pParser, free );
16 TokenizerFree(pTokenizer);
17 return sState.treeRoot;
18 }
</pre>
This example shows a user-written routine that parses a file of
text and returns a pointer to the parse tree.
(We've omitted all error-handling from this example to keep it
simple.)
We assume the existence of some kind of tokenizer which is created
using TokenizerCreate() on line 8 and deleted by TokenizerFree()
on line 16. The GetNextToken() function on line 11 retrieves the
next token from the input file and puts its type in the
integer variable hTokenId. The sToken variable is assumed to be
some kind of structure that contains details about each token,
such as its complete text, what line it occurs on, etc. </p>
<p>This example also assumes the existence of structure of type
ParserState that holds state information about a particular parse.
An instance of such a structure is created on line 6 and initialized
on line 10. A pointer to this structure is passed into the Parse()
routine as the optional 4th argument.
The action routine specified by the grammar for the parser can use
the ParserState structure to hold whatever information is useful and
appropriate. In the example, we note that the treeRoot field of
the ParserState structure is left pointing to the root of the parse
tree.</p>
<p>The core of this example as it relates to Lemon is as follows:
<pre>
ParseFile(){
pParser = ParseAlloc( malloc );
while( GetNextToken(pTokenizer,&hTokenId, &sToken) ){
Parse(pParser, hTokenId, sToken);
}
Parse(pParser, 0, sToken);
ParseFree(pParser, free );
}
</pre>
Basically, what a program has to do to use a Lemon-generated parser
is first create the parser, then send it lots of tokens obtained by
tokenizing an input source. When the end of input is reached, the
Parse() routine should be called one last time with a token type
of 0. This step is necessary to inform the parser that the end of
input has been reached. Finally, we reclaim memory used by the
parser by calling ParseFree().</p>
<p>There is one other interface routine that should be mentioned
before we move on.
The ParseTrace() function can be used to generate debugging output
from the parser. A prototype for this routine is as follows:
<pre>
ParseTrace(FILE *stream, char *zPrefix);
</pre>
After this routine is called, a short (one-line) message is written
to the designated output stream every time the parser changes states
or calls an action routine. Each such message is prefaced using
the text given by zPrefix. This debugging output can be turned off
by calling ParseTrace() again with a first argument of NULL (0).</p>
<h3>Differences With YACC and BISON</h3>
<p>Programmers who have previously used the yacc or bison parser
generator will notice several important differences between yacc and/or
bison and Lemon.
<ul>
<li>In yacc and bison, the parser calls the tokenizer. In Lemon,
the tokenizer calls the parser.
<li>Lemon uses no global variables. Yacc and bison use global variables
to pass information between the tokenizer and parser.
<li>Lemon allows multiple parsers to be running simultaneously. Yacc
and bison do not.
</ul>
These differences may cause some initial confusion for programmers
with prior yacc and bison experience.
But after years of experience using Lemon, I firmly
believe that the Lemon way of doing things is better.</p>
<h2>Input File Syntax</h2>
<p>The main purpose of the grammar specification file for Lemon is
to define the grammar for the parser. But the input file also
specifies additional information Lemon requires to do its job.
Most of the work in using Lemon is in writing an appropriate
grammar file.</p>
<p>The grammar file for lemon is, for the most part, free format.
It does not have sections or divisions like yacc or bison. Any
declaration can occur at any point in the file.
Lemon ignores whitespace (except where it is needed to separate
tokens) and it honors the same commenting conventions as C and C++.</p>
<h3>Terminals and Nonterminals</h3>
<p>A terminal symbol (token) is any string of alphanumeric
and underscore characters
that begins with an upper case letter.
A terminal can contain lower class letters after the first character,
but the usual convention is to make terminals all upper case.
A nonterminal, on the other hand, is any string of alphanumeric
and underscore characters than begins with a lower case letter.
Again, the usual convention is to make nonterminals use all lower
case letters.</p>
<p>In Lemon, terminal and nonterminal symbols do not need to
be declared or identified in a separate section of the grammar file.
Lemon is able to generate a list of all terminals and nonterminals
by examining the grammar rules, and it can always distinguish a
terminal from a nonterminal by checking the case of the first
character of the name.</p>
<p>Yacc and bison allow terminal symbols to have either alphanumeric
names or to be individual characters included in single quotes, like
this: ')' or '$'. Lemon does not allow this alternative form for
terminal symbols. With Lemon, all symbols, terminals and nonterminals,
must have alphanumeric names.</p>
<h3>Grammar Rules</h3>
<p>The main component of a Lemon grammar file is a sequence of grammar
rules.
Each grammar rule consists of a nonterminal symbol followed by
the special symbol ``::='' and then a list of terminals and/or nonterminals.
The rule is terminated by a period.
The list of terminals and nonterminals on the right-hand side of the
rule can be empty.
Rules can occur in any order, except that the left-hand side of the
first rule is assumed to be the start symbol for the grammar (unless
specified otherwise using the <tt>%start</tt> directive described below.)
A typical sequence of grammar rules might look something like this:
<pre>
expr ::= expr PLUS expr.
expr ::= expr TIMES expr.
expr ::= LPAREN expr RPAREN.
expr ::= VALUE.
</pre>
</p>
<p>There is one non-terminal in this example, ``expr'', and five
terminal symbols or tokens: ``PLUS'', ``TIMES'', ``LPAREN'',
``RPAREN'' and ``VALUE''.</p>
<p>Like yacc and bison, Lemon allows the grammar to specify a block
of C code that will be executed whenever a grammar rule is reduced
by the parser.
In Lemon, this action is specified by putting the C code (contained
within curly braces <tt>{...}</tt>) immediately after the
period that closes the rule.
For example:
<pre>
expr ::= expr PLUS expr. { printf("Doing an addition...\n"); }
</pre>
</p>
<p>In order to be useful, grammar actions must normally be linked to
their associated grammar rules.
In yacc and bison, this is accomplished by embedding a ``$$'' in the
action to stand for the value of the left-hand side of the rule and
symbols ``$1'', ``$2'', and so forth to stand for the value of
the terminal or nonterminal at position 1, 2 and so forth on the
right-hand side of the rule.
This idea is very powerful, but it is also very error-prone. The
single most common source of errors in a yacc or bison grammar is
to miscount the number of symbols on the right-hand side of a grammar
rule and say ``$7'' when you really mean ``$8''.</p>
<p>Lemon avoids the need to count grammar symbols by assigning symbolic
names to each symbol in a grammar rule and then using those symbolic
names in the action.
In yacc or bison, one would write this:
<pre>
expr -> expr PLUS expr { $$ = $1 + $3; };
</pre>
But in Lemon, the same rule becomes the following:
<pre>
expr(A) ::= expr(B) PLUS expr(C). { A = B+C; }
</pre>
In the Lemon rule, any symbol in parentheses after a grammar rule
symbol becomes a place holder for that symbol in the grammar rule.
This place holder can then be used in the associated C action to
stand for the value of that symbol.<p>
<p>The Lemon notation for linking a grammar rule with its reduce
action is superior to yacc/bison on several counts.
First, as mentioned above, the Lemon method avoids the need to
count grammar symbols.
Secondly, if a terminal or nonterminal in a Lemon grammar rule
includes a linking symbol in parentheses but that linking symbol
is not actually used in the reduce action, then an error message
is generated.
For example, the rule
<pre>
expr(A) ::= expr(B) PLUS expr(C). { A = B; }
</pre>
will generate an error because the linking symbol ``C'' is used
in the grammar rule but not in the reduce action.</p>
<p>The Lemon notation for linking grammar rules to reduce actions
also facilitates the use of destructors for reclaiming memory
allocated by the values of terminals and nonterminals on the
right-hand side of a rule.</p>
<h3>Precedence Rules</h3>
<p>Lemon resolves parsing ambiguities in exactly the same way as
yacc and bison. A shift-reduce conflict is resolved in favor
of the shift, and a reduce-reduce conflict is resolved by reducing
whichever rule comes first in the grammar file.</p>
<p>Just like in
yacc and bison, Lemon allows a measure of control
over the resolution of paring conflicts using precedence rules.
A precedence value can be assigned to any terminal symbol
using the %left, %right or %nonassoc directives. Terminal symbols
mentioned in earlier directives have a lower precedence that
terminal symbols mentioned in later directives. For example:</p>
<p><pre>
%left AND.
%left OR.
%nonassoc EQ NE GT GE LT LE.
%left PLUS MINUS.
%left TIMES DIVIDE MOD.
%right EXP NOT.
</pre></p>
<p>In the preceding sequence of directives, the AND operator is
defined to have the lowest precedence. The OR operator is one
precedence level higher. And so forth. Hence, the grammar would
attempt to group the ambiguous expression
<pre>
a AND b OR c
</pre>
like this
<pre>
a AND (b OR c).
</pre>
The associativity (left, right or nonassoc) is used to determine
the grouping when the precedence is the same. AND is left-associative
in our example, so
<pre>
a AND b AND c
</pre>
is parsed like this
<pre>
(a AND b) AND c.
</pre>
The EXP operator is right-associative, though, so
<pre>
a EXP b EXP c
</pre>
is parsed like this
<pre>
a EXP (b EXP c).
</pre>
The nonassoc precedence is used for non-associative operators.
So
<pre>
a EQ b EQ c
</pre>
is an error.</p>
<p>The precedence of non-terminals is transferred to rules as follows:
The precedence of a grammar rule is equal to the precedence of the
left-most terminal symbol in the rule for which a precedence is
defined. This is normally what you want, but in those cases where
you want to precedence of a grammar rule to be something different,
you can specify an alternative precedence symbol by putting the
symbol in square braces after the period at the end of the rule and
before any C-code. For example:</p>
<p><pre>
expr = MINUS expr. [NOT]
</pre></p>
<p>This rule has a precedence equal to that of the NOT symbol, not the
MINUS symbol as would have been the case by default.</p>
<p>With the knowledge of how precedence is assigned to terminal
symbols and individual
grammar rules, we can now explain precisely how parsing conflicts
are resolved in Lemon. Shift-reduce conflicts are resolved
as follows:
<ul>
<li> If either the token to be shifted or the rule to be reduced
lacks precedence information, then resolve in favor of the
shift, but report a parsing conflict.
<li> If the precedence of the token to be shifted is greater than
the precedence of the rule to reduce, then resolve in favor
of the shift. No parsing conflict is reported.
<li> If the precedence of the token it be shifted is less than the
precedence of the rule to reduce, then resolve in favor of the
reduce action. No parsing conflict is reported.
<li> If the precedences are the same and the shift token is
right-associative, then resolve in favor of the shift.
No parsing conflict is reported.
<li> If the precedences are the same the the shift token is
left-associative, then resolve in favor of the reduce.
No parsing conflict is reported.
<li> Otherwise, resolve the conflict by doing the shift and
report the parsing conflict.
</ul>
Reduce-reduce conflicts are resolved this way:
<ul>
<li> If either reduce rule
lacks precedence information, then resolve in favor of the
rule that appears first in the grammar and report a parsing
conflict.
<li> If both rules have precedence and the precedence is different
then resolve the dispute in favor of the rule with the highest
precedence and do not report a conflict.
<li> Otherwise, resolve the conflict by reducing by the rule that
appears first in the grammar and report a parsing conflict.
</ul>
<h3>Special Directives</h3>
<p>The input grammar to Lemon consists of grammar rules and special
directives. We've described all the grammar rules, so now we'll
talk about the special directives.</p>
<p>Directives in lemon can occur in any order. You can put them before
the grammar rules, or after the grammar rules, or in the mist of the
grammar rules. It doesn't matter. The relative order of
directives used to assign precedence to terminals is important, but
other than that, the order of directives in Lemon is arbitrary.</p>
<p>Lemon supports the following special directives:
<ul>
<li><tt>%code</tt>
<li><tt>%default_destructor</tt>
<li><tt>%default_type</tt>
<li><tt>%destructor</tt>
<li><tt>%extra_argument</tt>
<li><tt>%include</tt>
<li><tt>%left</tt>
<li><tt>%name</tt>
<li><tt>%nonassoc</tt>
<li><tt>%parse_accept</tt>
<li><tt>%parse_failure </tt>
<li><tt>%right</tt>
<li><tt>%stack_overflow</tt>
<li><tt>%stack_size</tt>
<li><tt>%start_symbol</tt>
<li><tt>%syntax_error</tt>
<li><tt>%token_destructor</tt>
<li><tt>%token_prefix</tt>
<li><tt>%token_type</tt>
<li><tt>%type</tt>
</ul>
Each of these directives will be described separately in the
following sections:</p>
<h4>The <tt>%code</tt> directive</h4>
<p>The %code directive is used to specify addition C/C++ code that
is added to the end of the main output file. This is similar to
the %include directive except that %include is inserted at the
beginning of the main output file.</p>
<p>%code is typically used to include some action routines or perhaps
a tokenizer as part of the output file.</p>
<h4>The <tt>%default_destructor</tt> directive</h4>
<p>The %default_destructor directive specifies a destructor to
use for non-terminals that do not have their own destructor
specified by a separate %destructor directive. See the documentation
on the %destructor directive below for additional information.</p>
<p>In some grammers, many different non-terminal symbols have the
same datatype and hence the same destructor. This directive is
a convenience way to specify the same destructor for all those
non-terminals using a single statement.</p>
<h4>The <tt>%default_type</tt> directive</h4>
<p>The %default_type directive specifies the datatype of non-terminal
symbols that do no have their own datatype defined using a separate
%type directive. See the documentation on %type below for addition
information.</p>
<h4>The <tt>%destructor</tt> directive</h4>
<p>The %destructor directive is used to specify a destructor for
a non-terminal symbol.
(See also the %token_destructor directive which is used to
specify a destructor for terminal symbols.)</p>
<p>A non-terminal's destructor is called to dispose of the
non-terminal's value whenever the non-terminal is popped from
the stack. This includes all of the following circumstances:
<ul>
<li> When a rule reduces and the value of a non-terminal on
the right-hand side is not linked to C code.
<li> When the stack is popped during error processing.
<li> When the ParseFree() function runs.
</ul>
The destructor can do whatever it wants with the value of
the non-terminal, but its design is to deallocate memory
or other resources held by that non-terminal.</p>
<p>Consider an example:
<pre>
%type nt {void*}
%destructor nt { free($$); }
nt(A) ::= ID NUM. { A = malloc( 100 ); }
</pre>
This example is a bit contrived but it serves to illustrate how
destructors work. The example shows a non-terminal named
``nt'' that holds values of type ``void*''. When the rule for
an ``nt'' reduces, it sets the value of the non-terminal to
space obtained from malloc(). Later, when the nt non-terminal
is popped from the stack, the destructor will fire and call
free() on this malloced space, thus avoiding a memory leak.
(Note that the symbol ``$$'' in the destructor code is replaced
by the value of the non-terminal.)</p>
<p>It is important to note that the value of a non-terminal is passed
to the destructor whenever the non-terminal is removed from the
stack, unless the non-terminal is used in a C-code action. If
the non-terminal is used by C-code, then it is assumed that the
C-code will take care of destroying it if it should really
be destroyed. More commonly, the value is used to build some
larger structure and we don't want to destroy it, which is why
the destructor is not called in this circumstance.</p>
<p>By appropriate use of destructors, it is possible to
build a parser using Lemon that can be used within a long-running
program, such as a GUI, that will not leak memory or other resources.
To do the same using yacc or bison is much more difficult.</p>
<h4>The <tt>%extra_argument</tt> directive</h4>
The %extra_argument directive instructs Lemon to add a 4th parameter
to the parameter list of the Parse() function it generates. Lemon
doesn't do anything itself with this extra argument, but it does
make the argument available to C-code action routines, destructors,
and so forth. For example, if the grammar file contains:</p>
<p><pre>
%extra_argument { MyStruct *pAbc }
</pre></p>
<p>Then the Parse() function generated will have an 4th parameter
of type ``MyStruct*'' and all action routines will have access to
a variable named ``pAbc'' that is the value of the 4th parameter
in the most recent call to Parse().</p>
<h4>The <tt>%include</tt> directive</h4>
<p>The %include directive specifies C code that is included at the
top of the generated parser. You can include any text you want --
the Lemon parser generator copies it blindly. If you have multiple
%include directives in your grammar file the value of the last
%include directive overwrites all the others.</p.
<p>The %include directive is very handy for getting some extra #include
preprocessor statements at the beginning of the generated parser.
For example:</p>
<p><pre>
%include {#include &lt;unistd.h&gt;}
</pre></p>
<p>This might be needed, for example, if some of the C actions in the
grammar call functions that are prototyed in unistd.h.</p>
<h4>The <tt>%left</tt> directive</h4>
The %left directive is used (along with the %right and
%nonassoc directives) to declare precedences of terminal
symbols. Every terminal symbol whose name appears after
a %left directive but before the next period (``.'') is
given the same left-associative precedence value. Subsequent
%left directives have higher precedence. For example:</p>
<p><pre>
%left AND.
%left OR.
%nonassoc EQ NE GT GE LT LE.
%left PLUS MINUS.
%left TIMES DIVIDE MOD.
%right EXP NOT.
</pre></p>
<p>Note the period that terminates each %left, %right or %nonassoc
directive.</p>
<p>LALR(1) grammars can get into a situation where they require
a large amount of stack space if you make heavy use or right-associative
operators. For this reason, it is recommended that you use %left
rather than %right whenever possible.</p>
<h4>The <tt>%name</tt> directive</h4>
<p>By default, the functions generated by Lemon all begin with the
five-character string ``Parse''. You can change this string to something
different using the %name directive. For instance:</p>
<p><pre>
%name Abcde
</pre></p>
<p>Putting this directive in the grammar file will cause Lemon to generate
functions named
<ul>
<li> AbcdeAlloc(),
<li> AbcdeFree(),
<li> AbcdeTrace(), and
<li> Abcde().
</ul>
The %name directive allows you to generator two or more different
parsers and link them all into the same executable.
</p>
<h4>The <tt>%nonassoc</tt> directive</h4>
<p>This directive is used to assign non-associative precedence to
one or more terminal symbols. See the section on precedence rules
or on the %left directive for additional information.</p>
<h4>The <tt>%parse_accept</tt> directive</h4>
<p>The %parse_accept directive specifies a block of C code that is
executed whenever the parser accepts its input string. To ``accept''
an input string means that the parser was able to process all tokens
without error.</p>
<p>For example:</p>
<p><pre>
%parse_accept {
printf("parsing complete!\n");
}
</pre></p>
<h4>The <tt>%parse_failure</tt> directive</h4>
<p>The %parse_failure directive specifies a block of C code that
is executed whenever the parser fails complete. This code is not
executed until the parser has tried and failed to resolve an input
error using is usual error recovery strategy. The routine is
only invoked when parsing is unable to continue.</p>
<p><pre>
%parse_failure {
fprintf(stderr,"Giving up. Parser is hopelessly lost...\n");
}
</pre></p>
<h4>The <tt>%right</tt> directive</h4>
<p>This directive is used to assign right-associative precedence to
one or more terminal symbols. See the section on precedence rules
or on the %left directive for additional information.</p>
<h4>The <tt>%stack_overflow</tt> directive</h4>
<p>The %stack_overflow directive specifies a block of C code that
is executed if the parser's internal stack ever overflows. Typically
this just prints an error message. After a stack overflow, the parser
will be unable to continue and must be reset.</p>
<p><pre>
%stack_overflow {
fprintf(stderr,"Giving up. Parser stack overflow\n");
}
</pre></p>
<p>You can help prevent parser stack overflows by avoiding the use
of right recursion and right-precedence operators in your grammar.
Use left recursion and and left-precedence operators instead, to
encourage rules to reduce sooner and keep the stack size down.
For example, do rules like this:
<pre>
list ::= list element. // left-recursion. Good!
list ::= .
</pre>
Not like this:
<pre>
list ::= element list. // right-recursion. Bad!
list ::= .
</pre>
<h4>The <tt>%stack_size</tt> directive</h4>
<p>If stack overflow is a problem and you can't resolve the trouble
by using left-recursion, then you might want to increase the size
of the parser's stack using this directive. Put an positive integer
after the %stack_size directive and Lemon will generate a parse
with a stack of the requested size. The default value is 100.</p>
<p><pre>
%stack_size 2000
</pre></p>
<h4>The <tt>%start_symbol</tt> directive</h4>
<p>By default, the start-symbol for the grammar that Lemon generates
is the first non-terminal that appears in the grammar file. But you
can choose a different start-symbol using the %start_symbol directive.</p>
<p><pre>
%start_symbol prog
</pre></p>
<h4>The <tt>%token_destructor</tt> directive</h4>
<p>The %destructor directive assigns a destructor to a non-terminal
symbol. (See the description of the %destructor directive above.)
This directive does the same thing for all terminal symbols.</p>
<p>Unlike non-terminal symbols which may each have a different data type
for their values, terminals all use the same data type (defined by
the %token_type directive) and so they use a common destructor. Other
than that, the token destructor works just like the non-terminal
destructors.</p>
<h4>The <tt>%token_prefix</tt> directive</h4>
<p>Lemon generates #defines that assign small integer constants
to each terminal symbol in the grammar. If desired, Lemon will
add a prefix specified by this directive
to each of the #defines it generates.
So if the default output of Lemon looked like this:
<pre>
#define AND 1
#define MINUS 2
#define OR 3
#define PLUS 4
</pre>
You can insert a statement into the grammar like this:
<pre>
%token_prefix TOKEN_
</pre>
to cause Lemon to produce these symbols instead:
<pre>
#define TOKEN_AND 1
#define TOKEN_MINUS 2
#define TOKEN_OR 3
#define TOKEN_PLUS 4
</pre>
<h4>The <tt>%token_type</tt> and <tt>%type</tt> directives</h4>
<p>These directives are used to specify the data types for values
on the parser's stack associated with terminal and non-terminal
symbols. The values of all terminal symbols must be of the same
type. This turns out to be the same data type as the 3rd parameter
to the Parse() function generated by Lemon. Typically, you will
make the value of a terminal symbol by a pointer to some kind of
token structure. Like this:</p>
<p><pre>
%token_type {Token*}
</pre></p>
<p>If the data type of terminals is not specified, the default value
is ``int''.</p>
<p>Non-terminal symbols can each have their own data types. Typically
the data type of a non-terminal is a pointer to the root of a parse-tree
structure that contains all information about that non-terminal.
For example:</p>
<p><pre>
%type expr {Expr*}
</pre></p>
<p>Each entry on the parser's stack is actually a union containing
instances of all data types for every non-terminal and terminal symbol.
Lemon will automatically use the correct element of this union depending
on what the corresponding non-terminal or terminal symbol is. But
the grammar designer should keep in mind that the size of the union
will be the size of its largest element. So if you have a single
non-terminal whose data type requires 1K of storage, then your 100
entry parser stack will require 100K of heap space. If you are willing
and able to pay that price, fine. You just need to know.</p>
<h3>Error Processing</h3>
<p>After extensive experimentation over several years, it has been
discovered that the error recovery strategy used by yacc is about
as good as it gets. And so that is what Lemon uses.</p>
<p>When a Lemon-generated parser encounters a syntax error, it
first invokes the code specified by the %syntax_error directive, if
any. It then enters its error recovery strategy. The error recovery
strategy is to begin popping the parsers stack until it enters a
state where it is permitted to shift a special non-terminal symbol
named ``error''. It then shifts this non-terminal and continues
parsing. But the %syntax_error routine will not be called again
until at least three new tokens have been successfully shifted.</p>
<p>If the parser pops its stack until the stack is empty, and it still
is unable to shift the error symbol, then the %parse_failed routine
is invoked and the parser resets itself to its start state, ready
to begin parsing a new file. This is what will happen at the very
first syntax error, of course, if there are no instances of the
``error'' non-terminal in your grammar.</p>
</body>
</html>

View file

@ -0,0 +1,121 @@
An SQLite (version 1.0) database was used in a large military application
where the database contained 105 tables and indices. The following is
a breakdown on the sizes of keys and data within these tables and indices:
Entries: 967089
Size: 45896104
Avg Size: 48
Key Size: 11112265
Avg Key Size: 12
Max Key Size: 99
0..8 263 0%
9..12 5560 0%
13..16 71394 7%
17..24 180717 26%
25..32 215442 48%
33..40 151118 64%
41..48 77479 72%
49..56 13983 74%
57..64 14481 75%
65..80 41342 79%
81..96 127098 92%
97..112 38054 96%
113..128 14197 98%
129..144 8208 99%
145..160 3326 99%
161..176 1242 99%
177..192 604 99%
193..208 222 99%
209..224 213 99%
225..240 132 99%
241..256 58 99%
257..288 515 99%
289..320 64 99%
321..352 39 99%
353..384 44 99%
385..416 25 99%
417..448 24 99%
449..480 26 99%
481..512 27 99%
513..1024 470 99%
1025..2048 396 99%
2049..4096 187 99%
4097..8192 78 99%
8193..16384 35 99%
16385..32768 17 99%
32769..65536 6 99%
65537..65541 3 100%
If the indices are omitted, the statistics for the 49 tables
become the following:
Entries: 451103
Size: 30930282
Avg Size: 69
Key Size: 1804412
Avg Key Size: 4
Max Key Size: 4
0..24 89 0%
25..32 9417 2%
33..40 119162 28%
41..48 68710 43%
49..56 9539 45%
57..64 12435 48%
65..80 38650 57%
81..96 126877 85%
97..112 38030 93%
113..128 14183 96%
129..144 7668 98%
145..160 3302 99%
161..176 1238 99%
177..192 597 99%
193..208 217 99%
209..224 211 99%
225..240 130 99%
241..256 57 99%
257..288 100 99%
289..320 62 99%
321..352 34 99%
353..384 43 99%
385..416 24 99%
417..448 24 99%
449..480 25 99%
481..512 27 99%
513..1024 153 99%
1025..2048 92 99%
2049..4096 7 100%
The 56 indices have these statistics:
Entries: 512422
Size: 14879828
Avg Size: 30
Key Size: 9253204
Avg Key Size: 19
Max Key Size: 99
0..8 246 0%
9..12 5486 1%
13..16 70717 14%
17..24 178246 49%
25..32 205722 89%
33..40 31951 96%
41..48 8768 97%
49..56 4444 98%
57..64 2046 99%
65..80 2691 99%
81..96 202 99%
97..112 11 99%
113..144 527 99%
145..160 20 99%
161..288 406 99%
289..1024 316 99%
1025..2048 304 99%
2049..4096 180 99%
4097..8192 78 99%
8193..16384 35 99%
16385..32768 17 99%
32769..65536 6 99%
65537..65541 3 100%

BIN
client/src/thirdparty/sqlite-3.4.2/ex1 vendored Normal file

Binary file not shown.

View file

@ -0,0 +1,4 @@
cls
gpp example1.cpp libsqlite3.a -o example1.exe
pause
example1.exe

View file

@ -0,0 +1,4 @@
cls
gpp example2.cpp libsqlite3.a -o example2.exe
pause
example2.exe

View file

@ -0,0 +1,4 @@
cls
gcc example3.c libsqlite3.a -o example3.exe
pause
example3.exe

View file

@ -0,0 +1,36 @@
#include <iostream>
#include <stdlib.h>
#include "sqlite3.h"
sqlite3 *database;
using namespace std;
int Callback( void *Pointer, int argc, char **argv, char **columnNames)
// (pointer to database,number of columns retrieved,
// array of pointers to the strings in the fields,column name)
{
cout<< *argv <<endl; // prints all entries in column "entry"
return 0;
}
int main(int argc, char *argv[])
{
// open database
int resultcode = sqlite3_open( "mydatabase" , &database);
//create table and insert entries
sqlite3_exec( database , "create table test(number integer , entry varchar(50) , primary key(number));" , NULL , NULL , NULL);
sqlite3_exec( database , "insert into test (number , entry) values (1 , 'Testentry 0');" , NULL , NULL , NULL);
sqlite3_exec( database , "insert into test (number , entry) values (2 , 'Testentry 1');" , NULL , NULL , NULL);
sqlite3_exec( database , "insert into test (number , entry) values (3 , 'Testentry 2');" , NULL , NULL , NULL);
// query database: retrieve all fields in column "entry":
sqlite3_exec( database , "select entry from test;" , Callback , NULL , NULL);
// close database again
sqlite3_close(database);
return 0;
}

View file

@ -0,0 +1,71 @@
#include <stdio.h>
#include "sqlite3.h"
// This is the callback function to display the select data in the table
static int callback(void *NotUsed, int argc, char **argv, char **azColName)
{
int i;
for(i=0; i<argc; i++)
{
printf("%s = %s\n", azColName[i], argv[i] ? argv[i] : "NULL");
}
printf("\n");
return 0;
}
int main(int argc, char* argv[])
{
sqlite3 *db; // sqlite3 db struct
char *zErrMsg = 0;
int rc;
// Open the test.db file
rc = sqlite3_open("ex2.db", &db);
if( rc ){
// failed
fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db));
}
else
{
// success
fprintf(stderr, "Opened database successfully\n\n");
}
const char *pSQL[6];
// Create a new myTable in database
pSQL[0] = "create table myTable (FirstName varchar(30), LastName varchar(30), Age smallint)";
// Insert first data item into myTable
pSQL[1] = "insert into myTable (FirstName, LastName, Age) values ('Washington', 'George', 22)";
// Insert second data item into myTable
pSQL[2] = "insert into myTable (FirstName, LastName, Age) values ('Lincoln', 'Abraham', 33)";
// Select all data in myTable
pSQL[3] = "select * from myTable";
// Remove all data in myTable
pSQL[4] = "delete from myTable";
// Drop the table from database
pSQL[5] = "drop table myTable";
// execute all the sql statement
for(int i = 0; i < 6; i++)
{
rc = sqlite3_exec(db, pSQL[i], callback, 0, &zErrMsg);
if( rc!=SQLITE_OK ){
fprintf(stderr, "SQL error: %s\n", zErrMsg);
sqlite3_free(zErrMsg);
break;
}
}
sqlite3_close(db);
return 0;
}

View file

@ -0,0 +1,64 @@
#include <stdio.h>
#include <stdlib.h>
#include "sqlite3.h"
int main(void) {
sqlite3 *db;
sqlite3_stmt *res;
int rc = 0;
int rec_count = 0;
const char *errMSG;
const char *tail;
rc = sqlite3_open("ex3.db", &db);
if (rc) {
puts("Can not open database");
exit(0);
}
// Create a new table called people in database
rc = sqlite3_exec(db,"create table people (id integer, firstname varchar(20), \
lastname varchar(20), phonenumber char(10))", 0, 0, 0);
//rc = sqlite3_exec(db,"delete from people", 0, 0, 0); //erase previous inserts
rc = sqlite3_exec(db,"insert into people (id, firstname, lastname, phonenumber) \
values (1, 'Fred', 'Flintstone', '5055551234');", NULL , NULL , NULL);
rc = sqlite3_exec(db,"insert into people (id, firstname, lastname, phonenumber) \
values (2, 'Wilma', 'Flintstone', '5055551234');" , NULL , NULL , NULL);
rc = sqlite3_exec(db,"insert into people (id, firstname, lastname, phonenumber) \
values (3, 'Barny', 'Rubble', '5055554321');" , NULL , NULL , NULL);
rc = sqlite3_exec(db,"update people set phonenumber=\'5055559999\' where id=3", 0, 0, 0);
rc = sqlite3_prepare_v2(db,"select lastname,firstname,phonenumber,id from people order by id",
1000, &res, &tail);
if (rc != SQLITE_OK) {
puts("We did not get any data!");
exit(0);
}
puts("==========================");
while (sqlite3_step(res) == SQLITE_ROW) {
printf("%s|", sqlite3_column_text(res, 0));
printf("%s|", sqlite3_column_text(res, 1));
printf("%s|", sqlite3_column_text(res, 2));
printf("%u\n", sqlite3_column_int(res, 3));
rec_count++;
}
puts("==========================");
printf("We received %d records.\n", rec_count);
sqlite3_finalize(res);
sqlite3_close(db);
return 0;
}

Binary file not shown.

View file

@ -0,0 +1,14 @@
This directory contains three examples how to use SQLite with DJGPP.
I found these examples on the net and do not claim copyright on them.
Compile these with the three batch files included c1-c3.bat.
You can also copy libsqlite3.a into djgpp\lib and sqlite3.h into djgpp\inc.
Links to tutorials:
http://zetcode.com/db/sqlite/
http://souptonuts.sourceforge.net/readme_sqlite_tutorial.html
http://www.yolinux.com/TUTORIALS/SQLite.html
20th Feb. 2013 Georg

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,2 @@
Version loadable extensions to SQLite are found in subfolders
of this folder.

View file

@ -0,0 +1,15 @@
/README.txt/1.1/Tue Aug 22 14:45:37 2006//
/fts1.h/1.2/Wed Sep 13 18:40:26 2006//
/fts1_hash.c/1.1/Thu Aug 31 15:07:15 2006//
/fts1_hash.h/1.1/Thu Aug 31 15:07:15 2006//
/fts1_porter.c/1.6/Fri Nov 30 01:28:10 2007//
/fts1_tokenizer.h/1.4/Sun Oct 1 18:41:20 2006//
/fts1_tokenizer1.c/1.8/Fri Nov 30 01:28:10 2007//
/fts1.c/1.55/Thu Dec 13 21:45:43 2007//
/ft_hash.c/1.1/Wed Aug 23 23:58:50 2006//
/ft_hash.h/1.1/Wed Aug 23 23:58:50 2006//
/fulltext.c/1.3/Mon Aug 28 23:46:02 2006//
/fulltext.h/1.1/Wed Aug 23 23:58:50 2006//
/simple_tokenizer.c/1.3/Wed Aug 30 21:40:30 2006//
/tokenizer.h/1.1/Wed Aug 23 23:58:50 2006//
D

View file

@ -0,0 +1 @@
sqlite/ext/fts1

View file

@ -0,0 +1 @@
:pserver:drh@sqlite.org:/sqlite

View file

@ -0,0 +1,2 @@
This folder contains source code to the first full-text search
extension for SQLite.

View file

@ -0,0 +1,404 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of generic hash-tables used in SQLite.
** We've modified it slightly to serve as a standalone hash table
** implementation for the full-text indexing module.
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "ft_hash.h"
void *malloc_and_zero(int n){
void *p = malloc(n);
if( p ){
memset(p, 0, n);
}
return p;
}
/* Turn bulk memory into a hash table object by initializing the
** fields of the Hash structure.
**
** "pNew" is a pointer to the hash table that is to be initialized.
** keyClass is one of the constants HASH_INT, HASH_POINTER,
** HASH_BINARY, or HASH_STRING. The value of keyClass
** determines what kind of key the hash table will use. "copyKey" is
** true if the hash table should make its own private copy of keys and
** false if it should just use the supplied pointer. CopyKey only makes
** sense for HASH_STRING and HASH_BINARY and is ignored
** for other key classes.
*/
void HashInit(Hash *pNew, int keyClass, int copyKey){
assert( pNew!=0 );
assert( keyClass>=HASH_STRING && keyClass<=HASH_BINARY );
pNew->keyClass = keyClass;
#if 0
if( keyClass==HASH_POINTER || keyClass==HASH_INT ) copyKey = 0;
#endif
pNew->copyKey = copyKey;
pNew->first = 0;
pNew->count = 0;
pNew->htsize = 0;
pNew->ht = 0;
pNew->xMalloc = malloc_and_zero;
pNew->xFree = free;
}
/* Remove all entries from a hash table. Reclaim all memory.
** Call this routine to delete a hash table or to reset a hash table
** to the empty state.
*/
void HashClear(Hash *pH){
HashElem *elem; /* For looping over all elements of the table */
assert( pH!=0 );
elem = pH->first;
pH->first = 0;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = 0;
pH->htsize = 0;
while( elem ){
HashElem *next_elem = elem->next;
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree(elem);
elem = next_elem;
}
pH->count = 0;
}
#if 0 /* NOT USED */
/*
** Hash and comparison functions when the mode is HASH_INT
*/
static int intHash(const void *pKey, int nKey){
return nKey ^ (nKey<<8) ^ (nKey>>8);
}
static int intCompare(const void *pKey1, int n1, const void *pKey2, int n2){
return n2 - n1;
}
#endif
#if 0 /* NOT USED */
/*
** Hash and comparison functions when the mode is HASH_POINTER
*/
static int ptrHash(const void *pKey, int nKey){
uptr x = Addr(pKey);
return x ^ (x<<8) ^ (x>>8);
}
static int ptrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( pKey1==pKey2 ) return 0;
if( pKey1<pKey2 ) return -1;
return 1;
}
#endif
/*
** Hash and comparison functions when the mode is HASH_STRING
*/
static int strHash(const void *pKey, int nKey){
const char *z = (const char *)pKey;
int h = 0;
if( nKey<=0 ) nKey = (int) strlen(z);
while( nKey > 0 ){
h = (h<<3) ^ h ^ *z++;
nKey--;
}
return h & 0x7fffffff;
}
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
}
/*
** Hash and comparison functions when the mode is HASH_BINARY
*/
static int binHash(const void *pKey, int nKey){
int h = 0;
const char *z = (const char *)pKey;
while( nKey-- > 0 ){
h = (h<<3) ^ h ^ *(z++);
}
return h & 0x7fffffff;
}
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return memcmp(pKey1,pKey2,n1);
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamilar to some
** programmers, so we provide the following additional explanation:
**
** The name of the function is "hashFunction". The function takes a
** single parameter "keyClass". The return value of hashFunction()
** is a pointer to another function. Specifically, the return value
** of hashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*hashFunction(int keyClass))(const void*,int){
#if 0 /* HASH_INT and HASH_POINTER are never used */
switch( keyClass ){
case HASH_INT: return &intHash;
case HASH_POINTER: return &ptrHash;
case HASH_STRING: return &strHash;
case HASH_BINARY: return &binHash;;
default: break;
}
return 0;
#else
if( keyClass==HASH_STRING ){
return &strHash;
}else{
assert( keyClass==HASH_BINARY );
return &binHash;
}
#endif
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** For help in interpreted the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
#if 0 /* HASH_INT and HASH_POINTER are never used */
switch( keyClass ){
case HASH_INT: return &intCompare;
case HASH_POINTER: return &ptrCompare;
case HASH_STRING: return &strCompare;
case HASH_BINARY: return &binCompare;
default: break;
}
return 0;
#else
if( keyClass==HASH_STRING ){
return &strCompare;
}else{
assert( keyClass==HASH_BINARY );
return &binCompare;
}
#endif
}
/* Link an element into the hash table
*/
static void insertElement(
Hash *pH, /* The complete hash table */
struct _ht *pEntry, /* The entry into which pNew is inserted */
HashElem *pNew /* The element to be inserted */
){
HashElem *pHead; /* First element already in pEntry */
pHead = pEntry->chain;
if( pHead ){
pNew->next = pHead;
pNew->prev = pHead->prev;
if( pHead->prev ){ pHead->prev->next = pNew; }
else { pH->first = pNew; }
pHead->prev = pNew;
}else{
pNew->next = pH->first;
if( pH->first ){ pH->first->prev = pNew; }
pNew->prev = 0;
pH->first = pNew;
}
pEntry->count++;
pEntry->chain = pNew;
}
/* Resize the hash table so that it cantains "new_size" buckets.
** "new_size" must be a power of 2. The hash table might fail
** to resize if sqliteMalloc() fails.
*/
static void rehash(Hash *pH, int new_size){
struct _ht *new_ht; /* The new hash table */
HashElem *elem, *next_elem; /* For looping over existing elements */
int (*xHash)(const void*,int); /* The hash function */
assert( (new_size & (new_size-1))==0 );
new_ht = (struct _ht *)pH->xMalloc( new_size*sizeof(struct _ht) );
if( new_ht==0 ) return;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = new_ht;
pH->htsize = new_size;
xHash = hashFunction(pH->keyClass);
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
next_elem = elem->next;
insertElement(pH, &new_ht[h], elem);
}
}
/* This function (for internal use only) locates an element in an
** hash table that matches the given key. The hash for this key has
** already been computed and is passed as the 4th parameter.
*/
static HashElem *findElementGivenHash(
const Hash *pH, /* The pH to be searched */
const void *pKey, /* The key we are searching for */
int nKey,
int h /* The hash for this key. */
){
HashElem *elem; /* Used to loop thru the element list */
int count; /* Number of elements left to test */
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
if( pH->ht ){
struct _ht *pEntry = &pH->ht[h];
elem = pEntry->chain;
count = pEntry->count;
xCompare = compareFunction(pH->keyClass);
while( count-- && elem ){
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
return elem;
}
elem = elem->next;
}
}
return 0;
}
/* Remove a single entry from the hash table given a pointer to that
** element and a hash on the element's key.
*/
static void removeElementGivenHash(
Hash *pH, /* The pH containing "elem" */
HashElem* elem, /* The element to be removed from the pH */
int h /* Hash value for the element */
){
struct _ht *pEntry;
if( elem->prev ){
elem->prev->next = elem->next;
}else{
pH->first = elem->next;
}
if( elem->next ){
elem->next->prev = elem->prev;
}
pEntry = &pH->ht[h];
if( pEntry->chain==elem ){
pEntry->chain = elem->next;
}
pEntry->count--;
if( pEntry->count<=0 ){
pEntry->chain = 0;
}
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree( elem );
pH->count--;
if( pH->count<=0 ){
assert( pH->first==0 );
assert( pH->count==0 );
HashClear(pH);
}
}
/* Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey. Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *HashFind(const Hash *pH, const void *pKey, int nKey){
int h; /* A hash on key */
HashElem *elem; /* The element that matches key */
int (*xHash)(const void*,int); /* The hash function */
if( pH==0 || pH->ht==0 ) return 0;
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
h = (*xHash)(pKey,nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
return elem ? elem->data : 0;
}
/* Insert an element into the hash table pH. The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created. A copy of the key is made if the copyKey
** flag is set. NULL is returned.
**
** If another element already exists with the same key, then the
** new data replaces the old data and the old data is returned.
** The key is not copied in this instance. If a malloc fails, then
** the new data is returned and the hash table is unchanged.
**
** If the "data" parameter to this function is NULL, then the
** element corresponding to "key" is removed from the hash table.
*/
void *HashInsert(Hash *pH, const void *pKey, int nKey, void *data){
int hraw; /* Raw hash value of the key */
int h; /* the hash of the key modulo hash table size */
HashElem *elem; /* Used to loop thru the element list */
HashElem *new_elem; /* New element added to the pH */
int (*xHash)(const void*,int); /* The hash function */
assert( pH!=0 );
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
hraw = (*xHash)(pKey, nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
elem = findElementGivenHash(pH,pKey,nKey,h);
if( elem ){
void *old_data = elem->data;
if( data==0 ){
removeElementGivenHash(pH,elem,h);
}else{
elem->data = data;
}
return old_data;
}
if( data==0 ) return 0;
new_elem = (HashElem*)pH->xMalloc( sizeof(HashElem) );
if( new_elem==0 ) return data;
if( pH->copyKey && pKey!=0 ){
new_elem->pKey = pH->xMalloc( nKey );
if( new_elem->pKey==0 ){
pH->xFree(new_elem);
return data;
}
memcpy((void*)new_elem->pKey, pKey, nKey);
}else{
new_elem->pKey = (void*)pKey;
}
new_elem->nKey = nKey;
pH->count++;
if( pH->htsize==0 ){
rehash(pH,8);
if( pH->htsize==0 ){
pH->count = 0;
pH->xFree(new_elem);
return data;
}
}
if( pH->count > pH->htsize ){
rehash(pH,pH->htsize*2);
}
assert( pH->htsize>0 );
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
insertElement(pH, &pH->ht[h], new_elem);
new_elem->data = data;
return 0;
}

View file

@ -0,0 +1,111 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implemenation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _HASH_H_
#define _HASH_H_
/* Forward declarations of structures. */
typedef struct Hash Hash;
typedef struct HashElem HashElem;
/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly. Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct Hash {
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
char copyKey; /* True if copy of key made on insert */
int count; /* Number of entries in this table */
HashElem *first; /* The first element of the array */
void *(*xMalloc)(int); /* malloc() function to use */
void (*xFree)(void *); /* free() function to use */
int htsize; /* Number of buckets in the hash table */
struct _ht { /* the hash table */
int count; /* Number of entries with this hash */
HashElem *chain; /* Pointer to first entry with this hash */
} *ht;
};
/* Each element in the hash table is an instance of the following
** structure. All elements are stored on a single doubly-linked list.
**
** Again, this structure is intended to be opaque, but it can't really
** be opaque because it is used by macros.
*/
struct HashElem {
HashElem *next, *prev; /* Next and previous elements in the table */
void *data; /* Data associated with this element */
void *pKey; int nKey; /* Key associated with this element */
};
/*
** There are 4 different modes of operation for a hash table:
**
** HASH_INT nKey is used as the key and pKey is ignored.
**
** HASH_POINTER pKey is used as the key and nKey is ignored.
**
** HASH_STRING pKey points to a string that is nKey bytes long
** (including the null-terminator, if any). Case
** is respected in comparisons.
**
** HASH_BINARY pKey points to binary data nKey bytes long.
** memcmp() is used to compare keys.
**
** A copy of the key is made for HASH_STRING and HASH_BINARY
** if the copyKey parameter to HashInit is 1.
*/
/* #define HASH_INT 1 // NOT USED */
/* #define HASH_POINTER 2 // NOT USED */
#define HASH_STRING 3
#define HASH_BINARY 4
/*
** Access routines. To delete, insert a NULL pointer.
*/
void HashInit(Hash*, int keytype, int copyKey);
void *HashInsert(Hash*, const void *pKey, int nKey, void *pData);
void *HashFind(const Hash*, const void *pKey, int nKey);
void HashClear(Hash*);
/*
** Macros for looping over all elements of a hash table. The idiom is
** like this:
**
** Hash h;
** HashElem *p;
** ...
** for(p=HashFirst(&h); p; p=HashNext(p)){
** SomeStructure *pData = HashData(p);
** // do something with pData
** }
*/
#define HashFirst(H) ((H)->first)
#define HashNext(E) ((E)->next)
#define HashData(E) ((E)->data)
#define HashKey(E) ((E)->pKey)
#define HashKeysize(E) ((E)->nKey)
/*
** Number of entries in a hash table
*/
#define HashCount(H) ((H)->count)
#endif /* _HASH_H_ */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,11 @@
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
int sqlite3Fts1Init(sqlite3 *db);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

View file

@ -0,0 +1,369 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of generic hash-tables used in SQLite.
** We've modified it slightly to serve as a standalone hash table
** implementation for the full-text indexing module.
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/*
** The code in this file is only compiled if:
**
** * The FTS1 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS1 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
#include "fts1_hash.h"
static void *malloc_and_zero(int n){
void *p = malloc(n);
if( p ){
memset(p, 0, n);
}
return p;
}
/* Turn bulk memory into a hash table object by initializing the
** fields of the Hash structure.
**
** "pNew" is a pointer to the hash table that is to be initialized.
** keyClass is one of the constants
** FTS1_HASH_BINARY or FTS1_HASH_STRING. The value of keyClass
** determines what kind of key the hash table will use. "copyKey" is
** true if the hash table should make its own private copy of keys and
** false if it should just use the supplied pointer.
*/
void sqlite3Fts1HashInit(fts1Hash *pNew, int keyClass, int copyKey){
assert( pNew!=0 );
assert( keyClass>=FTS1_HASH_STRING && keyClass<=FTS1_HASH_BINARY );
pNew->keyClass = keyClass;
pNew->copyKey = copyKey;
pNew->first = 0;
pNew->count = 0;
pNew->htsize = 0;
pNew->ht = 0;
pNew->xMalloc = malloc_and_zero;
pNew->xFree = free;
}
/* Remove all entries from a hash table. Reclaim all memory.
** Call this routine to delete a hash table or to reset a hash table
** to the empty state.
*/
void sqlite3Fts1HashClear(fts1Hash *pH){
fts1HashElem *elem; /* For looping over all elements of the table */
assert( pH!=0 );
elem = pH->first;
pH->first = 0;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = 0;
pH->htsize = 0;
while( elem ){
fts1HashElem *next_elem = elem->next;
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree(elem);
elem = next_elem;
}
pH->count = 0;
}
/*
** Hash and comparison functions when the mode is FTS1_HASH_STRING
*/
static int strHash(const void *pKey, int nKey){
const char *z = (const char *)pKey;
int h = 0;
if( nKey<=0 ) nKey = (int) strlen(z);
while( nKey > 0 ){
h = (h<<3) ^ h ^ *z++;
nKey--;
}
return h & 0x7fffffff;
}
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
}
/*
** Hash and comparison functions when the mode is FTS1_HASH_BINARY
*/
static int binHash(const void *pKey, int nKey){
int h = 0;
const char *z = (const char *)pKey;
while( nKey-- > 0 ){
h = (h<<3) ^ h ^ *(z++);
}
return h & 0x7fffffff;
}
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return memcmp(pKey1,pKey2,n1);
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamilar to some
** programmers, so we provide the following additional explanation:
**
** The name of the function is "hashFunction". The function takes a
** single parameter "keyClass". The return value of hashFunction()
** is a pointer to another function. Specifically, the return value
** of hashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*hashFunction(int keyClass))(const void*,int){
if( keyClass==FTS1_HASH_STRING ){
return &strHash;
}else{
assert( keyClass==FTS1_HASH_BINARY );
return &binHash;
}
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** For help in interpreted the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
if( keyClass==FTS1_HASH_STRING ){
return &strCompare;
}else{
assert( keyClass==FTS1_HASH_BINARY );
return &binCompare;
}
}
/* Link an element into the hash table
*/
static void insertElement(
fts1Hash *pH, /* The complete hash table */
struct _fts1ht *pEntry, /* The entry into which pNew is inserted */
fts1HashElem *pNew /* The element to be inserted */
){
fts1HashElem *pHead; /* First element already in pEntry */
pHead = pEntry->chain;
if( pHead ){
pNew->next = pHead;
pNew->prev = pHead->prev;
if( pHead->prev ){ pHead->prev->next = pNew; }
else { pH->first = pNew; }
pHead->prev = pNew;
}else{
pNew->next = pH->first;
if( pH->first ){ pH->first->prev = pNew; }
pNew->prev = 0;
pH->first = pNew;
}
pEntry->count++;
pEntry->chain = pNew;
}
/* Resize the hash table so that it cantains "new_size" buckets.
** "new_size" must be a power of 2. The hash table might fail
** to resize if sqliteMalloc() fails.
*/
static void rehash(fts1Hash *pH, int new_size){
struct _fts1ht *new_ht; /* The new hash table */
fts1HashElem *elem, *next_elem; /* For looping over existing elements */
int (*xHash)(const void*,int); /* The hash function */
assert( (new_size & (new_size-1))==0 );
new_ht = (struct _fts1ht *)pH->xMalloc( new_size*sizeof(struct _fts1ht) );
if( new_ht==0 ) return;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = new_ht;
pH->htsize = new_size;
xHash = hashFunction(pH->keyClass);
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
next_elem = elem->next;
insertElement(pH, &new_ht[h], elem);
}
}
/* This function (for internal use only) locates an element in an
** hash table that matches the given key. The hash for this key has
** already been computed and is passed as the 4th parameter.
*/
static fts1HashElem *findElementGivenHash(
const fts1Hash *pH, /* The pH to be searched */
const void *pKey, /* The key we are searching for */
int nKey,
int h /* The hash for this key. */
){
fts1HashElem *elem; /* Used to loop thru the element list */
int count; /* Number of elements left to test */
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
if( pH->ht ){
struct _fts1ht *pEntry = &pH->ht[h];
elem = pEntry->chain;
count = pEntry->count;
xCompare = compareFunction(pH->keyClass);
while( count-- && elem ){
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
return elem;
}
elem = elem->next;
}
}
return 0;
}
/* Remove a single entry from the hash table given a pointer to that
** element and a hash on the element's key.
*/
static void removeElementGivenHash(
fts1Hash *pH, /* The pH containing "elem" */
fts1HashElem* elem, /* The element to be removed from the pH */
int h /* Hash value for the element */
){
struct _fts1ht *pEntry;
if( elem->prev ){
elem->prev->next = elem->next;
}else{
pH->first = elem->next;
}
if( elem->next ){
elem->next->prev = elem->prev;
}
pEntry = &pH->ht[h];
if( pEntry->chain==elem ){
pEntry->chain = elem->next;
}
pEntry->count--;
if( pEntry->count<=0 ){
pEntry->chain = 0;
}
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree( elem );
pH->count--;
if( pH->count<=0 ){
assert( pH->first==0 );
assert( pH->count==0 );
fts1HashClear(pH);
}
}
/* Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey. Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *sqlite3Fts1HashFind(const fts1Hash *pH, const void *pKey, int nKey){
int h; /* A hash on key */
fts1HashElem *elem; /* The element that matches key */
int (*xHash)(const void*,int); /* The hash function */
if( pH==0 || pH->ht==0 ) return 0;
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
h = (*xHash)(pKey,nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
return elem ? elem->data : 0;
}
/* Insert an element into the hash table pH. The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created. A copy of the key is made if the copyKey
** flag is set. NULL is returned.
**
** If another element already exists with the same key, then the
** new data replaces the old data and the old data is returned.
** The key is not copied in this instance. If a malloc fails, then
** the new data is returned and the hash table is unchanged.
**
** If the "data" parameter to this function is NULL, then the
** element corresponding to "key" is removed from the hash table.
*/
void *sqlite3Fts1HashInsert(
fts1Hash *pH, /* The hash table to insert into */
const void *pKey, /* The key */
int nKey, /* Number of bytes in the key */
void *data /* The data */
){
int hraw; /* Raw hash value of the key */
int h; /* the hash of the key modulo hash table size */
fts1HashElem *elem; /* Used to loop thru the element list */
fts1HashElem *new_elem; /* New element added to the pH */
int (*xHash)(const void*,int); /* The hash function */
assert( pH!=0 );
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
hraw = (*xHash)(pKey, nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
elem = findElementGivenHash(pH,pKey,nKey,h);
if( elem ){
void *old_data = elem->data;
if( data==0 ){
removeElementGivenHash(pH,elem,h);
}else{
elem->data = data;
}
return old_data;
}
if( data==0 ) return 0;
new_elem = (fts1HashElem*)pH->xMalloc( sizeof(fts1HashElem) );
if( new_elem==0 ) return data;
if( pH->copyKey && pKey!=0 ){
new_elem->pKey = pH->xMalloc( nKey );
if( new_elem->pKey==0 ){
pH->xFree(new_elem);
return data;
}
memcpy((void*)new_elem->pKey, pKey, nKey);
}else{
new_elem->pKey = (void*)pKey;
}
new_elem->nKey = nKey;
pH->count++;
if( pH->htsize==0 ){
rehash(pH,8);
if( pH->htsize==0 ){
pH->count = 0;
pH->xFree(new_elem);
return data;
}
}
if( pH->count > pH->htsize ){
rehash(pH,pH->htsize*2);
}
assert( pH->htsize>0 );
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
insertElement(pH, &pH->ht[h], new_elem);
new_elem->data = data;
return 0;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */

View file

@ -0,0 +1,112 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implemenation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _FTS1_HASH_H_
#define _FTS1_HASH_H_
/* Forward declarations of structures. */
typedef struct fts1Hash fts1Hash;
typedef struct fts1HashElem fts1HashElem;
/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly. Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct fts1Hash {
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
char copyKey; /* True if copy of key made on insert */
int count; /* Number of entries in this table */
fts1HashElem *first; /* The first element of the array */
void *(*xMalloc)(int); /* malloc() function to use */
void (*xFree)(void *); /* free() function to use */
int htsize; /* Number of buckets in the hash table */
struct _fts1ht { /* the hash table */
int count; /* Number of entries with this hash */
fts1HashElem *chain; /* Pointer to first entry with this hash */
} *ht;
};
/* Each element in the hash table is an instance of the following
** structure. All elements are stored on a single doubly-linked list.
**
** Again, this structure is intended to be opaque, but it can't really
** be opaque because it is used by macros.
*/
struct fts1HashElem {
fts1HashElem *next, *prev; /* Next and previous elements in the table */
void *data; /* Data associated with this element */
void *pKey; int nKey; /* Key associated with this element */
};
/*
** There are 2 different modes of operation for a hash table:
**
** FTS1_HASH_STRING pKey points to a string that is nKey bytes long
** (including the null-terminator, if any). Case
** is respected in comparisons.
**
** FTS1_HASH_BINARY pKey points to binary data nKey bytes long.
** memcmp() is used to compare keys.
**
** A copy of the key is made if the copyKey parameter to fts1HashInit is 1.
*/
#define FTS1_HASH_STRING 1
#define FTS1_HASH_BINARY 2
/*
** Access routines. To delete, insert a NULL pointer.
*/
void sqlite3Fts1HashInit(fts1Hash*, int keytype, int copyKey);
void *sqlite3Fts1HashInsert(fts1Hash*, const void *pKey, int nKey, void *pData);
void *sqlite3Fts1HashFind(const fts1Hash*, const void *pKey, int nKey);
void sqlite3Fts1HashClear(fts1Hash*);
/*
** Shorthand for the functions above
*/
#define fts1HashInit sqlite3Fts1HashInit
#define fts1HashInsert sqlite3Fts1HashInsert
#define fts1HashFind sqlite3Fts1HashFind
#define fts1HashClear sqlite3Fts1HashClear
/*
** Macros for looping over all elements of a hash table. The idiom is
** like this:
**
** fts1Hash h;
** fts1HashElem *p;
** ...
** for(p=fts1HashFirst(&h); p; p=fts1HashNext(p)){
** SomeStructure *pData = fts1HashData(p);
** // do something with pData
** }
*/
#define fts1HashFirst(H) ((H)->first)
#define fts1HashNext(E) ((E)->next)
#define fts1HashData(E) ((E)->data)
#define fts1HashKey(E) ((E)->pKey)
#define fts1HashKeysize(E) ((E)->nKey)
/*
** Number of entries in a hash table
*/
#define fts1HashCount(H) ((H)->count)
#endif /* _FTS1_HASH_H_ */

View file

@ -0,0 +1,643 @@
/*
** 2006 September 30
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** Implementation of the full-text-search tokenizer that implements
** a Porter stemmer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS1 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS1 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "fts1_tokenizer.h"
/*
** Class derived from sqlite3_tokenizer
*/
typedef struct porter_tokenizer {
sqlite3_tokenizer base; /* Base class */
} porter_tokenizer;
/*
** Class derived from sqlit3_tokenizer_cursor
*/
typedef struct porter_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *zInput; /* input we are tokenizing */
int nInput; /* size of the input */
int iOffset; /* current position in zInput */
int iToken; /* index of next token to be returned */
char *zToken; /* storage for current token */
int nAllocated; /* space allocated to zToken buffer */
} porter_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module porterTokenizerModule;
/*
** Create a new tokenizer instance.
*/
static int porterCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
porter_tokenizer *t;
t = (porter_tokenizer *) calloc(sizeof(*t), 1);
if( t==NULL ) return SQLITE_NOMEM;
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is zInput[0..nInput-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int porterOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, int nInput, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
porter_tokenizer_cursor *c;
c = (porter_tokenizer_cursor *) malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->zInput = zInput;
if( zInput==0 ){
c->nInput = 0;
}else if( nInput<0 ){
c->nInput = (int)strlen(zInput);
}else{
c->nInput = nInput;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->zToken = NULL; /* no space allocated, yet. */
c->nAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** porterOpen() above.
*/
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
free(c->zToken);
free(c);
return SQLITE_OK;
}
/*
** Vowel or consonant
*/
static const char cType[] = {
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
1, 1, 1, 2, 1
};
/*
** isConsonant() and isVowel() determine if their first character in
** the string they point to is a consonant or a vowel, according
** to Porter ruls.
**
** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
** 'Y' is a consonant unless it follows another consonant,
** in which case it is a vowel.
**
** In these routine, the letters are in reverse order. So the 'y' rule
** is that 'y' is a consonant unless it is followed by another
** consonent.
*/
static int isVowel(const char*);
static int isConsonant(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return j;
return z[1]==0 || isVowel(z + 1);
}
static int isVowel(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return 1-j;
return isConsonant(z + 1);
}
/*
** Let any sequence of one or more vowels be represented by V and let
** C be sequence of one or more consonants. Then every word can be
** represented as:
**
** [C] (VC){m} [V]
**
** In prose: A word is an optional consonant followed by zero or
** vowel-consonant pairs followed by an optional vowel. "m" is the
** number of vowel consonant pairs. This routine computes the value
** of m for the first i bytes of a word.
**
** Return true if the m-value for z is 1 or more. In other words,
** return true if z contains at least one vowel that is followed
** by a consonant.
**
** In this routine z[] is in reverse order. So we are really looking
** for an instance of of a consonant followed by a vowel.
*/
static int m_gt_0(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/* Like mgt0 above except we are looking for a value of m which is
** exactly 1
*/
static int m_eq_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 1;
while( isConsonant(z) ){ z++; }
return *z==0;
}
/* Like mgt0 above except we are looking for a value of m>1 instead
** or m>0
*/
static int m_gt_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if there is a vowel anywhere within z[0..n-1]
*/
static int hasVowel(const char *z){
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if the word ends in a double consonant.
**
** The text is reversed here. So we are really looking at
** the first two characters of z[].
*/
static int doubleConsonant(const char *z){
return isConsonant(z) && z[0]==z[1] && isConsonant(z+1);
}
/*
** Return TRUE if the word ends with three letters which
** are consonant-vowel-consonent and where the final consonant
** is not 'w', 'x', or 'y'.
**
** The word is reversed here. So we are really checking the
** first three letters and the first one cannot be in [wxy].
*/
static int star_oh(const char *z){
return
z[0]!=0 && isConsonant(z) &&
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
z[1]!=0 && isVowel(z+1) &&
z[2]!=0 && isConsonant(z+2);
}
/*
** If the word ends with zFrom and xCond() is true for the stem
** of the word that preceeds the zFrom ending, then change the
** ending to zTo.
**
** The input word *pz and zFrom are both in reverse order. zTo
** is in normal order.
**
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
** match. Not that TRUE is returned even if xCond() fails and
** no substitution occurs.
*/
static int stem(
char **pz, /* The word being stemmed (Reversed) */
const char *zFrom, /* If the ending matches this... (Reversed) */
const char *zTo, /* ... change the ending to this (not reversed) */
int (*xCond)(const char*) /* Condition that must be true */
){
char *z = *pz;
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
if( *zFrom!=0 ) return 0;
if( xCond && !xCond(z) ) return 1;
while( *zTo ){
*(--z) = *(zTo++);
}
*pz = z;
return 1;
}
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate. The input word is copied into the output with
** US-ASCII case folding. If the input word is too long (more
** than 20 bytes if it contains no digits or more than 6 bytes if
** it contains digits) then word is truncated to 20 or 6 bytes
** by taking 10 or 3 bytes from the beginning and end.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, mx, j;
int hasDigit = 0;
for(i=0; i<nIn; i++){
int c = zIn[i];
if( c>='A' && c<='Z' ){
zOut[i] = c - 'A' + 'a';
}else{
if( c>='0' && c<='9' ) hasDigit = 1;
zOut[i] = c;
}
}
mx = hasDigit ? 3 : 10;
if( nIn>mx*2 ){
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
zOut[j] = zOut[i];
}
i = j;
}
zOut[i] = 0;
*pnOut = i;
}
/*
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
** zOut is at least big enough to hold nIn bytes. Write the actual
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
**
** Any upper-case characters in the US-ASCII character set ([A-Z])
** are converted to lower case. Upper-case UTF characters are
** unchanged.
**
** Words that are longer than about 20 bytes are stemmed by retaining
** a few bytes from the beginning and the end of the word. If the
** word contains digits, 3 bytes are taken from the beginning and
** 3 bytes from the end. For long words without digits, 10 bytes
** are taken from each end. US-ASCII case folding still applies.
**
** If the input word contains not digits but does characters not
** in [a-zA-Z] then no stemming is attempted and this routine just
** copies the input into the input into the output with US-ASCII
** case folding.
**
** Stemming never increases the length of the word. So there is
** no chance of overflowing the zOut buffer.
*/
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, j, c;
char zReverse[28];
char *z, *z2;
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
/* The word is too big or too small for the porter stemmer.
** Fallback to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
c = zIn[i];
if( c>='A' && c<='Z' ){
zReverse[j] = c + 'a' - 'A';
}else if( c>='a' && c<='z' ){
zReverse[j] = c;
}else{
/* The use of a character not in [a-zA-Z] means that we fallback
** to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
}
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
z = &zReverse[j+1];
/* Step 1a */
if( z[0]=='s' ){
if(
!stem(&z, "sess", "ss", 0) &&
!stem(&z, "sei", "i", 0) &&
!stem(&z, "ss", "ss", 0)
){
z++;
}
}
/* Step 1b */
z2 = z;
if( stem(&z, "dee", "ee", m_gt_0) ){
/* Do nothing. The work was all in the test */
}else if(
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
&& z!=z2
){
if( stem(&z, "ta", "ate", 0) ||
stem(&z, "lb", "ble", 0) ||
stem(&z, "zi", "ize", 0) ){
/* Do nothing. The work was all in the test */
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
z++;
}else if( m_eq_1(z) && star_oh(z) ){
*(--z) = 'e';
}
}
/* Step 1c */
if( z[0]=='y' && hasVowel(z+1) ){
z[0] = 'i';
}
/* Step 2 */
switch( z[1] ){
case 'a':
stem(&z, "lanoita", "ate", m_gt_0) ||
stem(&z, "lanoit", "tion", m_gt_0);
break;
case 'c':
stem(&z, "icne", "ence", m_gt_0) ||
stem(&z, "icna", "ance", m_gt_0);
break;
case 'e':
stem(&z, "rezi", "ize", m_gt_0);
break;
case 'g':
stem(&z, "igol", "log", m_gt_0);
break;
case 'l':
stem(&z, "ilb", "ble", m_gt_0) ||
stem(&z, "illa", "al", m_gt_0) ||
stem(&z, "iltne", "ent", m_gt_0) ||
stem(&z, "ile", "e", m_gt_0) ||
stem(&z, "ilsuo", "ous", m_gt_0);
break;
case 'o':
stem(&z, "noitazi", "ize", m_gt_0) ||
stem(&z, "noita", "ate", m_gt_0) ||
stem(&z, "rota", "ate", m_gt_0);
break;
case 's':
stem(&z, "msila", "al", m_gt_0) ||
stem(&z, "ssenevi", "ive", m_gt_0) ||
stem(&z, "ssenluf", "ful", m_gt_0) ||
stem(&z, "ssensuo", "ous", m_gt_0);
break;
case 't':
stem(&z, "itila", "al", m_gt_0) ||
stem(&z, "itivi", "ive", m_gt_0) ||
stem(&z, "itilib", "ble", m_gt_0);
break;
}
/* Step 3 */
switch( z[0] ){
case 'e':
stem(&z, "etaci", "ic", m_gt_0) ||
stem(&z, "evita", "", m_gt_0) ||
stem(&z, "ezila", "al", m_gt_0);
break;
case 'i':
stem(&z, "itici", "ic", m_gt_0);
break;
case 'l':
stem(&z, "laci", "ic", m_gt_0) ||
stem(&z, "luf", "", m_gt_0);
break;
case 's':
stem(&z, "ssen", "", m_gt_0);
break;
}
/* Step 4 */
switch( z[1] ){
case 'a':
if( z[0]=='l' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'c':
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'e':
if( z[0]=='r' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'i':
if( z[0]=='c' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'l':
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'n':
if( z[0]=='t' ){
if( z[2]=='a' ){
if( m_gt_1(z+3) ){
z += 3;
}
}else if( z[2]=='e' ){
stem(&z, "tneme", "", m_gt_1) ||
stem(&z, "tnem", "", m_gt_1) ||
stem(&z, "tne", "", m_gt_1);
}
}
break;
case 'o':
if( z[0]=='u' ){
if( m_gt_1(z+2) ){
z += 2;
}
}else if( z[3]=='s' || z[3]=='t' ){
stem(&z, "noi", "", m_gt_1);
}
break;
case 's':
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
case 't':
stem(&z, "eta", "", m_gt_1) ||
stem(&z, "iti", "", m_gt_1);
break;
case 'u':
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
z += 3;
}
break;
case 'v':
case 'z':
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
}
/* Step 5a */
if( z[0]=='e' ){
if( m_gt_1(z+1) ){
z++;
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
z++;
}
}
/* Step 5b */
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
z++;
}
/* z[] is now the stemmed word in reverse order. Flip it back
** around into forward order and return.
*/
*pnOut = i = strlen(z);
zOut[i] = 0;
while( *z ){
zOut[--i] = *(z++);
}
}
/*
** Characters that can be part of a token. We assume any character
** whose value is greater than 0x80 (any UTF character) can be
** part of a token. In other words, delimiters all must have
** values of 0x7f or lower.
*/
static const char isIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};
#define idChar(C) (((ch=C)&0x80)!=0 || (ch>0x2f && isIdChar[ch-0x30]))
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !isIdChar[ch-0x30]))
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to porterOpen().
*/
static int porterNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
const char **pzToken, /* OUT: *pzToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
const char *z = c->zInput;
while( c->iOffset<c->nInput ){
int iStartOffset, ch;
/* Scan past delimiter characters */
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int n = c->iOffset-iStartOffset;
if( n>c->nAllocated ){
c->nAllocated = n+20;
c->zToken = realloc(c->zToken, c->nAllocated);
if( c->zToken==NULL ) return SQLITE_NOMEM;
}
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
*pzToken = c->zToken;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the porter-stemmer tokenizer
*/
static const sqlite3_tokenizer_module porterTokenizerModule = {
0,
porterCreate,
porterDestroy,
porterOpen,
porterClose,
porterNext,
};
/*
** Allocate a new porter tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts1PorterTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &porterTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */

View file

@ -0,0 +1,90 @@
/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search. There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions. This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
#ifndef _FTS1_TOKENIZER_H_
#define _FTS1_TOKENIZER_H_
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
#include "sqlite3.h"
/*
** Structures used by the tokenizer interface.
*/
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
struct sqlite3_tokenizer_module {
int iVersion; /* currently 0 */
/*
** Create and destroy a tokenizer. argc/argv are passed down from
** the fulltext virtual table creation to allow customization.
*/
int (*xCreate)(int argc, const char *const*argv,
sqlite3_tokenizer **ppTokenizer);
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
/*
** Tokenize a particular input. Call xOpen() to prepare to
** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
** xClose() to free any internal state. The pInput passed to
** xOpen() must exist until the cursor is closed. The ppToken
** result from xNext() is only valid until the next call to xNext()
** or until xClose() is called.
*/
/* TODO(shess) current implementation requires pInput to be
** nul-terminated. This should either be fixed, or pInput/nBytes
** should be converted to zInput.
*/
int (*xOpen)(sqlite3_tokenizer *pTokenizer,
const char *pInput, int nBytes,
sqlite3_tokenizer_cursor **ppCursor);
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
const char **ppToken, int *pnBytes,
int *piStartOffset, int *piEndOffset, int *piPosition);
};
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
/*
** Get the module for a tokenizer which generates tokens based on a
** set of non-token characters. The default is to break tokens at any
** non-alnum character, though the set of delimiters can also be
** specified by the first argv argument to xCreate().
*/
/* TODO(shess) This doesn't belong here. Need some sort of
** registration process.
*/
void sqlite3Fts1SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts1PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif /* _FTS1_TOKENIZER_H_ */

View file

@ -0,0 +1,221 @@
/*
** The author disclaims copyright to this source code.
**
*************************************************************************
** Implementation of the "simple" full-text-search tokenizer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS1 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS1 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "fts1_tokenizer.h"
typedef struct simple_tokenizer {
sqlite3_tokenizer base;
char delim[128]; /* flag ASCII delimiters */
} simple_tokenizer;
typedef struct simple_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *pInput; /* input we are tokenizing */
int nBytes; /* size of the input */
int iOffset; /* current position in pInput */
int iToken; /* index of next token to be returned */
char *pToken; /* storage for current token */
int nTokenAllocated; /* space allocated to zToken buffer */
} simple_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module simpleTokenizerModule;
static int isDelim(simple_tokenizer *t, unsigned char c){
return c<0x80 && t->delim[c];
}
/*
** Create a new tokenizer instance.
*/
static int simpleCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
simple_tokenizer *t;
t = (simple_tokenizer *) calloc(sizeof(*t), 1);
if( t==NULL ) return SQLITE_NOMEM;
/* TODO(shess) Delimiters need to remain the same from run to run,
** else we need to reindex. One solution would be a meta-table to
** track such information in the database, then we'd only want this
** information on the initial create.
*/
if( argc>1 ){
int i, n = strlen(argv[1]);
for(i=0; i<n; i++){
unsigned char ch = argv[1][i];
/* We explicitly don't support UTF-8 delimiters for now. */
if( ch>=0x80 ){
free(t);
return SQLITE_ERROR;
}
t->delim[ch] = 1;
}
} else {
/* Mark non-alphanumeric ASCII characters as delimiters */
int i;
for(i=1; i<0x80; i++){
t->delim[i] = !isalnum(i);
}
}
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int simpleOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *pInput, int nBytes, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
simple_tokenizer_cursor *c;
c = (simple_tokenizer_cursor *) malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->pInput = pInput;
if( pInput==0 ){
c->nBytes = 0;
}else if( nBytes<0 ){
c->nBytes = (int)strlen(pInput);
}else{
c->nBytes = nBytes;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->pToken = NULL; /* no space allocated, yet. */
c->nTokenAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** simpleOpen() above.
*/
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
free(c->pToken);
free(c);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to simpleOpen().
*/
static int simpleNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
unsigned char *p = (unsigned char *)c->pInput;
while( c->iOffset<c->nBytes ){
int iStartOffset;
/* Scan past delimiter characters */
while( c->iOffset<c->nBytes && isDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nBytes && !isDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int i, n = c->iOffset-iStartOffset;
if( n>c->nTokenAllocated ){
c->nTokenAllocated = n+20;
c->pToken = realloc(c->pToken, c->nTokenAllocated);
if( c->pToken==NULL ) return SQLITE_NOMEM;
}
for(i=0; i<n; i++){
/* TODO(shess) This needs expansion to handle UTF-8
** case-insensitivity.
*/
unsigned char ch = p[iStartOffset+i];
c->pToken[i] = ch<0x80 ? tolower(ch) : ch;
}
*ppToken = c->pToken;
*pnBytes = n;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module simpleTokenizerModule = {
0,
simpleCreate,
simpleDestroy,
simpleOpen,
simpleClose,
simpleNext,
};
/*
** Allocate a new simple tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts1SimpleTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &simpleTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,11 @@
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
int fulltext_init(sqlite3 *db);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

View file

@ -0,0 +1,174 @@
/*
** The author disclaims copyright to this source code.
**
*************************************************************************
** Implementation of the "simple" full-text-search tokenizer.
*/
#include <assert.h>
#if !defined(__APPLE__)
#include <malloc.h>
#else
#include <stdlib.h>
#endif
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "tokenizer.h"
/* Duplicate a string; the caller must free() the returned string.
* (We don't use strdup() since it's not part of the standard C library and
* may not be available everywhere.) */
/* TODO(shess) Copied from fulltext.c, consider util.c for such
** things. */
static char *string_dup(const char *s){
char *str = malloc(strlen(s) + 1);
strcpy(str, s);
return str;
}
typedef struct simple_tokenizer {
sqlite3_tokenizer base;
const char *zDelim; /* token delimiters */
} simple_tokenizer;
typedef struct simple_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *pInput; /* input we are tokenizing */
int nBytes; /* size of the input */
const char *pCurrent; /* current position in pInput */
int iToken; /* index of next token to be returned */
char *zToken; /* storage for current token */
int nTokenBytes; /* actual size of current token */
int nTokenAllocated; /* space allocated to zToken buffer */
} simple_tokenizer_cursor;
static sqlite3_tokenizer_module simpleTokenizerModule;/* forward declaration */
static int simpleCreate(
int argc, const char **argv,
sqlite3_tokenizer **ppTokenizer
){
simple_tokenizer *t;
t = (simple_tokenizer *) malloc(sizeof(simple_tokenizer));
/* TODO(shess) Delimiters need to remain the same from run to run,
** else we need to reindex. One solution would be a meta-table to
** track such information in the database, then we'd only want this
** information on the initial create.
*/
if( argc>1 ){
t->zDelim = string_dup(argv[1]);
} else {
/* Build a string excluding alphanumeric ASCII characters */
char zDelim[0x80]; /* nul-terminated, so nul not a member */
int i, j;
for(i=1, j=0; i<0x80; i++){
if( !isalnum(i) ){
zDelim[j++] = i;
}
}
zDelim[j++] = '\0';
assert( j<=sizeof(zDelim) );
t->zDelim = string_dup(zDelim);
}
*ppTokenizer = &t->base;
return SQLITE_OK;
}
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
simple_tokenizer *t = (simple_tokenizer *) pTokenizer;
free((void *) t->zDelim);
free(t);
return SQLITE_OK;
}
static int simpleOpen(
sqlite3_tokenizer *pTokenizer,
const char *pInput, int nBytes,
sqlite3_tokenizer_cursor **ppCursor
){
simple_tokenizer_cursor *c;
c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor));
c->pInput = pInput;
c->nBytes = nBytes<0 ? (int) strlen(pInput) : nBytes;
c->pCurrent = c->pInput; /* start tokenizing at the beginning */
c->iToken = 0;
c->zToken = NULL; /* no space allocated, yet. */
c->nTokenBytes = 0;
c->nTokenAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
if( NULL!=c->zToken ){
free(c->zToken);
}
free(c);
return SQLITE_OK;
}
static int simpleNext(
sqlite3_tokenizer_cursor *pCursor,
const char **ppToken, int *pnBytes,
int *piStartOffset, int *piEndOffset, int *piPosition
){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
int ii;
while( c->pCurrent-c->pInput<c->nBytes ){
int n = (int) strcspn(c->pCurrent, t->zDelim);
if( n>0 ){
if( n+1>c->nTokenAllocated ){
c->zToken = realloc(c->zToken, n+1);
}
for(ii=0; ii<n; ii++){
/* TODO(shess) This needs expansion to handle UTF-8
** case-insensitivity.
*/
char ch = c->pCurrent[ii];
c->zToken[ii] = (unsigned char)ch<0x80 ? tolower(ch) : ch;
}
c->zToken[n] = '\0';
*ppToken = c->zToken;
*pnBytes = n;
*piStartOffset = (int) (c->pCurrent-c->pInput);
*piEndOffset = *piStartOffset+n;
*piPosition = c->iToken++;
c->pCurrent += n + 1;
return SQLITE_OK;
}
c->pCurrent += n + 1;
/* TODO(shess) could strspn() to skip delimiters en masse. Needs
** to happen in two places, though, which is annoying.
*/
}
return SQLITE_DONE;
}
static sqlite3_tokenizer_module simpleTokenizerModule = {
0,
simpleCreate,
simpleDestroy,
simpleOpen,
simpleClose,
simpleNext,
};
void get_simple_tokenizer_module(
sqlite3_tokenizer_module **ppModule
){
*ppModule = &simpleTokenizerModule;
}

View file

@ -0,0 +1,89 @@
/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search. There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions. This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
#ifndef _TOKENIZER_H_
#define _TOKENIZER_H_
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
#include "sqlite3.h"
/*
** Structures used by the tokenizer interface.
*/
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
struct sqlite3_tokenizer_module {
int iVersion; /* currently 0 */
/*
** Create and destroy a tokenizer. argc/argv are passed down from
** the fulltext virtual table creation to allow customization.
*/
int (*xCreate)(int argc, const char **argv,
sqlite3_tokenizer **ppTokenizer);
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
/*
** Tokenize a particular input. Call xOpen() to prepare to
** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
** xClose() to free any internal state. The pInput passed to
** xOpen() must exist until the cursor is closed. The ppToken
** result from xNext() is only valid until the next call to xNext()
** or until xClose() is called.
*/
/* TODO(shess) current implementation requires pInput to be
** nul-terminated. This should either be fixed, or pInput/nBytes
** should be converted to zInput.
*/
int (*xOpen)(sqlite3_tokenizer *pTokenizer,
const char *pInput, int nBytes,
sqlite3_tokenizer_cursor **ppCursor);
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
const char **ppToken, int *pnBytes,
int *piStartOffset, int *piEndOffset, int *piPosition);
};
struct sqlite3_tokenizer {
sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
/*
** Get the module for a tokenizer which generates tokens based on a
** set of non-token characters. The default is to break tokens at any
** non-alnum character, though the set of delimiters can also be
** specified by the first argv argument to xCreate().
*/
/* TODO(shess) This doesn't belong here. Need some sort of
** registration process.
*/
void get_simple_tokenizer_module(sqlite3_tokenizer_module **ppModule);
#endif /* _TOKENIZER_H_ */

View file

@ -0,0 +1,13 @@
/README.txt/1.1/Tue Oct 10 17:37:14 2006//
/fts2.h/1.3/Fri Nov 30 01:28:10 2007//
/fts2_hash.c/1.2/Fri Nov 30 01:28:10 2007//
/fts2_hash.h/1.1/Tue Oct 10 17:37:14 2006//
/fts2_icu.c/1.1/Fri Jun 22 15:21:16 2007//
/fts2_porter.c/1.7/Fri Nov 30 01:28:10 2007//
/fts2_tokenizer.c/1.6/Tue Jun 26 10:55:02 2007//
/fts2_tokenizer.h/1.3/Fri Nov 30 01:28:10 2007//
/fts2_tokenizer1.c/1.5/Fri Nov 30 01:28:10 2007//
/mkfts2amal.tcl/1.3/Fri Nov 30 01:28:10 2007//
/README.tokenizers/1.3/Thu Dec 13 21:44:03 2007//
/fts2.c/1.48/Thu Dec 13 21:45:31 2007//
D

View file

@ -0,0 +1 @@
sqlite/ext/fts2

View file

@ -0,0 +1 @@
:pserver:drh@sqlite.org:/sqlite

View file

@ -0,0 +1,133 @@
1. FTS2 Tokenizers
When creating a new full-text table, FTS2 allows the user to select
the text tokenizer implementation to be used when indexing text
by specifying a "tokenizer" clause as part of the CREATE VIRTUAL TABLE
statement:
CREATE VIRTUAL TABLE <table-name> USING fts2(
<columns ...> [, tokenizer <tokenizer-name> [<tokenizer-args>]]
);
The built-in tokenizers (valid values to pass as <tokenizer name>) are
"simple" and "porter".
<tokenizer-args> should consist of zero or more white-space separated
arguments to pass to the selected tokenizer implementation. The
interpretation of the arguments, if any, depends on the individual
tokenizer.
2. Custom Tokenizers
FTS2 allows users to provide custom tokenizer implementations. The
interface used to create a new tokenizer is defined and described in
the fts2_tokenizer.h source file.
Registering a new FTS2 tokenizer is similar to registering a new
virtual table module with SQLite. The user passes a pointer to a
structure containing pointers to various callback functions that
make up the implementation of the new tokenizer type. For tokenizers,
the structure (defined in fts2_tokenizer.h) is called
"sqlite3_tokenizer_module".
FTS2 does not expose a C-function that users call to register new
tokenizer types with a database handle. Instead, the pointer must
be encoded as an SQL blob value and passed to FTS2 through the SQL
engine by evaluating a special scalar function, "fts2_tokenizer()".
The fts2_tokenizer() function may be called with one or two arguments,
as follows:
SELECT fts2_tokenizer(<tokenizer-name>);
SELECT fts2_tokenizer(<tokenizer-name>, <sqlite3_tokenizer_module ptr>);
Where <tokenizer-name> is a string identifying the tokenizer and
<sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
structure encoded as an SQL blob. If the second argument is present,
it is registered as tokenizer <tokenizer-name> and a copy of it
returned. If only one argument is passed, a pointer to the tokenizer
implementation currently registered as <tokenizer-name> is returned,
encoded as a blob. Or, if no such tokenizer exists, an SQL exception
(error) is raised.
SECURITY: If the fts2 extension is used in an environment where potentially
malicious users may execute arbitrary SQL (i.e. gears), they should be
prevented from invoking the fts2_tokenizer() function, possibly using the
authorisation callback.
See "Sample code" below for an example of calling the fts2_tokenizer()
function from C code.
3. ICU Library Tokenizers
If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor
symbol defined, then there exists a built-in tokenizer named "icu"
implemented using the ICU library. The first argument passed to the
xCreate() method (see fts2_tokenizer.h) of this tokenizer may be
an ICU locale identifier. For example "tr_TR" for Turkish as used
in Turkey, or "en_AU" for English as used in Australia. For example:
"CREATE VIRTUAL TABLE thai_text USING fts2(text, tokenizer icu th_TH)"
The ICU tokenizer implementation is very simple. It splits the input
text according to the ICU rules for finding word boundaries and discards
any tokens that consist entirely of white-space. This may be suitable
for some applications in some locales, but not all. If more complex
processing is required, for example to implement stemming or
discard punctuation, this can be done by creating a tokenizer
implementation that uses the ICU tokenizer as part of its implementation.
When using the ICU tokenizer this way, it is safe to overwrite the
contents of the strings returned by the xNext() method (see
fts2_tokenizer.h).
4. Sample code.
The following two code samples illustrate the way C code should invoke
the fts2_tokenizer() scalar function:
int registerTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module *p
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
sqlite3_step(pStmt);
return sqlite3_finalize(pStmt);
}
int queryTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module **pp
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts2_tokenizer(?)";
*pp = 0;
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
}
}
return sqlite3_finalize(pStmt);
}

View file

@ -0,0 +1,4 @@
This folder contains source code to the second full-text search
extension for SQLite. While the API is the same, this version uses a
substantially different storage schema from fts1, so tables will need
to be rebuilt.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,26 @@
/*
** 2006 Oct 10
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This header file is used by programs that want to link against the
** FTS2 library. All it does is declare the sqlite3Fts2Init() interface.
*/
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
int sqlite3Fts2Init(sqlite3 *db);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

View file

@ -0,0 +1,369 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of generic hash-tables used in SQLite.
** We've modified it slightly to serve as a standalone hash table
** implementation for the full-text indexing module.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS2 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS2 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "fts2_hash.h"
static void *malloc_and_zero(int n){
void *p = malloc(n);
if( p ){
memset(p, 0, n);
}
return p;
}
/* Turn bulk memory into a hash table object by initializing the
** fields of the Hash structure.
**
** "pNew" is a pointer to the hash table that is to be initialized.
** keyClass is one of the constants
** FTS2_HASH_BINARY or FTS2_HASH_STRING. The value of keyClass
** determines what kind of key the hash table will use. "copyKey" is
** true if the hash table should make its own private copy of keys and
** false if it should just use the supplied pointer.
*/
void sqlite3Fts2HashInit(fts2Hash *pNew, int keyClass, int copyKey){
assert( pNew!=0 );
assert( keyClass>=FTS2_HASH_STRING && keyClass<=FTS2_HASH_BINARY );
pNew->keyClass = keyClass;
pNew->copyKey = copyKey;
pNew->first = 0;
pNew->count = 0;
pNew->htsize = 0;
pNew->ht = 0;
pNew->xMalloc = malloc_and_zero;
pNew->xFree = free;
}
/* Remove all entries from a hash table. Reclaim all memory.
** Call this routine to delete a hash table or to reset a hash table
** to the empty state.
*/
void sqlite3Fts2HashClear(fts2Hash *pH){
fts2HashElem *elem; /* For looping over all elements of the table */
assert( pH!=0 );
elem = pH->first;
pH->first = 0;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = 0;
pH->htsize = 0;
while( elem ){
fts2HashElem *next_elem = elem->next;
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree(elem);
elem = next_elem;
}
pH->count = 0;
}
/*
** Hash and comparison functions when the mode is FTS2_HASH_STRING
*/
static int strHash(const void *pKey, int nKey){
const char *z = (const char *)pKey;
int h = 0;
if( nKey<=0 ) nKey = (int) strlen(z);
while( nKey > 0 ){
h = (h<<3) ^ h ^ *z++;
nKey--;
}
return h & 0x7fffffff;
}
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
}
/*
** Hash and comparison functions when the mode is FTS2_HASH_BINARY
*/
static int binHash(const void *pKey, int nKey){
int h = 0;
const char *z = (const char *)pKey;
while( nKey-- > 0 ){
h = (h<<3) ^ h ^ *(z++);
}
return h & 0x7fffffff;
}
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return memcmp(pKey1,pKey2,n1);
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamilar to some
** programmers, so we provide the following additional explanation:
**
** The name of the function is "hashFunction". The function takes a
** single parameter "keyClass". The return value of hashFunction()
** is a pointer to another function. Specifically, the return value
** of hashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*hashFunction(int keyClass))(const void*,int){
if( keyClass==FTS2_HASH_STRING ){
return &strHash;
}else{
assert( keyClass==FTS2_HASH_BINARY );
return &binHash;
}
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** For help in interpreted the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
if( keyClass==FTS2_HASH_STRING ){
return &strCompare;
}else{
assert( keyClass==FTS2_HASH_BINARY );
return &binCompare;
}
}
/* Link an element into the hash table
*/
static void insertElement(
fts2Hash *pH, /* The complete hash table */
struct _fts2ht *pEntry, /* The entry into which pNew is inserted */
fts2HashElem *pNew /* The element to be inserted */
){
fts2HashElem *pHead; /* First element already in pEntry */
pHead = pEntry->chain;
if( pHead ){
pNew->next = pHead;
pNew->prev = pHead->prev;
if( pHead->prev ){ pHead->prev->next = pNew; }
else { pH->first = pNew; }
pHead->prev = pNew;
}else{
pNew->next = pH->first;
if( pH->first ){ pH->first->prev = pNew; }
pNew->prev = 0;
pH->first = pNew;
}
pEntry->count++;
pEntry->chain = pNew;
}
/* Resize the hash table so that it cantains "new_size" buckets.
** "new_size" must be a power of 2. The hash table might fail
** to resize if sqliteMalloc() fails.
*/
static void rehash(fts2Hash *pH, int new_size){
struct _fts2ht *new_ht; /* The new hash table */
fts2HashElem *elem, *next_elem; /* For looping over existing elements */
int (*xHash)(const void*,int); /* The hash function */
assert( (new_size & (new_size-1))==0 );
new_ht = (struct _fts2ht *)pH->xMalloc( new_size*sizeof(struct _fts2ht) );
if( new_ht==0 ) return;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = new_ht;
pH->htsize = new_size;
xHash = hashFunction(pH->keyClass);
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
next_elem = elem->next;
insertElement(pH, &new_ht[h], elem);
}
}
/* This function (for internal use only) locates an element in an
** hash table that matches the given key. The hash for this key has
** already been computed and is passed as the 4th parameter.
*/
static fts2HashElem *findElementGivenHash(
const fts2Hash *pH, /* The pH to be searched */
const void *pKey, /* The key we are searching for */
int nKey,
int h /* The hash for this key. */
){
fts2HashElem *elem; /* Used to loop thru the element list */
int count; /* Number of elements left to test */
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
if( pH->ht ){
struct _fts2ht *pEntry = &pH->ht[h];
elem = pEntry->chain;
count = pEntry->count;
xCompare = compareFunction(pH->keyClass);
while( count-- && elem ){
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
return elem;
}
elem = elem->next;
}
}
return 0;
}
/* Remove a single entry from the hash table given a pointer to that
** element and a hash on the element's key.
*/
static void removeElementGivenHash(
fts2Hash *pH, /* The pH containing "elem" */
fts2HashElem* elem, /* The element to be removed from the pH */
int h /* Hash value for the element */
){
struct _fts2ht *pEntry;
if( elem->prev ){
elem->prev->next = elem->next;
}else{
pH->first = elem->next;
}
if( elem->next ){
elem->next->prev = elem->prev;
}
pEntry = &pH->ht[h];
if( pEntry->chain==elem ){
pEntry->chain = elem->next;
}
pEntry->count--;
if( pEntry->count<=0 ){
pEntry->chain = 0;
}
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree( elem );
pH->count--;
if( pH->count<=0 ){
assert( pH->first==0 );
assert( pH->count==0 );
fts2HashClear(pH);
}
}
/* Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey. Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *sqlite3Fts2HashFind(const fts2Hash *pH, const void *pKey, int nKey){
int h; /* A hash on key */
fts2HashElem *elem; /* The element that matches key */
int (*xHash)(const void*,int); /* The hash function */
if( pH==0 || pH->ht==0 ) return 0;
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
h = (*xHash)(pKey,nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
return elem ? elem->data : 0;
}
/* Insert an element into the hash table pH. The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created. A copy of the key is made if the copyKey
** flag is set. NULL is returned.
**
** If another element already exists with the same key, then the
** new data replaces the old data and the old data is returned.
** The key is not copied in this instance. If a malloc fails, then
** the new data is returned and the hash table is unchanged.
**
** If the "data" parameter to this function is NULL, then the
** element corresponding to "key" is removed from the hash table.
*/
void *sqlite3Fts2HashInsert(
fts2Hash *pH, /* The hash table to insert into */
const void *pKey, /* The key */
int nKey, /* Number of bytes in the key */
void *data /* The data */
){
int hraw; /* Raw hash value of the key */
int h; /* the hash of the key modulo hash table size */
fts2HashElem *elem; /* Used to loop thru the element list */
fts2HashElem *new_elem; /* New element added to the pH */
int (*xHash)(const void*,int); /* The hash function */
assert( pH!=0 );
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
hraw = (*xHash)(pKey, nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
elem = findElementGivenHash(pH,pKey,nKey,h);
if( elem ){
void *old_data = elem->data;
if( data==0 ){
removeElementGivenHash(pH,elem,h);
}else{
elem->data = data;
}
return old_data;
}
if( data==0 ) return 0;
new_elem = (fts2HashElem*)pH->xMalloc( sizeof(fts2HashElem) );
if( new_elem==0 ) return data;
if( pH->copyKey && pKey!=0 ){
new_elem->pKey = pH->xMalloc( nKey );
if( new_elem->pKey==0 ){
pH->xFree(new_elem);
return data;
}
memcpy((void*)new_elem->pKey, pKey, nKey);
}else{
new_elem->pKey = (void*)pKey;
}
new_elem->nKey = nKey;
pH->count++;
if( pH->htsize==0 ){
rehash(pH,8);
if( pH->htsize==0 ){
pH->count = 0;
pH->xFree(new_elem);
return data;
}
}
if( pH->count > pH->htsize ){
rehash(pH,pH->htsize*2);
}
assert( pH->htsize>0 );
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
insertElement(pH, &pH->ht[h], new_elem);
new_elem->data = data;
return 0;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View file

@ -0,0 +1,112 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implemenation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _FTS2_HASH_H_
#define _FTS2_HASH_H_
/* Forward declarations of structures. */
typedef struct fts2Hash fts2Hash;
typedef struct fts2HashElem fts2HashElem;
/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly. Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct fts2Hash {
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
char copyKey; /* True if copy of key made on insert */
int count; /* Number of entries in this table */
fts2HashElem *first; /* The first element of the array */
void *(*xMalloc)(int); /* malloc() function to use */
void (*xFree)(void *); /* free() function to use */
int htsize; /* Number of buckets in the hash table */
struct _fts2ht { /* the hash table */
int count; /* Number of entries with this hash */
fts2HashElem *chain; /* Pointer to first entry with this hash */
} *ht;
};
/* Each element in the hash table is an instance of the following
** structure. All elements are stored on a single doubly-linked list.
**
** Again, this structure is intended to be opaque, but it can't really
** be opaque because it is used by macros.
*/
struct fts2HashElem {
fts2HashElem *next, *prev; /* Next and previous elements in the table */
void *data; /* Data associated with this element */
void *pKey; int nKey; /* Key associated with this element */
};
/*
** There are 2 different modes of operation for a hash table:
**
** FTS2_HASH_STRING pKey points to a string that is nKey bytes long
** (including the null-terminator, if any). Case
** is respected in comparisons.
**
** FTS2_HASH_BINARY pKey points to binary data nKey bytes long.
** memcmp() is used to compare keys.
**
** A copy of the key is made if the copyKey parameter to fts2HashInit is 1.
*/
#define FTS2_HASH_STRING 1
#define FTS2_HASH_BINARY 2
/*
** Access routines. To delete, insert a NULL pointer.
*/
void sqlite3Fts2HashInit(fts2Hash*, int keytype, int copyKey);
void *sqlite3Fts2HashInsert(fts2Hash*, const void *pKey, int nKey, void *pData);
void *sqlite3Fts2HashFind(const fts2Hash*, const void *pKey, int nKey);
void sqlite3Fts2HashClear(fts2Hash*);
/*
** Shorthand for the functions above
*/
#define fts2HashInit sqlite3Fts2HashInit
#define fts2HashInsert sqlite3Fts2HashInsert
#define fts2HashFind sqlite3Fts2HashFind
#define fts2HashClear sqlite3Fts2HashClear
/*
** Macros for looping over all elements of a hash table. The idiom is
** like this:
**
** fts2Hash h;
** fts2HashElem *p;
** ...
** for(p=fts2HashFirst(&h); p; p=fts2HashNext(p)){
** SomeStructure *pData = fts2HashData(p);
** // do something with pData
** }
*/
#define fts2HashFirst(H) ((H)->first)
#define fts2HashNext(E) ((E)->next)
#define fts2HashData(E) ((E)->data)
#define fts2HashKey(E) ((E)->pKey)
#define fts2HashKeysize(E) ((E)->nKey)
/*
** Number of entries in a hash table
*/
#define fts2HashCount(H) ((H)->count)
#endif /* _FTS2_HASH_H_ */

View file

@ -0,0 +1,257 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements a tokenizer for fts2 based on the ICU library.
**
** $Id: fts2_icu.c,v 1.1 2007/06/22 15:21:16 danielk1977 Exp $
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#ifdef SQLITE_ENABLE_ICU
#include <assert.h>
#include <string.h>
#include "fts2_tokenizer.h"
#include <unicode/ubrk.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/utf16.h>
typedef struct IcuTokenizer IcuTokenizer;
typedef struct IcuCursor IcuCursor;
struct IcuTokenizer {
sqlite3_tokenizer base;
char *zLocale;
};
struct IcuCursor {
sqlite3_tokenizer_cursor base;
UBreakIterator *pIter; /* ICU break-iterator object */
int nChar; /* Number of UChar elements in pInput */
UChar *aChar; /* Copy of input using utf-16 encoding */
int *aOffset; /* Offsets of each character in utf-8 input */
int nBuffer;
char *zBuffer;
int iToken;
};
/*
** Create a new tokenizer instance.
*/
static int icuCreate(
int argc, /* Number of entries in argv[] */
const char * const *argv, /* Tokenizer creation arguments */
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
){
IcuTokenizer *p;
int n = 0;
if( argc>0 ){
n = strlen(argv[0])+1;
}
p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
if( !p ){
return SQLITE_NOMEM;
}
memset(p, 0, sizeof(IcuTokenizer));
if( n ){
p->zLocale = (char *)&p[1];
memcpy(p->zLocale, argv[0], n);
}
*ppTokenizer = (sqlite3_tokenizer *)p;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
sqlite3_free(p);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int icuOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, /* Input string */
int nInput, /* Length of zInput in bytes */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
IcuCursor *pCsr;
const int32_t opt = U_FOLD_CASE_DEFAULT;
UErrorCode status = U_ZERO_ERROR;
int nChar;
UChar32 c;
int iInput = 0;
int iOut = 0;
*ppCursor = 0;
nChar = nInput+1;
pCsr = (IcuCursor *)sqlite3_malloc(
sizeof(IcuCursor) + /* IcuCursor */
nChar * sizeof(UChar) + /* IcuCursor.aChar[] */
(nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
);
if( !pCsr ){
return SQLITE_NOMEM;
}
memset(pCsr, 0, sizeof(IcuCursor));
pCsr->aChar = (UChar *)&pCsr[1];
pCsr->aOffset = (int *)&pCsr->aChar[nChar];
pCsr->aOffset[iOut] = iInput;
U8_NEXT(zInput, iInput, nInput, c);
while( c>0 ){
int isError = 0;
c = u_foldCase(c, opt);
U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
if( isError ){
sqlite3_free(pCsr);
return SQLITE_ERROR;
}
pCsr->aOffset[iOut] = iInput;
if( iInput<nInput ){
U8_NEXT(zInput, iInput, nInput, c);
}else{
c = 0;
}
}
pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
if( !U_SUCCESS(status) ){
sqlite3_free(pCsr);
return SQLITE_ERROR;
}
pCsr->nChar = iOut;
ubrk_first(pCsr->pIter);
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to icuOpen().
*/
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
IcuCursor *pCsr = (IcuCursor *)pCursor;
ubrk_close(pCsr->pIter);
sqlite3_free(pCsr->zBuffer);
sqlite3_free(pCsr);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor.
*/
static int icuNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
IcuCursor *pCsr = (IcuCursor *)pCursor;
int iStart = 0;
int iEnd = 0;
int nByte = 0;
while( iStart==iEnd ){
UChar32 c;
iStart = ubrk_current(pCsr->pIter);
iEnd = ubrk_next(pCsr->pIter);
if( iEnd==UBRK_DONE ){
return SQLITE_DONE;
}
while( iStart<iEnd ){
int iWhite = iStart;
U8_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
if( u_isspace(c) ){
iStart = iWhite;
}else{
break;
}
}
assert(iStart<=iEnd);
}
do {
UErrorCode status = U_ZERO_ERROR;
if( nByte ){
char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
if( !zNew ){
return SQLITE_NOMEM;
}
pCsr->zBuffer = zNew;
pCsr->nBuffer = nByte;
}
u_strToUTF8(
pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
&pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
&status /* Output success/failure */
);
} while( nByte>pCsr->nBuffer );
*ppToken = pCsr->zBuffer;
*pnBytes = nByte;
*piStartOffset = pCsr->aOffset[iStart];
*piEndOffset = pCsr->aOffset[iEnd];
*piPosition = pCsr->iToken++;
return SQLITE_OK;
}
/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module icuTokenizerModule = {
0, /* iVersion */
icuCreate, /* xCreate */
icuDestroy, /* xCreate */
icuOpen, /* xOpen */
icuClose, /* xClose */
icuNext, /* xNext */
};
/*
** Set *ppModule to point at the implementation of the ICU tokenizer.
*/
void sqlite3Fts2IcuTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &icuTokenizerModule;
}
#endif /* defined(SQLITE_ENABLE_ICU) */
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View file

@ -0,0 +1,642 @@
/*
** 2006 September 30
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** Implementation of the full-text-search tokenizer that implements
** a Porter stemmer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS2 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS2 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "fts2_tokenizer.h"
/*
** Class derived from sqlite3_tokenizer
*/
typedef struct porter_tokenizer {
sqlite3_tokenizer base; /* Base class */
} porter_tokenizer;
/*
** Class derived from sqlit3_tokenizer_cursor
*/
typedef struct porter_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *zInput; /* input we are tokenizing */
int nInput; /* size of the input */
int iOffset; /* current position in zInput */
int iToken; /* index of next token to be returned */
char *zToken; /* storage for current token */
int nAllocated; /* space allocated to zToken buffer */
} porter_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module porterTokenizerModule;
/*
** Create a new tokenizer instance.
*/
static int porterCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
porter_tokenizer *t;
t = (porter_tokenizer *) calloc(sizeof(*t), 1);
if( t==NULL ) return SQLITE_NOMEM;
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is zInput[0..nInput-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int porterOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, int nInput, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
porter_tokenizer_cursor *c;
c = (porter_tokenizer_cursor *) malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->zInput = zInput;
if( zInput==0 ){
c->nInput = 0;
}else if( nInput<0 ){
c->nInput = (int)strlen(zInput);
}else{
c->nInput = nInput;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->zToken = NULL; /* no space allocated, yet. */
c->nAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** porterOpen() above.
*/
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
free(c->zToken);
free(c);
return SQLITE_OK;
}
/*
** Vowel or consonant
*/
static const char cType[] = {
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
1, 1, 1, 2, 1
};
/*
** isConsonant() and isVowel() determine if their first character in
** the string they point to is a consonant or a vowel, according
** to Porter ruls.
**
** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
** 'Y' is a consonant unless it follows another consonant,
** in which case it is a vowel.
**
** In these routine, the letters are in reverse order. So the 'y' rule
** is that 'y' is a consonant unless it is followed by another
** consonent.
*/
static int isVowel(const char*);
static int isConsonant(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return j;
return z[1]==0 || isVowel(z + 1);
}
static int isVowel(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return 1-j;
return isConsonant(z + 1);
}
/*
** Let any sequence of one or more vowels be represented by V and let
** C be sequence of one or more consonants. Then every word can be
** represented as:
**
** [C] (VC){m} [V]
**
** In prose: A word is an optional consonant followed by zero or
** vowel-consonant pairs followed by an optional vowel. "m" is the
** number of vowel consonant pairs. This routine computes the value
** of m for the first i bytes of a word.
**
** Return true if the m-value for z is 1 or more. In other words,
** return true if z contains at least one vowel that is followed
** by a consonant.
**
** In this routine z[] is in reverse order. So we are really looking
** for an instance of of a consonant followed by a vowel.
*/
static int m_gt_0(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/* Like mgt0 above except we are looking for a value of m which is
** exactly 1
*/
static int m_eq_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 1;
while( isConsonant(z) ){ z++; }
return *z==0;
}
/* Like mgt0 above except we are looking for a value of m>1 instead
** or m>0
*/
static int m_gt_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if there is a vowel anywhere within z[0..n-1]
*/
static int hasVowel(const char *z){
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if the word ends in a double consonant.
**
** The text is reversed here. So we are really looking at
** the first two characters of z[].
*/
static int doubleConsonant(const char *z){
return isConsonant(z) && z[0]==z[1] && isConsonant(z+1);
}
/*
** Return TRUE if the word ends with three letters which
** are consonant-vowel-consonent and where the final consonant
** is not 'w', 'x', or 'y'.
**
** The word is reversed here. So we are really checking the
** first three letters and the first one cannot be in [wxy].
*/
static int star_oh(const char *z){
return
z[0]!=0 && isConsonant(z) &&
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
z[1]!=0 && isVowel(z+1) &&
z[2]!=0 && isConsonant(z+2);
}
/*
** If the word ends with zFrom and xCond() is true for the stem
** of the word that preceeds the zFrom ending, then change the
** ending to zTo.
**
** The input word *pz and zFrom are both in reverse order. zTo
** is in normal order.
**
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
** match. Not that TRUE is returned even if xCond() fails and
** no substitution occurs.
*/
static int stem(
char **pz, /* The word being stemmed (Reversed) */
const char *zFrom, /* If the ending matches this... (Reversed) */
const char *zTo, /* ... change the ending to this (not reversed) */
int (*xCond)(const char*) /* Condition that must be true */
){
char *z = *pz;
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
if( *zFrom!=0 ) return 0;
if( xCond && !xCond(z) ) return 1;
while( *zTo ){
*(--z) = *(zTo++);
}
*pz = z;
return 1;
}
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate. The input word is copied into the output with
** US-ASCII case folding. If the input word is too long (more
** than 20 bytes if it contains no digits or more than 6 bytes if
** it contains digits) then word is truncated to 20 or 6 bytes
** by taking 10 or 3 bytes from the beginning and end.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, mx, j;
int hasDigit = 0;
for(i=0; i<nIn; i++){
int c = zIn[i];
if( c>='A' && c<='Z' ){
zOut[i] = c - 'A' + 'a';
}else{
if( c>='0' && c<='9' ) hasDigit = 1;
zOut[i] = c;
}
}
mx = hasDigit ? 3 : 10;
if( nIn>mx*2 ){
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
zOut[j] = zOut[i];
}
i = j;
}
zOut[i] = 0;
*pnOut = i;
}
/*
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
** zOut is at least big enough to hold nIn bytes. Write the actual
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
**
** Any upper-case characters in the US-ASCII character set ([A-Z])
** are converted to lower case. Upper-case UTF characters are
** unchanged.
**
** Words that are longer than about 20 bytes are stemmed by retaining
** a few bytes from the beginning and the end of the word. If the
** word contains digits, 3 bytes are taken from the beginning and
** 3 bytes from the end. For long words without digits, 10 bytes
** are taken from each end. US-ASCII case folding still applies.
**
** If the input word contains not digits but does characters not
** in [a-zA-Z] then no stemming is attempted and this routine just
** copies the input into the input into the output with US-ASCII
** case folding.
**
** Stemming never increases the length of the word. So there is
** no chance of overflowing the zOut buffer.
*/
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, j, c;
char zReverse[28];
char *z, *z2;
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
/* The word is too big or too small for the porter stemmer.
** Fallback to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
c = zIn[i];
if( c>='A' && c<='Z' ){
zReverse[j] = c + 'a' - 'A';
}else if( c>='a' && c<='z' ){
zReverse[j] = c;
}else{
/* The use of a character not in [a-zA-Z] means that we fallback
** to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
}
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
z = &zReverse[j+1];
/* Step 1a */
if( z[0]=='s' ){
if(
!stem(&z, "sess", "ss", 0) &&
!stem(&z, "sei", "i", 0) &&
!stem(&z, "ss", "ss", 0)
){
z++;
}
}
/* Step 1b */
z2 = z;
if( stem(&z, "dee", "ee", m_gt_0) ){
/* Do nothing. The work was all in the test */
}else if(
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
&& z!=z2
){
if( stem(&z, "ta", "ate", 0) ||
stem(&z, "lb", "ble", 0) ||
stem(&z, "zi", "ize", 0) ){
/* Do nothing. The work was all in the test */
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
z++;
}else if( m_eq_1(z) && star_oh(z) ){
*(--z) = 'e';
}
}
/* Step 1c */
if( z[0]=='y' && hasVowel(z+1) ){
z[0] = 'i';
}
/* Step 2 */
switch( z[1] ){
case 'a':
stem(&z, "lanoita", "ate", m_gt_0) ||
stem(&z, "lanoit", "tion", m_gt_0);
break;
case 'c':
stem(&z, "icne", "ence", m_gt_0) ||
stem(&z, "icna", "ance", m_gt_0);
break;
case 'e':
stem(&z, "rezi", "ize", m_gt_0);
break;
case 'g':
stem(&z, "igol", "log", m_gt_0);
break;
case 'l':
stem(&z, "ilb", "ble", m_gt_0) ||
stem(&z, "illa", "al", m_gt_0) ||
stem(&z, "iltne", "ent", m_gt_0) ||
stem(&z, "ile", "e", m_gt_0) ||
stem(&z, "ilsuo", "ous", m_gt_0);
break;
case 'o':
stem(&z, "noitazi", "ize", m_gt_0) ||
stem(&z, "noita", "ate", m_gt_0) ||
stem(&z, "rota", "ate", m_gt_0);
break;
case 's':
stem(&z, "msila", "al", m_gt_0) ||
stem(&z, "ssenevi", "ive", m_gt_0) ||
stem(&z, "ssenluf", "ful", m_gt_0) ||
stem(&z, "ssensuo", "ous", m_gt_0);
break;
case 't':
stem(&z, "itila", "al", m_gt_0) ||
stem(&z, "itivi", "ive", m_gt_0) ||
stem(&z, "itilib", "ble", m_gt_0);
break;
}
/* Step 3 */
switch( z[0] ){
case 'e':
stem(&z, "etaci", "ic", m_gt_0) ||
stem(&z, "evita", "", m_gt_0) ||
stem(&z, "ezila", "al", m_gt_0);
break;
case 'i':
stem(&z, "itici", "ic", m_gt_0);
break;
case 'l':
stem(&z, "laci", "ic", m_gt_0) ||
stem(&z, "luf", "", m_gt_0);
break;
case 's':
stem(&z, "ssen", "", m_gt_0);
break;
}
/* Step 4 */
switch( z[1] ){
case 'a':
if( z[0]=='l' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'c':
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'e':
if( z[0]=='r' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'i':
if( z[0]=='c' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'l':
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'n':
if( z[0]=='t' ){
if( z[2]=='a' ){
if( m_gt_1(z+3) ){
z += 3;
}
}else if( z[2]=='e' ){
stem(&z, "tneme", "", m_gt_1) ||
stem(&z, "tnem", "", m_gt_1) ||
stem(&z, "tne", "", m_gt_1);
}
}
break;
case 'o':
if( z[0]=='u' ){
if( m_gt_1(z+2) ){
z += 2;
}
}else if( z[3]=='s' || z[3]=='t' ){
stem(&z, "noi", "", m_gt_1);
}
break;
case 's':
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
case 't':
stem(&z, "eta", "", m_gt_1) ||
stem(&z, "iti", "", m_gt_1);
break;
case 'u':
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
z += 3;
}
break;
case 'v':
case 'z':
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
}
/* Step 5a */
if( z[0]=='e' ){
if( m_gt_1(z+1) ){
z++;
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
z++;
}
}
/* Step 5b */
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
z++;
}
/* z[] is now the stemmed word in reverse order. Flip it back
** around into forward order and return.
*/
*pnOut = i = strlen(z);
zOut[i] = 0;
while( *z ){
zOut[--i] = *(z++);
}
}
/*
** Characters that can be part of a token. We assume any character
** whose value is greater than 0x80 (any UTF character) can be
** part of a token. In other words, delimiters all must have
** values of 0x7f or lower.
*/
static const char porterIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to porterOpen().
*/
static int porterNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
const char **pzToken, /* OUT: *pzToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
const char *z = c->zInput;
while( c->iOffset<c->nInput ){
int iStartOffset, ch;
/* Scan past delimiter characters */
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int n = c->iOffset-iStartOffset;
if( n>c->nAllocated ){
c->nAllocated = n+20;
c->zToken = realloc(c->zToken, c->nAllocated);
if( c->zToken==NULL ) return SQLITE_NOMEM;
}
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
*pzToken = c->zToken;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the porter-stemmer tokenizer
*/
static const sqlite3_tokenizer_module porterTokenizerModule = {
0,
porterCreate,
porterDestroy,
porterOpen,
porterClose,
porterNext,
};
/*
** Allocate a new porter tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts2PorterTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &porterTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View file

@ -0,0 +1,371 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is part of an SQLite module implementing full-text search.
** This particular file implements the generic tokenizer interface.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS2 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS2 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#include "fts2_hash.h"
#include "fts2_tokenizer.h"
#include <assert.h>
/*
** Implementation of the SQL scalar function for accessing the underlying
** hash table. This function may be called as follows:
**
** SELECT <function-name>(<key-name>);
** SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
**
** If the <pointer> argument is specified, it must be a blob value
** containing a pointer to be stored as the hash data corresponding
** to the string <key-name>. If <pointer> is not specified, then
** the string <key-name> must already exist in the has table. Otherwise,
** an error is returned.
**
** Whether or not the <pointer> argument is specified, the value returned
** is a blob containing the pointer stored as the hash data corresponding
** to string <key-name> (after the hash-table is updated, if applicable).
*/
static void scalarFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
fts2Hash *pHash;
void *pPtr = 0;
const unsigned char *zName;
int nName;
assert( argc==1 || argc==2 );
pHash = (fts2Hash *)sqlite3_user_data(context);
zName = sqlite3_value_text(argv[0]);
nName = sqlite3_value_bytes(argv[0])+1;
if( argc==2 ){
void *pOld;
int n = sqlite3_value_bytes(argv[1]);
if( n!=sizeof(pPtr) ){
sqlite3_result_error(context, "argument type mismatch", -1);
return;
}
pPtr = *(void **)sqlite3_value_blob(argv[1]);
pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr);
if( pOld==pPtr ){
sqlite3_result_error(context, "out of memory", -1);
return;
}
}else{
pPtr = sqlite3Fts2HashFind(pHash, zName, nName);
if( !pPtr ){
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr, -1);
sqlite3_free(zErr);
return;
}
}
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
}
#ifdef SQLITE_TEST
#include <tcl.h>
#include <string.h>
/*
** Implementation of a special SQL scalar function for testing tokenizers
** designed to be used in concert with the Tcl testing framework. This
** function must be called with two arguments:
**
** SELECT <function-name>(<key-name>, <input-string>);
** SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
**
** The return value is a string that may be interpreted as a Tcl
** list. For each token in the <input-string>, three elements are
** added to the returned list. The first is the token position, the
** second is the token text (folded, stemmed, etc.) and the third is the
** substring of <input-string> associated with the token. For example,
** using the built-in "simple" tokenizer:
**
** SELECT fts_tokenizer_test('simple', 'I don't see how');
**
** will return the string:
**
** "{0 i I 1 dont don't 2 see see 3 how how}"
**
*/
static void testFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
fts2Hash *pHash;
sqlite3_tokenizer_module *p;
sqlite3_tokenizer *pTokenizer = 0;
sqlite3_tokenizer_cursor *pCsr = 0;
const char *zErr = 0;
const char *zName;
int nName;
const char *zInput;
int nInput;
const char *zArg = 0;
const char *zToken;
int nToken;
int iStart;
int iEnd;
int iPos;
Tcl_Obj *pRet;
assert( argc==2 || argc==3 );
nName = sqlite3_value_bytes(argv[0]);
zName = (const char *)sqlite3_value_text(argv[0]);
nInput = sqlite3_value_bytes(argv[argc-1]);
zInput = (const char *)sqlite3_value_text(argv[argc-1]);
if( argc==3 ){
zArg = (const char *)sqlite3_value_text(argv[1]);
}
pHash = (fts2Hash *)sqlite3_user_data(context);
p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);
if( !p ){
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr, -1);
sqlite3_free(zErr);
return;
}
pRet = Tcl_NewObj();
Tcl_IncrRefCount(pRet);
if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
zErr = "error in xCreate()";
goto finish;
}
pTokenizer->pModule = p;
if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
zErr = "error in xOpen()";
goto finish;
}
pCsr->pTokenizer = pTokenizer;
while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
zToken = &zInput[iStart];
nToken = iEnd-iStart;
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
}
if( SQLITE_OK!=p->xClose(pCsr) ){
zErr = "error in xClose()";
goto finish;
}
if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
zErr = "error in xDestroy()";
goto finish;
}
finish:
if( zErr ){
sqlite3_result_error(context, zErr, -1);
}else{
sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
}
Tcl_DecrRefCount(pRet);
}
static
int registerTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module *p
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
sqlite3_step(pStmt);
return sqlite3_finalize(pStmt);
}
static
int queryTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module **pp
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts2_tokenizer(?)";
*pp = 0;
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
}
}
return sqlite3_finalize(pStmt);
}
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
/*
** Implementation of the scalar function fts2_tokenizer_internal_test().
** This function is used for testing only, it is not included in the
** build unless SQLITE_TEST is defined.
**
** The purpose of this is to test that the fts2_tokenizer() function
** can be used as designed by the C-code in the queryTokenizer and
** registerTokenizer() functions above. These two functions are repeated
** in the README.tokenizer file as an example, so it is important to
** test them.
**
** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar
** function with no arguments. An assert() will fail if a problem is
** detected. i.e.:
**
** SELECT fts2_tokenizer_internal_test();
**
*/
static void intTestFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
int rc;
const sqlite3_tokenizer_module *p1;
const sqlite3_tokenizer_module *p2;
sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
/* Test the query function */
sqlite3Fts2SimpleTokenizerModule(&p1);
rc = queryTokenizer(db, "simple", &p2);
assert( rc==SQLITE_OK );
assert( p1==p2 );
rc = queryTokenizer(db, "nosuchtokenizer", &p2);
assert( rc==SQLITE_ERROR );
assert( p2==0 );
assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
/* Test the storage function */
rc = registerTokenizer(db, "nosuchtokenizer", p1);
assert( rc==SQLITE_OK );
rc = queryTokenizer(db, "nosuchtokenizer", &p2);
assert( rc==SQLITE_OK );
assert( p2==p1 );
sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
}
#endif
/*
** Set up SQL objects in database db used to access the contents of
** the hash table pointed to by argument pHash. The hash table must
** been initialised to use string keys, and to take a private copy
** of the key when a value is inserted. i.e. by a call similar to:
**
** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
**
** This function adds a scalar function (see header comment above
** scalarFunc() in this file for details) and, if ENABLE_TABLE is
** defined at compilation time, a temporary virtual table (see header
** comment above struct HashTableVtab) to the database schema. Both
** provide read/write access to the contents of *pHash.
**
** The third argument to this function, zName, is used as the name
** of both the scalar and, if created, the virtual table.
*/
int sqlite3Fts2InitHashTable(
sqlite3 *db,
fts2Hash *pHash,
const char *zName
){
int rc = SQLITE_OK;
void *p = (void *)pHash;
const int any = SQLITE_ANY;
char *zTest = 0;
char *zTest2 = 0;
#ifdef SQLITE_TEST
void *pdb = (void *)db;
zTest = sqlite3_mprintf("%s_test", zName);
zTest2 = sqlite3_mprintf("%s_internal_test", zName);
if( !zTest || !zTest2 ){
rc = SQLITE_NOMEM;
}
#endif
if( rc!=SQLITE_OK
|| (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
|| (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0))
#ifdef SQLITE_TEST
|| (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0))
|| (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0))
|| (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0))
#endif
);
sqlite3_free(zTest);
sqlite3_free(zTest2);
return rc;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View file

@ -0,0 +1,145 @@
/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search. There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions. This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
#ifndef _FTS2_TOKENIZER_H_
#define _FTS2_TOKENIZER_H_
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
#include "sqlite3.h"
/*
** Structures used by the tokenizer interface. When a new tokenizer
** implementation is registered, the caller provides a pointer to
** an sqlite3_tokenizer_module containing pointers to the callback
** functions that make up an implementation.
**
** When an fts2 table is created, it passes any arguments passed to
** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
** implementation. The xCreate() function in turn returns an
** sqlite3_tokenizer structure representing the specific tokenizer to
** be used for the fts2 table (customized by the tokenizer clause arguments).
**
** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
** method is called. It returns an sqlite3_tokenizer_cursor object
** that may be used to tokenize a specific input buffer based on
** the tokenization rules supplied by a specific sqlite3_tokenizer
** object.
*/
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
struct sqlite3_tokenizer_module {
/*
** Structure version. Should always be set to 0.
*/
int iVersion;
/*
** Create a new tokenizer. The values in the argv[] array are the
** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
** TABLE statement that created the fts2 table. For example, if
** the following SQL is executed:
**
** CREATE .. USING fts2( ... , tokenizer <tokenizer-name> arg1 arg2)
**
** then argc is set to 2, and the argv[] array contains pointers
** to the strings "arg1" and "arg2".
**
** This method should return either SQLITE_OK (0), or an SQLite error
** code. If SQLITE_OK is returned, then *ppTokenizer should be set
** to point at the newly created tokenizer structure. The generic
** sqlite3_tokenizer.pModule variable should not be initialised by
** this callback. The caller will do so.
*/
int (*xCreate)(
int argc, /* Size of argv array */
const char *const*argv, /* Tokenizer argument strings */
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
);
/*
** Destroy an existing tokenizer. The fts2 module calls this method
** exactly once for each successful call to xCreate().
*/
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
/*
** Create a tokenizer cursor to tokenize an input buffer. The caller
** is responsible for ensuring that the input buffer remains valid
** until the cursor is closed (using the xClose() method).
*/
int (*xOpen)(
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
const char *pInput, int nBytes, /* Input buffer */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
);
/*
** Destroy an existing tokenizer cursor. The fts2 module calls this
** method exactly once for each successful call to xOpen().
*/
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
/*
** Retrieve the next token from the tokenizer cursor pCursor. This
** method should either return SQLITE_OK and set the values of the
** "OUT" variables identified below, or SQLITE_DONE to indicate that
** the end of the buffer has been reached, or an SQLite error code.
**
** *ppToken should be set to point at a buffer containing the
** normalized version of the token (i.e. after any case-folding and/or
** stemming has been performed). *pnBytes should be set to the length
** of this buffer in bytes. The input text that generated the token is
** identified by the byte offsets returned in *piStartOffset and
** *piEndOffset.
**
** The buffer *ppToken is set to point at is managed by the tokenizer
** implementation. It is only required to be valid until the next call
** to xNext() or xClose().
*/
/* TODO(shess) current implementation requires pInput to be
** nul-terminated. This should either be fixed, or pInput/nBytes
** should be converted to zInput.
*/
int (*xNext)(
sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
int *piStartOffset, /* OUT: Byte offset of token in input buffer */
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
int *piPosition /* OUT: Number of tokens returned before this one */
);
};
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
#endif /* _FTS2_TOKENIZER_H_ */

View file

@ -0,0 +1,229 @@
/*
** 2006 Oct 10
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Implementation of the "simple" full-text-search tokenizer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS2 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS2 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "fts2_tokenizer.h"
typedef struct simple_tokenizer {
sqlite3_tokenizer base;
char delim[128]; /* flag ASCII delimiters */
} simple_tokenizer;
typedef struct simple_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *pInput; /* input we are tokenizing */
int nBytes; /* size of the input */
int iOffset; /* current position in pInput */
int iToken; /* index of next token to be returned */
char *pToken; /* storage for current token */
int nTokenAllocated; /* space allocated to zToken buffer */
} simple_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module simpleTokenizerModule;
static int simpleDelim(simple_tokenizer *t, unsigned char c){
return c<0x80 && t->delim[c];
}
/*
** Create a new tokenizer instance.
*/
static int simpleCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
simple_tokenizer *t;
t = (simple_tokenizer *) calloc(sizeof(*t), 1);
if( t==NULL ) return SQLITE_NOMEM;
/* TODO(shess) Delimiters need to remain the same from run to run,
** else we need to reindex. One solution would be a meta-table to
** track such information in the database, then we'd only want this
** information on the initial create.
*/
if( argc>1 ){
int i, n = strlen(argv[1]);
for(i=0; i<n; i++){
unsigned char ch = argv[1][i];
/* We explicitly don't support UTF-8 delimiters for now. */
if( ch>=0x80 ){
free(t);
return SQLITE_ERROR;
}
t->delim[ch] = 1;
}
} else {
/* Mark non-alphanumeric ASCII characters as delimiters */
int i;
for(i=1; i<0x80; i++){
t->delim[i] = !isalnum(i);
}
}
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int simpleOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *pInput, int nBytes, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
simple_tokenizer_cursor *c;
c = (simple_tokenizer_cursor *) malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->pInput = pInput;
if( pInput==0 ){
c->nBytes = 0;
}else if( nBytes<0 ){
c->nBytes = (int)strlen(pInput);
}else{
c->nBytes = nBytes;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->pToken = NULL; /* no space allocated, yet. */
c->nTokenAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** simpleOpen() above.
*/
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
free(c->pToken);
free(c);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to simpleOpen().
*/
static int simpleNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
unsigned char *p = (unsigned char *)c->pInput;
while( c->iOffset<c->nBytes ){
int iStartOffset;
/* Scan past delimiter characters */
while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int i, n = c->iOffset-iStartOffset;
if( n>c->nTokenAllocated ){
c->nTokenAllocated = n+20;
c->pToken = realloc(c->pToken, c->nTokenAllocated);
if( c->pToken==NULL ) return SQLITE_NOMEM;
}
for(i=0; i<n; i++){
/* TODO(shess) This needs expansion to handle UTF-8
** case-insensitivity.
*/
unsigned char ch = p[iStartOffset+i];
c->pToken[i] = ch<0x80 ? tolower(ch) : ch;
}
*ppToken = c->pToken;
*pnBytes = n;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module simpleTokenizerModule = {
0,
simpleCreate,
simpleDestroy,
simpleOpen,
simpleClose,
simpleNext,
};
/*
** Allocate a new simple tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts2SimpleTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &simpleTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View file

@ -0,0 +1,116 @@
#!/usr/bin/tclsh
#
# This script builds a single C code file holding all of FTS2 code.
# The name of the output file is fts2amal.c. To build this file,
# first do:
#
# make target_source
#
# The make target above moves all of the source code files into
# a subdirectory named "tsrc". (This script expects to find the files
# there and will not work if they are not found.)
#
# After the "tsrc" directory has been created and populated, run
# this script:
#
# tclsh mkfts2amal.tcl
#
# The amalgamated FTS2 code will be written into fts2amal.c
#
# Open the output file and write a header comment at the beginning
# of the file.
#
set out [open fts2amal.c w]
set today [clock format [clock seconds] -format "%Y-%m-%d %H:%M:%S UTC" -gmt 1]
puts $out [subst \
{/******************************************************************************
** This file is an amalgamation of separate C source files from the SQLite
** Full Text Search extension 2 (fts2). By combining all the individual C
** code files into this single large file, the entire code can be compiled
** as a one translation unit. This allows many compilers to do optimizations
** that would not be possible if the files were compiled separately. It also
** makes the code easier to import into other projects.
**
** This amalgamation was generated on $today.
*/}]
# These are the header files used by FTS2. The first time any of these
# files are seen in a #include statement in the C code, include the complete
# text of the file in-line. The file only needs to be included once.
#
foreach hdr {
fts2.h
fts2_hash.h
fts2_tokenizer.h
sqlite3.h
sqlite3ext.h
} {
set available_hdr($hdr) 1
}
# 78 stars used for comment formatting.
set s78 \
{*****************************************************************************}
# Insert a comment into the code
#
proc section_comment {text} {
global out s78
set n [string length $text]
set nstar [expr {60 - $n}]
set stars [string range $s78 0 $nstar]
puts $out "/************** $text $stars/"
}
# Read the source file named $filename and write it into the
# sqlite3.c output file. If any #include statements are seen,
# process them approprately.
#
proc copy_file {filename} {
global seen_hdr available_hdr out
set tail [file tail $filename]
section_comment "Begin file $tail"
set in [open $filename r]
while {![eof $in]} {
set line [gets $in]
if {[regexp {^#\s*include\s+["<]([^">]+)[">]} $line all hdr]} {
if {[info exists available_hdr($hdr)]} {
if {$available_hdr($hdr)} {
section_comment "Include $hdr in the middle of $tail"
copy_file tsrc/$hdr
section_comment "Continuing where we left off in $tail"
}
} elseif {![info exists seen_hdr($hdr)]} {
set seen_hdr($hdr) 1
puts $out $line
}
} elseif {[regexp {^#ifdef __cplusplus} $line]} {
puts $out "#if 0"
} elseif {[regexp {^#line} $line]} {
# Skip #line directives.
} else {
puts $out $line
}
}
close $in
section_comment "End of $tail"
}
# Process the source files. Process files containing commonly
# used subroutines first in order to help the compiler find
# inlining opportunities.
#
foreach file {
fts2.c
fts2_hash.c
fts2_porter.c
fts2_tokenizer.c
fts2_tokenizer1.c
fts2_icu.c
} {
copy_file tsrc/$file
}
close $out

View file

@ -0,0 +1,13 @@
/README.txt/1.1/Mon Aug 20 17:37:03 2007//
/fts3.h/1.1/Mon Aug 20 17:37:04 2007//
/fts3_hash.c/1.4/Sat Nov 24 00:41:52 2007//
/fts3_hash.h/1.2/Thu Sep 20 12:53:28 2007//
/fts3_icu.c/1.2/Wed Oct 24 21:52:37 2007//
/fts3_tokenizer.h/1.1/Mon Aug 20 17:37:04 2007//
/fts3_tokenizer1.c/1.2/Fri Nov 23 17:31:18 2007//
/README.tokenizers/1.2/Thu Dec 13 21:44:26 2007//
/mkfts3amal.tcl/1.2/Thu Jan 31 12:32:59 2008//
/fts3_porter.c/1.2/Fri Feb 1 15:40:34 2008//
/fts3.c/1.15/Sat Feb 2 16:24:34 2008//
/fts3_tokenizer.c/1.3/Sat Feb 2 16:24:34 2008//
D

View file

@ -0,0 +1 @@
sqlite/ext/fts3

View file

@ -0,0 +1 @@
:pserver:drh@sqlite.org:/sqlite

View file

@ -0,0 +1,133 @@
1. FTS3 Tokenizers
When creating a new full-text table, FTS3 allows the user to select
the text tokenizer implementation to be used when indexing text
by specifying a "tokenizer" clause as part of the CREATE VIRTUAL TABLE
statement:
CREATE VIRTUAL TABLE <table-name> USING fts3(
<columns ...> [, tokenizer <tokenizer-name> [<tokenizer-args>]]
);
The built-in tokenizers (valid values to pass as <tokenizer name>) are
"simple" and "porter".
<tokenizer-args> should consist of zero or more white-space separated
arguments to pass to the selected tokenizer implementation. The
interpretation of the arguments, if any, depends on the individual
tokenizer.
2. Custom Tokenizers
FTS3 allows users to provide custom tokenizer implementations. The
interface used to create a new tokenizer is defined and described in
the fts3_tokenizer.h source file.
Registering a new FTS3 tokenizer is similar to registering a new
virtual table module with SQLite. The user passes a pointer to a
structure containing pointers to various callback functions that
make up the implementation of the new tokenizer type. For tokenizers,
the structure (defined in fts3_tokenizer.h) is called
"sqlite3_tokenizer_module".
FTS3 does not expose a C-function that users call to register new
tokenizer types with a database handle. Instead, the pointer must
be encoded as an SQL blob value and passed to FTS3 through the SQL
engine by evaluating a special scalar function, "fts3_tokenizer()".
The fts3_tokenizer() function may be called with one or two arguments,
as follows:
SELECT fts3_tokenizer(<tokenizer-name>);
SELECT fts3_tokenizer(<tokenizer-name>, <sqlite3_tokenizer_module ptr>);
Where <tokenizer-name> is a string identifying the tokenizer and
<sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
structure encoded as an SQL blob. If the second argument is present,
it is registered as tokenizer <tokenizer-name> and a copy of it
returned. If only one argument is passed, a pointer to the tokenizer
implementation currently registered as <tokenizer-name> is returned,
encoded as a blob. Or, if no such tokenizer exists, an SQL exception
(error) is raised.
SECURITY: If the fts3 extension is used in an environment where potentially
malicious users may execute arbitrary SQL (i.e. gears), they should be
prevented from invoking the fts3_tokenizer() function, possibly using the
authorisation callback.
See "Sample code" below for an example of calling the fts3_tokenizer()
function from C code.
3. ICU Library Tokenizers
If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor
symbol defined, then there exists a built-in tokenizer named "icu"
implemented using the ICU library. The first argument passed to the
xCreate() method (see fts3_tokenizer.h) of this tokenizer may be
an ICU locale identifier. For example "tr_TR" for Turkish as used
in Turkey, or "en_AU" for English as used in Australia. For example:
"CREATE VIRTUAL TABLE thai_text USING fts3(text, tokenizer icu th_TH)"
The ICU tokenizer implementation is very simple. It splits the input
text according to the ICU rules for finding word boundaries and discards
any tokens that consist entirely of white-space. This may be suitable
for some applications in some locales, but not all. If more complex
processing is required, for example to implement stemming or
discard punctuation, this can be done by creating a tokenizer
implementation that uses the ICU tokenizer as part of its implementation.
When using the ICU tokenizer this way, it is safe to overwrite the
contents of the strings returned by the xNext() method (see
fts3_tokenizer.h).
4. Sample code.
The following two code samples illustrate the way C code should invoke
the fts3_tokenizer() scalar function:
int registerTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module *p
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
sqlite3_step(pStmt);
return sqlite3_finalize(pStmt);
}
int queryTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module **pp
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts3_tokenizer(?)";
*pp = 0;
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
}
}
return sqlite3_finalize(pStmt);
}

View file

@ -0,0 +1,4 @@
This folder contains source code to the second full-text search
extension for SQLite. While the API is the same, this version uses a
substantially different storage schema from fts1, so tables will need
to be rebuilt.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,26 @@
/*
** 2006 Oct 10
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This header file is used by programs that want to link against the
** FTS3 library. All it does is declare the sqlite3Fts3Init() interface.
*/
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
int sqlite3Fts3Init(sqlite3 *db);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

View file

@ -0,0 +1,374 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of generic hash-tables used in SQLite.
** We've modified it slightly to serve as a standalone hash table
** implementation for the full-text indexing module.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS3 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS3 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite3.h"
#include "fts3_hash.h"
/*
** Malloc and Free functions
*/
static void *fts3HashMalloc(int n){
void *p = sqlite3_malloc(n);
if( p ){
memset(p, 0, n);
}
return p;
}
static void fts3HashFree(void *p){
sqlite3_free(p);
}
/* Turn bulk memory into a hash table object by initializing the
** fields of the Hash structure.
**
** "pNew" is a pointer to the hash table that is to be initialized.
** keyClass is one of the constants
** FTS3_HASH_BINARY or FTS3_HASH_STRING. The value of keyClass
** determines what kind of key the hash table will use. "copyKey" is
** true if the hash table should make its own private copy of keys and
** false if it should just use the supplied pointer.
*/
void sqlite3Fts3HashInit(fts3Hash *pNew, int keyClass, int copyKey){
assert( pNew!=0 );
assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY );
pNew->keyClass = keyClass;
pNew->copyKey = copyKey;
pNew->first = 0;
pNew->count = 0;
pNew->htsize = 0;
pNew->ht = 0;
}
/* Remove all entries from a hash table. Reclaim all memory.
** Call this routine to delete a hash table or to reset a hash table
** to the empty state.
*/
void sqlite3Fts3HashClear(fts3Hash *pH){
fts3HashElem *elem; /* For looping over all elements of the table */
assert( pH!=0 );
elem = pH->first;
pH->first = 0;
fts3HashFree(pH->ht);
pH->ht = 0;
pH->htsize = 0;
while( elem ){
fts3HashElem *next_elem = elem->next;
if( pH->copyKey && elem->pKey ){
fts3HashFree(elem->pKey);
}
fts3HashFree(elem);
elem = next_elem;
}
pH->count = 0;
}
/*
** Hash and comparison functions when the mode is FTS3_HASH_STRING
*/
static int fts3StrHash(const void *pKey, int nKey){
const char *z = (const char *)pKey;
int h = 0;
if( nKey<=0 ) nKey = (int) strlen(z);
while( nKey > 0 ){
h = (h<<3) ^ h ^ *z++;
nKey--;
}
return h & 0x7fffffff;
}
static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
}
/*
** Hash and comparison functions when the mode is FTS3_HASH_BINARY
*/
static int fts3BinHash(const void *pKey, int nKey){
int h = 0;
const char *z = (const char *)pKey;
while( nKey-- > 0 ){
h = (h<<3) ^ h ^ *(z++);
}
return h & 0x7fffffff;
}
static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return memcmp(pKey1,pKey2,n1);
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamilar to some
** programmers, so we provide the following additional explanation:
**
** The name of the function is "ftsHashFunction". The function takes a
** single parameter "keyClass". The return value of ftsHashFunction()
** is a pointer to another function. Specifically, the return value
** of ftsHashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*ftsHashFunction(int keyClass))(const void*,int){
if( keyClass==FTS3_HASH_STRING ){
return &fts3StrHash;
}else{
assert( keyClass==FTS3_HASH_BINARY );
return &fts3BinHash;
}
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** For help in interpreted the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){
if( keyClass==FTS3_HASH_STRING ){
return &fts3StrCompare;
}else{
assert( keyClass==FTS3_HASH_BINARY );
return &fts3BinCompare;
}
}
/* Link an element into the hash table
*/
static void fts3HashInsertElement(
fts3Hash *pH, /* The complete hash table */
struct _fts3ht *pEntry, /* The entry into which pNew is inserted */
fts3HashElem *pNew /* The element to be inserted */
){
fts3HashElem *pHead; /* First element already in pEntry */
pHead = pEntry->chain;
if( pHead ){
pNew->next = pHead;
pNew->prev = pHead->prev;
if( pHead->prev ){ pHead->prev->next = pNew; }
else { pH->first = pNew; }
pHead->prev = pNew;
}else{
pNew->next = pH->first;
if( pH->first ){ pH->first->prev = pNew; }
pNew->prev = 0;
pH->first = pNew;
}
pEntry->count++;
pEntry->chain = pNew;
}
/* Resize the hash table so that it cantains "new_size" buckets.
** "new_size" must be a power of 2. The hash table might fail
** to resize if sqliteMalloc() fails.
*/
static void fts3Rehash(fts3Hash *pH, int new_size){
struct _fts3ht *new_ht; /* The new hash table */
fts3HashElem *elem, *next_elem; /* For looping over existing elements */
int (*xHash)(const void*,int); /* The hash function */
assert( (new_size & (new_size-1))==0 );
new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) );
if( new_ht==0 ) return;
fts3HashFree(pH->ht);
pH->ht = new_ht;
pH->htsize = new_size;
xHash = ftsHashFunction(pH->keyClass);
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
next_elem = elem->next;
fts3HashInsertElement(pH, &new_ht[h], elem);
}
}
/* This function (for internal use only) locates an element in an
** hash table that matches the given key. The hash for this key has
** already been computed and is passed as the 4th parameter.
*/
static fts3HashElem *fts3FindElementByHash(
const fts3Hash *pH, /* The pH to be searched */
const void *pKey, /* The key we are searching for */
int nKey,
int h /* The hash for this key. */
){
fts3HashElem *elem; /* Used to loop thru the element list */
int count; /* Number of elements left to test */
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
if( pH->ht ){
struct _fts3ht *pEntry = &pH->ht[h];
elem = pEntry->chain;
count = pEntry->count;
xCompare = ftsCompareFunction(pH->keyClass);
while( count-- && elem ){
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
return elem;
}
elem = elem->next;
}
}
return 0;
}
/* Remove a single entry from the hash table given a pointer to that
** element and a hash on the element's key.
*/
static void fts3RemoveElementByHash(
fts3Hash *pH, /* The pH containing "elem" */
fts3HashElem* elem, /* The element to be removed from the pH */
int h /* Hash value for the element */
){
struct _fts3ht *pEntry;
if( elem->prev ){
elem->prev->next = elem->next;
}else{
pH->first = elem->next;
}
if( elem->next ){
elem->next->prev = elem->prev;
}
pEntry = &pH->ht[h];
if( pEntry->chain==elem ){
pEntry->chain = elem->next;
}
pEntry->count--;
if( pEntry->count<=0 ){
pEntry->chain = 0;
}
if( pH->copyKey && elem->pKey ){
fts3HashFree(elem->pKey);
}
fts3HashFree( elem );
pH->count--;
if( pH->count<=0 ){
assert( pH->first==0 );
assert( pH->count==0 );
fts3HashClear(pH);
}
}
/* Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey. Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *sqlite3Fts3HashFind(const fts3Hash *pH, const void *pKey, int nKey){
int h; /* A hash on key */
fts3HashElem *elem; /* The element that matches key */
int (*xHash)(const void*,int); /* The hash function */
if( pH==0 || pH->ht==0 ) return 0;
xHash = ftsHashFunction(pH->keyClass);
assert( xHash!=0 );
h = (*xHash)(pKey,nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
elem = fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1));
return elem ? elem->data : 0;
}
/* Insert an element into the hash table pH. The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created. A copy of the key is made if the copyKey
** flag is set. NULL is returned.
**
** If another element already exists with the same key, then the
** new data replaces the old data and the old data is returned.
** The key is not copied in this instance. If a malloc fails, then
** the new data is returned and the hash table is unchanged.
**
** If the "data" parameter to this function is NULL, then the
** element corresponding to "key" is removed from the hash table.
*/
void *sqlite3Fts3HashInsert(
fts3Hash *pH, /* The hash table to insert into */
const void *pKey, /* The key */
int nKey, /* Number of bytes in the key */
void *data /* The data */
){
int hraw; /* Raw hash value of the key */
int h; /* the hash of the key modulo hash table size */
fts3HashElem *elem; /* Used to loop thru the element list */
fts3HashElem *new_elem; /* New element added to the pH */
int (*xHash)(const void*,int); /* The hash function */
assert( pH!=0 );
xHash = ftsHashFunction(pH->keyClass);
assert( xHash!=0 );
hraw = (*xHash)(pKey, nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
elem = fts3FindElementByHash(pH,pKey,nKey,h);
if( elem ){
void *old_data = elem->data;
if( data==0 ){
fts3RemoveElementByHash(pH,elem,h);
}else{
elem->data = data;
}
return old_data;
}
if( data==0 ) return 0;
new_elem = (fts3HashElem*)fts3HashMalloc( sizeof(fts3HashElem) );
if( new_elem==0 ) return data;
if( pH->copyKey && pKey!=0 ){
new_elem->pKey = fts3HashMalloc( nKey );
if( new_elem->pKey==0 ){
fts3HashFree(new_elem);
return data;
}
memcpy((void*)new_elem->pKey, pKey, nKey);
}else{
new_elem->pKey = (void*)pKey;
}
new_elem->nKey = nKey;
pH->count++;
if( pH->htsize==0 ){
fts3Rehash(pH,8);
if( pH->htsize==0 ){
pH->count = 0;
fts3HashFree(new_elem);
return data;
}
}
if( pH->count > pH->htsize ){
fts3Rehash(pH,pH->htsize*2);
}
assert( pH->htsize>0 );
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
fts3HashInsertElement(pH, &pH->ht[h], new_elem);
new_elem->data = data;
return 0;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

View file

@ -0,0 +1,110 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implemenation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _FTS3_HASH_H_
#define _FTS3_HASH_H_
/* Forward declarations of structures. */
typedef struct fts3Hash fts3Hash;
typedef struct fts3HashElem fts3HashElem;
/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly. Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct fts3Hash {
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
char copyKey; /* True if copy of key made on insert */
int count; /* Number of entries in this table */
fts3HashElem *first; /* The first element of the array */
int htsize; /* Number of buckets in the hash table */
struct _fts3ht { /* the hash table */
int count; /* Number of entries with this hash */
fts3HashElem *chain; /* Pointer to first entry with this hash */
} *ht;
};
/* Each element in the hash table is an instance of the following
** structure. All elements are stored on a single doubly-linked list.
**
** Again, this structure is intended to be opaque, but it can't really
** be opaque because it is used by macros.
*/
struct fts3HashElem {
fts3HashElem *next, *prev; /* Next and previous elements in the table */
void *data; /* Data associated with this element */
void *pKey; int nKey; /* Key associated with this element */
};
/*
** There are 2 different modes of operation for a hash table:
**
** FTS3_HASH_STRING pKey points to a string that is nKey bytes long
** (including the null-terminator, if any). Case
** is respected in comparisons.
**
** FTS3_HASH_BINARY pKey points to binary data nKey bytes long.
** memcmp() is used to compare keys.
**
** A copy of the key is made if the copyKey parameter to fts3HashInit is 1.
*/
#define FTS3_HASH_STRING 1
#define FTS3_HASH_BINARY 2
/*
** Access routines. To delete, insert a NULL pointer.
*/
void sqlite3Fts3HashInit(fts3Hash*, int keytype, int copyKey);
void *sqlite3Fts3HashInsert(fts3Hash*, const void *pKey, int nKey, void *pData);
void *sqlite3Fts3HashFind(const fts3Hash*, const void *pKey, int nKey);
void sqlite3Fts3HashClear(fts3Hash*);
/*
** Shorthand for the functions above
*/
#define fts3HashInit sqlite3Fts3HashInit
#define fts3HashInsert sqlite3Fts3HashInsert
#define fts3HashFind sqlite3Fts3HashFind
#define fts3HashClear sqlite3Fts3HashClear
/*
** Macros for looping over all elements of a hash table. The idiom is
** like this:
**
** fts3Hash h;
** fts3HashElem *p;
** ...
** for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){
** SomeStructure *pData = fts3HashData(p);
** // do something with pData
** }
*/
#define fts3HashFirst(H) ((H)->first)
#define fts3HashNext(E) ((E)->next)
#define fts3HashData(E) ((E)->data)
#define fts3HashKey(E) ((E)->pKey)
#define fts3HashKeysize(E) ((E)->nKey)
/*
** Number of entries in a hash table
*/
#define fts3HashCount(H) ((H)->count)
#endif /* _FTS3_HASH_H_ */

View file

@ -0,0 +1,258 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements a tokenizer for fts3 based on the ICU library.
**
** $Id: fts3_icu.c,v 1.2 2007/10/24 21:52:37 shess Exp $
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#ifdef SQLITE_ENABLE_ICU
#include <assert.h>
#include <string.h>
#include "fts3_tokenizer.h"
#include <unicode/ubrk.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/utf16.h>
typedef struct IcuTokenizer IcuTokenizer;
typedef struct IcuCursor IcuCursor;
struct IcuTokenizer {
sqlite3_tokenizer base;
char *zLocale;
};
struct IcuCursor {
sqlite3_tokenizer_cursor base;
UBreakIterator *pIter; /* ICU break-iterator object */
int nChar; /* Number of UChar elements in pInput */
UChar *aChar; /* Copy of input using utf-16 encoding */
int *aOffset; /* Offsets of each character in utf-8 input */
int nBuffer;
char *zBuffer;
int iToken;
};
/*
** Create a new tokenizer instance.
*/
static int icuCreate(
int argc, /* Number of entries in argv[] */
const char * const *argv, /* Tokenizer creation arguments */
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
){
IcuTokenizer *p;
int n = 0;
if( argc>0 ){
n = strlen(argv[0])+1;
}
p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
if( !p ){
return SQLITE_NOMEM;
}
memset(p, 0, sizeof(IcuTokenizer));
if( n ){
p->zLocale = (char *)&p[1];
memcpy(p->zLocale, argv[0], n);
}
*ppTokenizer = (sqlite3_tokenizer *)p;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
sqlite3_free(p);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int icuOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, /* Input string */
int nInput, /* Length of zInput in bytes */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
IcuCursor *pCsr;
const int32_t opt = U_FOLD_CASE_DEFAULT;
UErrorCode status = U_ZERO_ERROR;
int nChar;
UChar32 c;
int iInput = 0;
int iOut = 0;
*ppCursor = 0;
if( -1 == nInput ) nInput = strlen(nInput);
nChar = nInput+1;
pCsr = (IcuCursor *)sqlite3_malloc(
sizeof(IcuCursor) + /* IcuCursor */
nChar * sizeof(UChar) + /* IcuCursor.aChar[] */
(nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
);
if( !pCsr ){
return SQLITE_NOMEM;
}
memset(pCsr, 0, sizeof(IcuCursor));
pCsr->aChar = (UChar *)&pCsr[1];
pCsr->aOffset = (int *)&pCsr->aChar[nChar];
pCsr->aOffset[iOut] = iInput;
U8_NEXT(zInput, iInput, nInput, c);
while( c>0 ){
int isError = 0;
c = u_foldCase(c, opt);
U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
if( isError ){
sqlite3_free(pCsr);
return SQLITE_ERROR;
}
pCsr->aOffset[iOut] = iInput;
if( iInput<nInput ){
U8_NEXT(zInput, iInput, nInput, c);
}else{
c = 0;
}
}
pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
if( !U_SUCCESS(status) ){
sqlite3_free(pCsr);
return SQLITE_ERROR;
}
pCsr->nChar = iOut;
ubrk_first(pCsr->pIter);
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to icuOpen().
*/
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
IcuCursor *pCsr = (IcuCursor *)pCursor;
ubrk_close(pCsr->pIter);
sqlite3_free(pCsr->zBuffer);
sqlite3_free(pCsr);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor.
*/
static int icuNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
IcuCursor *pCsr = (IcuCursor *)pCursor;
int iStart = 0;
int iEnd = 0;
int nByte = 0;
while( iStart==iEnd ){
UChar32 c;
iStart = ubrk_current(pCsr->pIter);
iEnd = ubrk_next(pCsr->pIter);
if( iEnd==UBRK_DONE ){
return SQLITE_DONE;
}
while( iStart<iEnd ){
int iWhite = iStart;
U8_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
if( u_isspace(c) ){
iStart = iWhite;
}else{
break;
}
}
assert(iStart<=iEnd);
}
do {
UErrorCode status = U_ZERO_ERROR;
if( nByte ){
char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
if( !zNew ){
return SQLITE_NOMEM;
}
pCsr->zBuffer = zNew;
pCsr->nBuffer = nByte;
}
u_strToUTF8(
pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
&pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
&status /* Output success/failure */
);
} while( nByte>pCsr->nBuffer );
*ppToken = pCsr->zBuffer;
*pnBytes = nByte;
*piStartOffset = pCsr->aOffset[iStart];
*piEndOffset = pCsr->aOffset[iEnd];
*piPosition = pCsr->iToken++;
return SQLITE_OK;
}
/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module icuTokenizerModule = {
0, /* iVersion */
icuCreate, /* xCreate */
icuDestroy, /* xCreate */
icuOpen, /* xOpen */
icuClose, /* xClose */
icuNext, /* xNext */
};
/*
** Set *ppModule to point at the implementation of the ICU tokenizer.
*/
void sqlite3Fts3IcuTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &icuTokenizerModule;
}
#endif /* defined(SQLITE_ENABLE_ICU) */
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

View file

@ -0,0 +1,642 @@
/*
** 2006 September 30
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** Implementation of the full-text-search tokenizer that implements
** a Porter stemmer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS3 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS3 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "fts3_tokenizer.h"
/*
** Class derived from sqlite3_tokenizer
*/
typedef struct porter_tokenizer {
sqlite3_tokenizer base; /* Base class */
} porter_tokenizer;
/*
** Class derived from sqlit3_tokenizer_cursor
*/
typedef struct porter_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *zInput; /* input we are tokenizing */
int nInput; /* size of the input */
int iOffset; /* current position in zInput */
int iToken; /* index of next token to be returned */
char *zToken; /* storage for current token */
int nAllocated; /* space allocated to zToken buffer */
} porter_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module porterTokenizerModule;
/*
** Create a new tokenizer instance.
*/
static int porterCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
porter_tokenizer *t;
t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
if( t==NULL ) return SQLITE_NOMEM;
memset(t, 0, sizeof(*t));
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
sqlite3_free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is zInput[0..nInput-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int porterOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, int nInput, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
porter_tokenizer_cursor *c;
c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->zInput = zInput;
if( zInput==0 ){
c->nInput = 0;
}else if( nInput<0 ){
c->nInput = (int)strlen(zInput);
}else{
c->nInput = nInput;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->zToken = NULL; /* no space allocated, yet. */
c->nAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** porterOpen() above.
*/
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
sqlite3_free(c->zToken);
sqlite3_free(c);
return SQLITE_OK;
}
/*
** Vowel or consonant
*/
static const char cType[] = {
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
1, 1, 1, 2, 1
};
/*
** isConsonant() and isVowel() determine if their first character in
** the string they point to is a consonant or a vowel, according
** to Porter ruls.
**
** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
** 'Y' is a consonant unless it follows another consonant,
** in which case it is a vowel.
**
** In these routine, the letters are in reverse order. So the 'y' rule
** is that 'y' is a consonant unless it is followed by another
** consonent.
*/
static int isVowel(const char*);
static int isConsonant(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return j;
return z[1]==0 || isVowel(z + 1);
}
static int isVowel(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return 1-j;
return isConsonant(z + 1);
}
/*
** Let any sequence of one or more vowels be represented by V and let
** C be sequence of one or more consonants. Then every word can be
** represented as:
**
** [C] (VC){m} [V]
**
** In prose: A word is an optional consonant followed by zero or
** vowel-consonant pairs followed by an optional vowel. "m" is the
** number of vowel consonant pairs. This routine computes the value
** of m for the first i bytes of a word.
**
** Return true if the m-value for z is 1 or more. In other words,
** return true if z contains at least one vowel that is followed
** by a consonant.
**
** In this routine z[] is in reverse order. So we are really looking
** for an instance of of a consonant followed by a vowel.
*/
static int m_gt_0(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/* Like mgt0 above except we are looking for a value of m which is
** exactly 1
*/
static int m_eq_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 1;
while( isConsonant(z) ){ z++; }
return *z==0;
}
/* Like mgt0 above except we are looking for a value of m>1 instead
** or m>0
*/
static int m_gt_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if there is a vowel anywhere within z[0..n-1]
*/
static int hasVowel(const char *z){
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if the word ends in a double consonant.
**
** The text is reversed here. So we are really looking at
** the first two characters of z[].
*/
static int doubleConsonant(const char *z){
return isConsonant(z) && z[0]==z[1] && isConsonant(z+1);
}
/*
** Return TRUE if the word ends with three letters which
** are consonant-vowel-consonent and where the final consonant
** is not 'w', 'x', or 'y'.
**
** The word is reversed here. So we are really checking the
** first three letters and the first one cannot be in [wxy].
*/
static int star_oh(const char *z){
return
z[0]!=0 && isConsonant(z) &&
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
z[1]!=0 && isVowel(z+1) &&
z[2]!=0 && isConsonant(z+2);
}
/*
** If the word ends with zFrom and xCond() is true for the stem
** of the word that preceeds the zFrom ending, then change the
** ending to zTo.
**
** The input word *pz and zFrom are both in reverse order. zTo
** is in normal order.
**
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
** match. Not that TRUE is returned even if xCond() fails and
** no substitution occurs.
*/
static int stem(
char **pz, /* The word being stemmed (Reversed) */
const char *zFrom, /* If the ending matches this... (Reversed) */
const char *zTo, /* ... change the ending to this (not reversed) */
int (*xCond)(const char*) /* Condition that must be true */
){
char *z = *pz;
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
if( *zFrom!=0 ) return 0;
if( xCond && !xCond(z) ) return 1;
while( *zTo ){
*(--z) = *(zTo++);
}
*pz = z;
return 1;
}
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate. The input word is copied into the output with
** US-ASCII case folding. If the input word is too long (more
** than 20 bytes if it contains no digits or more than 6 bytes if
** it contains digits) then word is truncated to 20 or 6 bytes
** by taking 10 or 3 bytes from the beginning and end.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, mx, j;
int hasDigit = 0;
for(i=0; i<nIn; i++){
int c = zIn[i];
if( c>='A' && c<='Z' ){
zOut[i] = c - 'A' + 'a';
}else{
if( c>='0' && c<='9' ) hasDigit = 1;
zOut[i] = c;
}
}
mx = hasDigit ? 3 : 10;
if( nIn>mx*2 ){
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
zOut[j] = zOut[i];
}
i = j;
}
zOut[i] = 0;
*pnOut = i;
}
/*
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
** zOut is at least big enough to hold nIn bytes. Write the actual
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
**
** Any upper-case characters in the US-ASCII character set ([A-Z])
** are converted to lower case. Upper-case UTF characters are
** unchanged.
**
** Words that are longer than about 20 bytes are stemmed by retaining
** a few bytes from the beginning and the end of the word. If the
** word contains digits, 3 bytes are taken from the beginning and
** 3 bytes from the end. For long words without digits, 10 bytes
** are taken from each end. US-ASCII case folding still applies.
**
** If the input word contains not digits but does characters not
** in [a-zA-Z] then no stemming is attempted and this routine just
** copies the input into the input into the output with US-ASCII
** case folding.
**
** Stemming never increases the length of the word. So there is
** no chance of overflowing the zOut buffer.
*/
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, j, c;
char zReverse[28];
char *z, *z2;
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
/* The word is too big or too small for the porter stemmer.
** Fallback to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
c = zIn[i];
if( c>='A' && c<='Z' ){
zReverse[j] = c + 'a' - 'A';
}else if( c>='a' && c<='z' ){
zReverse[j] = c;
}else{
/* The use of a character not in [a-zA-Z] means that we fallback
** to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
}
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
z = &zReverse[j+1];
/* Step 1a */
if( z[0]=='s' ){
if(
!stem(&z, "sess", "ss", 0) &&
!stem(&z, "sei", "i", 0) &&
!stem(&z, "ss", "ss", 0)
){
z++;
}
}
/* Step 1b */
z2 = z;
if( stem(&z, "dee", "ee", m_gt_0) ){
/* Do nothing. The work was all in the test */
}else if(
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
&& z!=z2
){
if( stem(&z, "ta", "ate", 0) ||
stem(&z, "lb", "ble", 0) ||
stem(&z, "zi", "ize", 0) ){
/* Do nothing. The work was all in the test */
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
z++;
}else if( m_eq_1(z) && star_oh(z) ){
*(--z) = 'e';
}
}
/* Step 1c */
if( z[0]=='y' && hasVowel(z+1) ){
z[0] = 'i';
}
/* Step 2 */
switch( z[1] ){
case 'a':
stem(&z, "lanoita", "ate", m_gt_0) ||
stem(&z, "lanoit", "tion", m_gt_0);
break;
case 'c':
stem(&z, "icne", "ence", m_gt_0) ||
stem(&z, "icna", "ance", m_gt_0);
break;
case 'e':
stem(&z, "rezi", "ize", m_gt_0);
break;
case 'g':
stem(&z, "igol", "log", m_gt_0);
break;
case 'l':
stem(&z, "ilb", "ble", m_gt_0) ||
stem(&z, "illa", "al", m_gt_0) ||
stem(&z, "iltne", "ent", m_gt_0) ||
stem(&z, "ile", "e", m_gt_0) ||
stem(&z, "ilsuo", "ous", m_gt_0);
break;
case 'o':
stem(&z, "noitazi", "ize", m_gt_0) ||
stem(&z, "noita", "ate", m_gt_0) ||
stem(&z, "rota", "ate", m_gt_0);
break;
case 's':
stem(&z, "msila", "al", m_gt_0) ||
stem(&z, "ssenevi", "ive", m_gt_0) ||
stem(&z, "ssenluf", "ful", m_gt_0) ||
stem(&z, "ssensuo", "ous", m_gt_0);
break;
case 't':
stem(&z, "itila", "al", m_gt_0) ||
stem(&z, "itivi", "ive", m_gt_0) ||
stem(&z, "itilib", "ble", m_gt_0);
break;
}
/* Step 3 */
switch( z[0] ){
case 'e':
stem(&z, "etaci", "ic", m_gt_0) ||
stem(&z, "evita", "", m_gt_0) ||
stem(&z, "ezila", "al", m_gt_0);
break;
case 'i':
stem(&z, "itici", "ic", m_gt_0);
break;
case 'l':
stem(&z, "laci", "ic", m_gt_0) ||
stem(&z, "luf", "", m_gt_0);
break;
case 's':
stem(&z, "ssen", "", m_gt_0);
break;
}
/* Step 4 */
switch( z[1] ){
case 'a':
if( z[0]=='l' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'c':
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'e':
if( z[0]=='r' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'i':
if( z[0]=='c' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'l':
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'n':
if( z[0]=='t' ){
if( z[2]=='a' ){
if( m_gt_1(z+3) ){
z += 3;
}
}else if( z[2]=='e' ){
stem(&z, "tneme", "", m_gt_1) ||
stem(&z, "tnem", "", m_gt_1) ||
stem(&z, "tne", "", m_gt_1);
}
}
break;
case 'o':
if( z[0]=='u' ){
if( m_gt_1(z+2) ){
z += 2;
}
}else if( z[3]=='s' || z[3]=='t' ){
stem(&z, "noi", "", m_gt_1);
}
break;
case 's':
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
case 't':
stem(&z, "eta", "", m_gt_1) ||
stem(&z, "iti", "", m_gt_1);
break;
case 'u':
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
z += 3;
}
break;
case 'v':
case 'z':
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
}
/* Step 5a */
if( z[0]=='e' ){
if( m_gt_1(z+1) ){
z++;
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
z++;
}
}
/* Step 5b */
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
z++;
}
/* z[] is now the stemmed word in reverse order. Flip it back
** around into forward order and return.
*/
*pnOut = i = strlen(z);
zOut[i] = 0;
while( *z ){
zOut[--i] = *(z++);
}
}
/*
** Characters that can be part of a token. We assume any character
** whose value is greater than 0x80 (any UTF character) can be
** part of a token. In other words, delimiters all must have
** values of 0x7f or lower.
*/
static const char porterIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to porterOpen().
*/
static int porterNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
const char **pzToken, /* OUT: *pzToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
const char *z = c->zInput;
while( c->iOffset<c->nInput ){
int iStartOffset, ch;
/* Scan past delimiter characters */
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int n = c->iOffset-iStartOffset;
if( n>c->nAllocated ){
c->nAllocated = n+20;
c->zToken = sqlite3_realloc(c->zToken, c->nAllocated);
if( c->zToken==NULL ) return SQLITE_NOMEM;
}
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
*pzToken = c->zToken;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the porter-stemmer tokenizer
*/
static const sqlite3_tokenizer_module porterTokenizerModule = {
0,
porterCreate,
porterDestroy,
porterOpen,
porterClose,
porterNext,
};
/*
** Allocate a new porter tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts3PorterTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &porterTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

View file

@ -0,0 +1,371 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is part of an SQLite module implementing full-text search.
** This particular file implements the generic tokenizer interface.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS3 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS3 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#include "sqlite3ext.h"
#ifndef SQLITE_CORE
SQLITE_EXTENSION_INIT1
#endif
#include "fts3_hash.h"
#include "fts3_tokenizer.h"
#include <assert.h>
/*
** Implementation of the SQL scalar function for accessing the underlying
** hash table. This function may be called as follows:
**
** SELECT <function-name>(<key-name>);
** SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer').
**
** If the <pointer> argument is specified, it must be a blob value
** containing a pointer to be stored as the hash data corresponding
** to the string <key-name>. If <pointer> is not specified, then
** the string <key-name> must already exist in the has table. Otherwise,
** an error is returned.
**
** Whether or not the <pointer> argument is specified, the value returned
** is a blob containing the pointer stored as the hash data corresponding
** to string <key-name> (after the hash-table is updated, if applicable).
*/
static void scalarFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
fts3Hash *pHash;
void *pPtr = 0;
const unsigned char *zName;
int nName;
assert( argc==1 || argc==2 );
pHash = (fts3Hash *)sqlite3_user_data(context);
zName = sqlite3_value_text(argv[0]);
nName = sqlite3_value_bytes(argv[0])+1;
if( argc==2 ){
void *pOld;
int n = sqlite3_value_bytes(argv[1]);
if( n!=sizeof(pPtr) ){
sqlite3_result_error(context, "argument type mismatch", -1);
return;
}
pPtr = *(void **)sqlite3_value_blob(argv[1]);
pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr);
if( pOld==pPtr ){
sqlite3_result_error(context, "out of memory", -1);
return;
}
}else{
pPtr = sqlite3Fts3HashFind(pHash, zName, nName);
if( !pPtr ){
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr, -1);
sqlite3_free(zErr);
return;
}
}
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
}
#ifdef SQLITE_TEST
#include <tcl.h>
#include <string.h>
/*
** Implementation of a special SQL scalar function for testing tokenizers
** designed to be used in concert with the Tcl testing framework. This
** function must be called with two arguments:
**
** SELECT <function-name>(<key-name>, <input-string>);
** SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer')
** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test').
**
** The return value is a string that may be interpreted as a Tcl
** list. For each token in the <input-string>, three elements are
** added to the returned list. The first is the token position, the
** second is the token text (folded, stemmed, etc.) and the third is the
** substring of <input-string> associated with the token. For example,
** using the built-in "simple" tokenizer:
**
** SELECT fts_tokenizer_test('simple', 'I don't see how');
**
** will return the string:
**
** "{0 i I 1 dont don't 2 see see 3 how how}"
**
*/
static void testFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
fts3Hash *pHash;
sqlite3_tokenizer_module *p;
sqlite3_tokenizer *pTokenizer = 0;
sqlite3_tokenizer_cursor *pCsr = 0;
const char *zErr = 0;
const char *zName;
int nName;
const char *zInput;
int nInput;
const char *zArg = 0;
const char *zToken;
int nToken;
int iStart;
int iEnd;
int iPos;
Tcl_Obj *pRet;
assert( argc==2 || argc==3 );
nName = sqlite3_value_bytes(argv[0]);
zName = (const char *)sqlite3_value_text(argv[0]);
nInput = sqlite3_value_bytes(argv[argc-1]);
zInput = (const char *)sqlite3_value_text(argv[argc-1]);
if( argc==3 ){
zArg = (const char *)sqlite3_value_text(argv[1]);
}
pHash = (fts3Hash *)sqlite3_user_data(context);
p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
if( !p ){
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr, -1);
sqlite3_free(zErr);
return;
}
pRet = Tcl_NewObj();
Tcl_IncrRefCount(pRet);
if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
zErr = "error in xCreate()";
goto finish;
}
pTokenizer->pModule = p;
if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
zErr = "error in xOpen()";
goto finish;
}
pCsr->pTokenizer = pTokenizer;
while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
zToken = &zInput[iStart];
nToken = iEnd-iStart;
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
}
if( SQLITE_OK!=p->xClose(pCsr) ){
zErr = "error in xClose()";
goto finish;
}
if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
zErr = "error in xDestroy()";
goto finish;
}
finish:
if( zErr ){
sqlite3_result_error(context, zErr, -1);
}else{
sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
}
Tcl_DecrRefCount(pRet);
}
static
int registerTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module *p
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
sqlite3_step(pStmt);
return sqlite3_finalize(pStmt);
}
static
int queryTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module **pp
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts3_tokenizer(?)";
*pp = 0;
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
}
}
return sqlite3_finalize(pStmt);
}
void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
/*
** Implementation of the scalar function fts3_tokenizer_internal_test().
** This function is used for testing only, it is not included in the
** build unless SQLITE_TEST is defined.
**
** The purpose of this is to test that the fts3_tokenizer() function
** can be used as designed by the C-code in the queryTokenizer and
** registerTokenizer() functions above. These two functions are repeated
** in the README.tokenizer file as an example, so it is important to
** test them.
**
** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar
** function with no arguments. An assert() will fail if a problem is
** detected. i.e.:
**
** SELECT fts3_tokenizer_internal_test();
**
*/
static void intTestFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
int rc;
const sqlite3_tokenizer_module *p1;
const sqlite3_tokenizer_module *p2;
sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
/* Test the query function */
sqlite3Fts3SimpleTokenizerModule(&p1);
rc = queryTokenizer(db, "simple", &p2);
assert( rc==SQLITE_OK );
assert( p1==p2 );
rc = queryTokenizer(db, "nosuchtokenizer", &p2);
assert( rc==SQLITE_ERROR );
assert( p2==0 );
assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
/* Test the storage function */
rc = registerTokenizer(db, "nosuchtokenizer", p1);
assert( rc==SQLITE_OK );
rc = queryTokenizer(db, "nosuchtokenizer", &p2);
assert( rc==SQLITE_OK );
assert( p2==p1 );
sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
}
#endif
/*
** Set up SQL objects in database db used to access the contents of
** the hash table pointed to by argument pHash. The hash table must
** been initialised to use string keys, and to take a private copy
** of the key when a value is inserted. i.e. by a call similar to:
**
** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
**
** This function adds a scalar function (see header comment above
** scalarFunc() in this file for details) and, if ENABLE_TABLE is
** defined at compilation time, a temporary virtual table (see header
** comment above struct HashTableVtab) to the database schema. Both
** provide read/write access to the contents of *pHash.
**
** The third argument to this function, zName, is used as the name
** of both the scalar and, if created, the virtual table.
*/
int sqlite3Fts3InitHashTable(
sqlite3 *db,
fts3Hash *pHash,
const char *zName
){
int rc = SQLITE_OK;
void *p = (void *)pHash;
const int any = SQLITE_ANY;
char *zTest = 0;
char *zTest2 = 0;
#ifdef SQLITE_TEST
void *pdb = (void *)db;
zTest = sqlite3_mprintf("%s_test", zName);
zTest2 = sqlite3_mprintf("%s_internal_test", zName);
if( !zTest || !zTest2 ){
rc = SQLITE_NOMEM;
}
#endif
if( rc!=SQLITE_OK
|| (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
|| (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0))
#ifdef SQLITE_TEST
|| (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0))
|| (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0))
|| (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0))
#endif
);
sqlite3_free(zTest);
sqlite3_free(zTest2);
return rc;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

View file

@ -0,0 +1,145 @@
/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search. There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions. This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
#ifndef _FTS3_TOKENIZER_H_
#define _FTS3_TOKENIZER_H_
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
#include "sqlite3.h"
/*
** Structures used by the tokenizer interface. When a new tokenizer
** implementation is registered, the caller provides a pointer to
** an sqlite3_tokenizer_module containing pointers to the callback
** functions that make up an implementation.
**
** When an fts3 table is created, it passes any arguments passed to
** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
** implementation. The xCreate() function in turn returns an
** sqlite3_tokenizer structure representing the specific tokenizer to
** be used for the fts3 table (customized by the tokenizer clause arguments).
**
** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
** method is called. It returns an sqlite3_tokenizer_cursor object
** that may be used to tokenize a specific input buffer based on
** the tokenization rules supplied by a specific sqlite3_tokenizer
** object.
*/
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
struct sqlite3_tokenizer_module {
/*
** Structure version. Should always be set to 0.
*/
int iVersion;
/*
** Create a new tokenizer. The values in the argv[] array are the
** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
** TABLE statement that created the fts3 table. For example, if
** the following SQL is executed:
**
** CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
**
** then argc is set to 2, and the argv[] array contains pointers
** to the strings "arg1" and "arg2".
**
** This method should return either SQLITE_OK (0), or an SQLite error
** code. If SQLITE_OK is returned, then *ppTokenizer should be set
** to point at the newly created tokenizer structure. The generic
** sqlite3_tokenizer.pModule variable should not be initialised by
** this callback. The caller will do so.
*/
int (*xCreate)(
int argc, /* Size of argv array */
const char *const*argv, /* Tokenizer argument strings */
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
);
/*
** Destroy an existing tokenizer. The fts3 module calls this method
** exactly once for each successful call to xCreate().
*/
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
/*
** Create a tokenizer cursor to tokenize an input buffer. The caller
** is responsible for ensuring that the input buffer remains valid
** until the cursor is closed (using the xClose() method).
*/
int (*xOpen)(
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
const char *pInput, int nBytes, /* Input buffer */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
);
/*
** Destroy an existing tokenizer cursor. The fts3 module calls this
** method exactly once for each successful call to xOpen().
*/
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
/*
** Retrieve the next token from the tokenizer cursor pCursor. This
** method should either return SQLITE_OK and set the values of the
** "OUT" variables identified below, or SQLITE_DONE to indicate that
** the end of the buffer has been reached, or an SQLite error code.
**
** *ppToken should be set to point at a buffer containing the
** normalized version of the token (i.e. after any case-folding and/or
** stemming has been performed). *pnBytes should be set to the length
** of this buffer in bytes. The input text that generated the token is
** identified by the byte offsets returned in *piStartOffset and
** *piEndOffset.
**
** The buffer *ppToken is set to point at is managed by the tokenizer
** implementation. It is only required to be valid until the next call
** to xNext() or xClose().
*/
/* TODO(shess) current implementation requires pInput to be
** nul-terminated. This should either be fixed, or pInput/nBytes
** should be converted to zInput.
*/
int (*xNext)(
sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
int *piStartOffset, /* OUT: Byte offset of token in input buffer */
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
int *piPosition /* OUT: Number of tokens returned before this one */
);
};
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
#endif /* _FTS3_TOKENIZER_H_ */

View file

@ -0,0 +1,230 @@
/*
** 2006 Oct 10
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Implementation of the "simple" full-text-search tokenizer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS3 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS3 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "fts3_tokenizer.h"
typedef struct simple_tokenizer {
sqlite3_tokenizer base;
char delim[128]; /* flag ASCII delimiters */
} simple_tokenizer;
typedef struct simple_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *pInput; /* input we are tokenizing */
int nBytes; /* size of the input */
int iOffset; /* current position in pInput */
int iToken; /* index of next token to be returned */
char *pToken; /* storage for current token */
int nTokenAllocated; /* space allocated to zToken buffer */
} simple_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module simpleTokenizerModule;
static int simpleDelim(simple_tokenizer *t, unsigned char c){
return c<0x80 && t->delim[c];
}
/*
** Create a new tokenizer instance.
*/
static int simpleCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
simple_tokenizer *t;
t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t));
if( t==NULL ) return SQLITE_NOMEM;
memset(t, 0, sizeof(*t));
/* TODO(shess) Delimiters need to remain the same from run to run,
** else we need to reindex. One solution would be a meta-table to
** track such information in the database, then we'd only want this
** information on the initial create.
*/
if( argc>1 ){
int i, n = strlen(argv[1]);
for(i=0; i<n; i++){
unsigned char ch = argv[1][i];
/* We explicitly don't support UTF-8 delimiters for now. */
if( ch>=0x80 ){
sqlite3_free(t);
return SQLITE_ERROR;
}
t->delim[ch] = 1;
}
} else {
/* Mark non-alphanumeric ASCII characters as delimiters */
int i;
for(i=1; i<0x80; i++){
t->delim[i] = !isalnum(i);
}
}
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
sqlite3_free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int simpleOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *pInput, int nBytes, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
simple_tokenizer_cursor *c;
c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->pInput = pInput;
if( pInput==0 ){
c->nBytes = 0;
}else if( nBytes<0 ){
c->nBytes = (int)strlen(pInput);
}else{
c->nBytes = nBytes;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->pToken = NULL; /* no space allocated, yet. */
c->nTokenAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** simpleOpen() above.
*/
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
sqlite3_free(c->pToken);
sqlite3_free(c);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to simpleOpen().
*/
static int simpleNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
unsigned char *p = (unsigned char *)c->pInput;
while( c->iOffset<c->nBytes ){
int iStartOffset;
/* Scan past delimiter characters */
while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int i, n = c->iOffset-iStartOffset;
if( n>c->nTokenAllocated ){
c->nTokenAllocated = n+20;
c->pToken = sqlite3_realloc(c->pToken, c->nTokenAllocated);
if( c->pToken==NULL ) return SQLITE_NOMEM;
}
for(i=0; i<n; i++){
/* TODO(shess) This needs expansion to handle UTF-8
** case-insensitivity.
*/
unsigned char ch = p[iStartOffset+i];
c->pToken[i] = ch<0x80 ? tolower(ch) : ch;
}
*ppToken = c->pToken;
*pnBytes = n;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module simpleTokenizerModule = {
0,
simpleCreate,
simpleDestroy,
simpleOpen,
simpleClose,
simpleNext,
};
/*
** Allocate a new simple tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts3SimpleTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &simpleTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

View file

@ -0,0 +1,115 @@
#!/usr/bin/tclsh
#
# This script builds a single C code file holding all of FTS3 code.
# The name of the output file is fts3amal.c. To build this file,
# first do:
#
# make target_source
#
# The make target above moves all of the source code files into
# a subdirectory named "tsrc". (This script expects to find the files
# there and will not work if they are not found.)
#
# After the "tsrc" directory has been created and populated, run
# this script:
#
# tclsh mkfts3amal.tcl
#
# The amalgamated FTS3 code will be written into fts3amal.c
#
# Open the output file and write a header comment at the beginning
# of the file.
#
set out [open fts3amal.c w]
set today [clock format [clock seconds] -format "%Y-%m-%d %H:%M:%S UTC" -gmt 1]
puts $out [subst \
{/******************************************************************************
** This file is an amalgamation of separate C source files from the SQLite
** Full Text Search extension 2 (fts3). By combining all the individual C
** code files into this single large file, the entire code can be compiled
** as a one translation unit. This allows many compilers to do optimizations
** that would not be possible if the files were compiled separately. It also
** makes the code easier to import into other projects.
**
** This amalgamation was generated on $today.
*/}]
# These are the header files used by FTS3. The first time any of these
# files are seen in a #include statement in the C code, include the complete
# text of the file in-line. The file only needs to be included once.
#
foreach hdr {
fts3.h
fts3_hash.h
fts3_tokenizer.h
sqlite3.h
sqlite3ext.h
} {
set available_hdr($hdr) 1
}
# 78 stars used for comment formatting.
set s78 \
{*****************************************************************************}
# Insert a comment into the code
#
proc section_comment {text} {
global out s78
set n [string length $text]
set nstar [expr {60 - $n}]
set stars [string range $s78 0 $nstar]
puts $out "/************** $text $stars/"
}
# Read the source file named $filename and write it into the
# sqlite3.c output file. If any #include statements are seen,
# process them approprately.
#
proc copy_file {filename} {
global seen_hdr available_hdr out
set tail [file tail $filename]
section_comment "Begin file $tail"
set in [open $filename r]
while {![eof $in]} {
set line [gets $in]
if {[regexp {^#\s*include\s+["<]([^">]+)[">]} $line all hdr]} {
if {[info exists available_hdr($hdr)]} {
if {$available_hdr($hdr)} {
section_comment "Include $hdr in the middle of $tail"
copy_file tsrc/$hdr
section_comment "Continuing where we left off in $tail"
}
} elseif {![info exists seen_hdr($hdr)]} {
set seen_hdr($hdr) 1
puts $out $line
}
} elseif {[regexp {^#ifdef __cplusplus} $line]} {
puts $out "#if 0"
} elseif {[regexp {^#line} $line]} {
# Skip #line directives.
} else {
puts $out $line
}
}
close $in
section_comment "End of $tail"
}
# Process the source files. Process files containing commonly
# used subroutines first in order to help the compiler find
# inlining opportunities.
#
foreach file {
fts3.c
fts3_hash.c
fts3_porter.c
fts3_tokenizer.c
fts3_tokenizer1.c
} {
copy_file tsrc/$file
}
close $out

View file

@ -0,0 +1,3 @@
/README.txt/1.2/Fri Jun 22 15:21:16 2007//
/icu.c/1.7/Thu Dec 13 21:54:11 2007//
D

View file

@ -0,0 +1 @@
sqlite/ext/icu

View file

@ -0,0 +1 @@
:pserver:drh@sqlite.org:/sqlite

View file

@ -0,0 +1,170 @@
This directory contains source code for the SQLite "ICU" extension, an
integration of the "International Components for Unicode" library with
SQLite. Documentation follows.
1. Features
1.1 SQL Scalars upper() and lower()
1.2 Unicode Aware LIKE Operator
1.3 ICU Collation Sequences
1.4 SQL REGEXP Operator
2. Compilation and Usage
3. Bugs, Problems and Security Issues
3.1 The "case_sensitive_like" Pragma
3.2 The SQLITE_MAX_LIKE_PATTERN_LENGTH Macro
3.3 Collation Sequence Security Issue
1. FEATURES
1.1 SQL Scalars upper() and lower()
SQLite's built-in implementations of these two functions only
provide case mapping for the 26 letters used in the English
language. The ICU based functions provided by this extension
provide case mapping, where defined, for the full range of
unicode characters.
ICU provides two types of case mapping, "general" case mapping and
"language specific". Refer to ICU documentation for the differences
between the two. Specifically:
http://www.icu-project.org/userguide/caseMappings.html
http://www.icu-project.org/userguide/posix.html#case_mappings
To utilise "general" case mapping, the upper() or lower() scalar
functions are invoked with one argument:
upper('ABC') -> 'abc'
lower('abc') -> 'ABC'
To access ICU "language specific" case mapping, upper() or lower()
should be invoked with two arguments. The second argument is the name
of the locale to use. Passing an empty string ("") or SQL NULL value
as the second argument is the same as invoking the 1 argument version
of upper() or lower():
lower('I', 'en_us') -> 'i'
lower('I', 'tr_tr') -> 'ı' (small dotless i)
1.2 Unicode Aware LIKE Operator
Similarly to the upper() and lower() functions, the built-in SQLite LIKE
operator understands case equivalence for the 26 letters of the English
language alphabet. The implementation of LIKE included in this
extension uses the ICU function u_foldCase() to provide case
independent comparisons for the full range of unicode characters.
The U_FOLD_CASE_DEFAULT flag is passed to u_foldCase(), meaning the
dotless 'I' character used in the Turkish language is considered
to be in the same equivalence class as the dotted 'I' character
used by many languages (including English).
1.3 ICU Collation Sequences
A special SQL scalar function, icu_load_collation() is provided that
may be used to register ICU collation sequences with SQLite. It
is always called with exactly two arguments, the ICU locale
identifying the collation sequence to ICU, and the name of the
SQLite collation sequence to create. For example, to create an
SQLite collation sequence named "turkish" using Turkish language
sorting rules, the SQL statement:
SELECT icu_load_collation('tr_TR', 'turkish');
Or, for Australian English:
SELECT icu_load_collation('en_AU', 'australian');
The identifiers "turkish" and "australian" may then be used
as collation sequence identifiers in SQL statements:
CREATE TABLE aust_turkish_penpals(
australian_penpal_name TEXT COLLATE australian,
turkish_penpal_name TEXT COLLATE turkish
);
1.4 SQL REGEXP Operator
This extension provides an implementation of the SQL binary
comparision operator "REGEXP", based on the regular expression functions
provided by the ICU library. The syntax of the operator is as described
in SQLite documentation:
<string> REGEXP <re-pattern>
This extension uses the ICU defaults for regular expression matching
behaviour. Specifically, this means that:
* Matching is case-sensitive,
* Regular expression comments are not allowed within patterns, and
* The '^' and '$' characters match the beginning and end of the
<string> argument, not the beginning and end of lines within
the <string> argument.
Even more specifically, the value passed to the "flags" parameter
of ICU C function uregex_open() is 0.
2 COMPILATION AND USAGE
The easiest way to compile and use the ICU extension is to build
and use it as a dynamically loadable SQLite extension. To do this
using gcc on *nix:
gcc -shared icu.c `icu-config --ldflags` -o libSqliteIcu.so
You may need to add "-I" flags so that gcc can find sqlite3ext.h
and sqlite3.h. The resulting shared lib, libSqliteIcu.so, may be
loaded into sqlite in the same way as any other dynamically loadable
extension.
3 BUGS, PROBLEMS AND SECURITY ISSUES
3.1 The "case_sensitive_like" Pragma
This extension does not work well with the "case_sensitive_like"
pragma. If this pragma is used before the ICU extension is loaded,
then the pragma has no effect. If the pragma is used after the ICU
extension is loaded, then SQLite ignores the ICU implementation and
always uses the built-in LIKE operator.
The ICU extension LIKE operator is always case insensitive.
3.2 The SQLITE_MAX_LIKE_PATTERN_LENGTH Macro
Passing very long patterns to the built-in SQLite LIKE operator can
cause a stack overflow. To curb this problem, SQLite defines the
SQLITE_MAX_LIKE_PATTERN_LENGTH macro as the maximum length of a
pattern in bytes (irrespective of encoding). The default value is
defined in internal header file "limits.h".
The ICU extension LIKE implementation suffers from the same
problem and uses the same solution. However, since the ICU extension
code does not include the SQLite file "limits.h", modifying
the default value therein does not affect the ICU extension.
The default value of SQLITE_MAX_LIKE_PATTERN_LENGTH used by
the ICU extension LIKE operator is 50000, defined in source
file "icu.c".
3.3 Collation Sequence Security Issue
Internally, SQLite assumes that indices stored in database files
are sorted according to the collation sequence indicated by the
SQL schema. Changing the definition of a collation sequence after
an index has been built is therefore equivalent to database
corruption. The SQLite library is not very well tested under
these conditions, and may contain potential buffer overruns
or other programming errors that could be exploited by a malicious
programmer.
If the ICU extension is used in an environment where potentially
malicious users may execute arbitrary SQL (i.e. gears), they
should be prevented from invoking the icu_load_collation() function,
possibly using the authorisation callback.

View file

@ -0,0 +1,499 @@
/*
** 2007 May 6
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $
**
** This file implements an integration between the ICU library
** ("International Components for Unicode", an open-source library
** for handling unicode data) and SQLite. The integration uses
** ICU to provide the following to SQLite:
**
** * An implementation of the SQL regexp() function (and hence REGEXP
** operator) using the ICU uregex_XX() APIs.
**
** * Implementations of the SQL scalar upper() and lower() functions
** for case mapping.
**
** * Integration of ICU and SQLite collation seqences.
**
** * An implementation of the LIKE operator that uses ICU to
** provide case-independent matching.
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
/* Include ICU headers */
#include <unicode/utypes.h>
#include <unicode/uregex.h>
#include <unicode/ustring.h>
#include <unicode/ucol.h>
#include <assert.h>
#ifndef SQLITE_CORE
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#else
#include "sqlite3.h"
#endif
/*
** Maximum length (in bytes) of the pattern in a LIKE or GLOB
** operator.
*/
#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
#endif
/*
** Version of sqlite3_free() that is always a function, never a macro.
*/
static void xFree(void *p){
sqlite3_free(p);
}
/*
** Compare two UTF-8 strings for equality where the first string is
** a "LIKE" expression. Return true (1) if they are the same and
** false (0) if they are different.
*/
static int icuLikeCompare(
const uint8_t *zPattern, /* LIKE pattern */
const uint8_t *zString, /* The UTF-8 string to compare against */
const UChar32 uEsc /* The escape character */
){
static const int MATCH_ONE = (UChar32)'_';
static const int MATCH_ALL = (UChar32)'%';
int iPattern = 0; /* Current byte index in zPattern */
int iString = 0; /* Current byte index in zString */
int prevEscape = 0; /* True if the previous character was uEsc */
while( zPattern[iPattern]!=0 ){
/* Read (and consume) the next character from the input pattern. */
UChar32 uPattern;
U8_NEXT_UNSAFE(zPattern, iPattern, uPattern);
assert(uPattern!=0);
/* There are now 4 possibilities:
**
** 1. uPattern is an unescaped match-all character "%",
** 2. uPattern is an unescaped match-one character "_",
** 3. uPattern is an unescaped escape character, or
** 4. uPattern is to be handled as an ordinary character
*/
if( !prevEscape && uPattern==MATCH_ALL ){
/* Case 1. */
uint8_t c;
/* Skip any MATCH_ALL or MATCH_ONE characters that follow a
** MATCH_ALL. For each MATCH_ONE, skip one character in the
** test string.
*/
while( (c=zPattern[iPattern]) == MATCH_ALL || c == MATCH_ONE ){
if( c==MATCH_ONE ){
if( zString[iString]==0 ) return 0;
U8_FWD_1_UNSAFE(zString, iString);
}
iPattern++;
}
if( zPattern[iPattern]==0 ) return 1;
while( zString[iString] ){
if( icuLikeCompare(&zPattern[iPattern], &zString[iString], uEsc) ){
return 1;
}
U8_FWD_1_UNSAFE(zString, iString);
}
return 0;
}else if( !prevEscape && uPattern==MATCH_ONE ){
/* Case 2. */
if( zString[iString]==0 ) return 0;
U8_FWD_1_UNSAFE(zString, iString);
}else if( !prevEscape && uPattern==uEsc){
/* Case 3. */
prevEscape = 1;
}else{
/* Case 4. */
UChar32 uString;
U8_NEXT_UNSAFE(zString, iString, uString);
uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT);
uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT);
if( uString!=uPattern ){
return 0;
}
prevEscape = 0;
}
}
return zString[iString]==0;
}
/*
** Implementation of the like() SQL function. This function implements
** the build-in LIKE operator. The first argument to the function is the
** pattern and the second argument is the string. So, the SQL statements:
**
** A LIKE B
**
** is implemented as like(B, A). If there is an escape character E,
**
** A LIKE B ESCAPE E
**
** is mapped to like(B, A, E).
*/
static void icuLikeFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
const unsigned char *zA = sqlite3_value_text(argv[0]);
const unsigned char *zB = sqlite3_value_text(argv[1]);
UChar32 uEsc = 0;
/* Limit the length of the LIKE or GLOB pattern to avoid problems
** of deep recursion and N*N behavior in patternCompare().
*/
if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){
sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
return;
}
if( argc==3 ){
/* The escape character string must consist of a single UTF-8 character.
** Otherwise, return an error.
*/
int nE= sqlite3_value_bytes(argv[2]);
const unsigned char *zE = sqlite3_value_text(argv[2]);
int i = 0;
if( zE==0 ) return;
U8_NEXT(zE, i, nE, uEsc);
if( i!=nE){
sqlite3_result_error(context,
"ESCAPE expression must be a single character", -1);
return;
}
}
if( zA && zB ){
sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc));
}
}
/*
** This function is called when an ICU function called from within
** the implementation of an SQL scalar function returns an error.
**
** The scalar function context passed as the first argument is
** loaded with an error message based on the following two args.
*/
static void icuFunctionError(
sqlite3_context *pCtx, /* SQLite scalar function context */
const char *zName, /* Name of ICU function that failed */
UErrorCode e /* Error code returned by ICU function */
){
char zBuf[128];
sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e));
zBuf[127] = '\0';
sqlite3_result_error(pCtx, zBuf, -1);
}
/*
** Function to delete compiled regexp objects. Registered as
** a destructor function with sqlite3_set_auxdata().
*/
static void icuRegexpDelete(void *p){
URegularExpression *pExpr = (URegularExpression *)p;
uregex_close(pExpr);
}
/*
** Implementation of SQLite REGEXP operator. This scalar function takes
** two arguments. The first is a regular expression pattern to compile
** the second is a string to match against that pattern. If either
** argument is an SQL NULL, then NULL Is returned. Otherwise, the result
** is 1 if the string matches the pattern, or 0 otherwise.
**
** SQLite maps the regexp() function to the regexp() operator such
** that the following two are equivalent:
**
** zString REGEXP zPattern
** regexp(zPattern, zString)
**
** Uses the following ICU regexp APIs:
**
** uregex_open()
** uregex_matches()
** uregex_close()
*/
static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
UErrorCode status = U_ZERO_ERROR;
URegularExpression *pExpr;
UBool res;
const UChar *zString = sqlite3_value_text16(apArg[1]);
/* If the left hand side of the regexp operator is NULL,
** then the result is also NULL.
*/
if( !zString ){
return;
}
pExpr = sqlite3_get_auxdata(p, 0);
if( !pExpr ){
const UChar *zPattern = sqlite3_value_text16(apArg[0]);
if( !zPattern ){
return;
}
pExpr = uregex_open(zPattern, -1, 0, 0, &status);
if( U_SUCCESS(status) ){
sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
}else{
assert(!pExpr);
icuFunctionError(p, "uregex_open", status);
return;
}
}
/* Configure the text that the regular expression operates on. */
uregex_setText(pExpr, zString, -1, &status);
if( !U_SUCCESS(status) ){
icuFunctionError(p, "uregex_setText", status);
return;
}
/* Attempt the match */
res = uregex_matches(pExpr, 0, &status);
if( !U_SUCCESS(status) ){
icuFunctionError(p, "uregex_matches", status);
return;
}
/* Set the text that the regular expression operates on to a NULL
** pointer. This is not really necessary, but it is tidier than
** leaving the regular expression object configured with an invalid
** pointer after this function returns.
*/
uregex_setText(pExpr, 0, 0, &status);
/* Return 1 or 0. */
sqlite3_result_int(p, res ? 1 : 0);
}
/*
** Implementations of scalar functions for case mapping - upper() and
** lower(). Function upper() converts its input to upper-case (ABC).
** Function lower() converts to lower-case (abc).
**
** ICU provides two types of case mapping, "general" case mapping and
** "language specific". Refer to ICU documentation for the differences
** between the two.
**
** To utilise "general" case mapping, the upper() or lower() scalar
** functions are invoked with one argument:
**
** upper('ABC') -> 'abc'
** lower('abc') -> 'ABC'
**
** To access ICU "language specific" case mapping, upper() or lower()
** should be invoked with two arguments. The second argument is the name
** of the locale to use. Passing an empty string ("") or SQL NULL value
** as the second argument is the same as invoking the 1 argument version
** of upper() or lower().
**
** lower('I', 'en_us') -> 'i'
** lower('I', 'tr_tr') -> 'ı' (small dotless i)
**
** http://www.icu-project.org/userguide/posix.html#case_mappings
*/
static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
const UChar *zInput;
UChar *zOutput;
int nInput;
int nOutput;
UErrorCode status = U_ZERO_ERROR;
const char *zLocale = 0;
assert(nArg==1 || nArg==2);
if( nArg==2 ){
zLocale = (const char *)sqlite3_value_text(apArg[1]);
}
zInput = sqlite3_value_text16(apArg[0]);
if( !zInput ){
return;
}
nInput = sqlite3_value_bytes16(apArg[0]);
nOutput = nInput * 2 + 2;
zOutput = sqlite3_malloc(nOutput);
if( !zOutput ){
return;
}
if( sqlite3_user_data(p) ){
u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status);
}else{
u_strToLower(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status);
}
if( !U_SUCCESS(status) ){
icuFunctionError(p, "u_strToLower()/u_strToUpper", status);
return;
}
sqlite3_result_text16(p, zOutput, -1, xFree);
}
/*
** Collation sequence destructor function. The pCtx argument points to
** a UCollator structure previously allocated using ucol_open().
*/
static void icuCollationDel(void *pCtx){
UCollator *p = (UCollator *)pCtx;
ucol_close(p);
}
/*
** Collation sequence comparison function. The pCtx argument points to
** a UCollator structure previously allocated using ucol_open().
*/
static int icuCollationColl(
void *pCtx,
int nLeft,
const void *zLeft,
int nRight,
const void *zRight
){
UCollationResult res;
UCollator *p = (UCollator *)pCtx;
res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
switch( res ){
case UCOL_LESS: return -1;
case UCOL_GREATER: return +1;
case UCOL_EQUAL: return 0;
}
assert(!"Unexpected return value from ucol_strcoll()");
return 0;
}
/*
** Implementation of the scalar function icu_load_collation().
**
** This scalar function is used to add ICU collation based collation
** types to an SQLite database connection. It is intended to be called
** as follows:
**
** SELECT icu_load_collation(<locale>, <collation-name>);
**
** Where <locale> is a string containing an ICU locale identifier (i.e.
** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
** collation sequence to create.
*/
static void icuLoadCollation(
sqlite3_context *p,
int nArg,
sqlite3_value **apArg
){
sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
UErrorCode status = U_ZERO_ERROR;
const char *zLocale; /* Locale identifier - (eg. "jp_JP") */
const char *zName; /* SQL Collation sequence name (eg. "japanese") */
UCollator *pUCollator; /* ICU library collation object */
int rc; /* Return code from sqlite3_create_collation_x() */
assert(nArg==2);
zLocale = (const char *)sqlite3_value_text(apArg[0]);
zName = (const char *)sqlite3_value_text(apArg[1]);
if( !zLocale || !zName ){
return;
}
pUCollator = ucol_open(zLocale, &status);
if( !U_SUCCESS(status) ){
icuFunctionError(p, "ucol_open", status);
return;
}
assert(p);
rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator,
icuCollationColl, icuCollationDel
);
if( rc!=SQLITE_OK ){
ucol_close(pUCollator);
sqlite3_result_error(p, "Error registering collation function", -1);
}
}
/*
** Register the ICU extension functions with database db.
*/
int sqlite3IcuInit(sqlite3 *db){
struct IcuScalar {
const char *zName; /* Function name */
int nArg; /* Number of arguments */
int enc; /* Optimal text encoding */
void *pContext; /* sqlite3_user_data() context */
void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
} scalars[] = {
{"regexp",-1, SQLITE_ANY, 0, icuRegexpFunc},
{"lower", 1, SQLITE_UTF16, 0, icuCaseFunc16},
{"lower", 2, SQLITE_UTF16, 0, icuCaseFunc16},
{"upper", 1, SQLITE_UTF16, (void*)1, icuCaseFunc16},
{"upper", 2, SQLITE_UTF16, (void*)1, icuCaseFunc16},
{"lower", 1, SQLITE_UTF8, 0, icuCaseFunc16},
{"lower", 2, SQLITE_UTF8, 0, icuCaseFunc16},
{"upper", 1, SQLITE_UTF8, (void*)1, icuCaseFunc16},
{"upper", 2, SQLITE_UTF8, (void*)1, icuCaseFunc16},
{"like", 2, SQLITE_UTF8, 0, icuLikeFunc},
{"like", 3, SQLITE_UTF8, 0, icuLikeFunc},
{"icu_load_collation", 2, SQLITE_UTF8, (void*)db, icuLoadCollation},
};
int rc = SQLITE_OK;
int i;
for(i=0; rc==SQLITE_OK && i<(sizeof(scalars)/sizeof(struct IcuScalar)); i++){
struct IcuScalar *p = &scalars[i];
rc = sqlite3_create_function(
db, p->zName, p->nArg, p->enc, p->pContext, p->xFunc, 0, 0
);
}
return rc;
}
#if !SQLITE_CORE
int sqlite3_extension_init(
sqlite3 *db,
char **pzErrMsg,
const sqlite3_api_routines *pApi
){
SQLITE_EXTENSION_INIT2(pApi)
return sqlite3IcuInit(db);
}
#endif
#endif

View file

@ -0,0 +1,251 @@
#!/bin/sh
#
# install - install a program, script, or datafile
# This comes from X11R5 (mit/util/scripts/install.sh).
#
# Copyright 1991 by the Massachusetts Institute of Technology
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation, and that the name of M.I.T. not be used in advertising or
# publicity pertaining to distribution of the software without specific,
# written prior permission. M.I.T. makes no representations about the
# suitability of this software for any purpose. It is provided "as is"
# without express or implied warranty.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# `make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch. It can only install one file at a time, a restriction
# shared with many OS's install programs.
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
doit="${DOITPROG-}"
# put in absolute paths if you don't have them in your path; or use env. vars.
mvprog="${MVPROG-mv}"
cpprog="${CPPROG-cp}"
chmodprog="${CHMODPROG-chmod}"
chownprog="${CHOWNPROG-chown}"
chgrpprog="${CHGRPPROG-chgrp}"
stripprog="${STRIPPROG-strip}"
rmprog="${RMPROG-rm}"
mkdirprog="${MKDIRPROG-mkdir}"
transformbasename=""
transform_arg=""
instcmd="$mvprog"
chmodcmd="$chmodprog 0755"
chowncmd=""
chgrpcmd=""
stripcmd=""
rmcmd="$rmprog -f"
mvcmd="$mvprog"
src=""
dst=""
dir_arg=""
while [ x"$1" != x ]; do
case $1 in
-c) instcmd="$cpprog"
shift
continue;;
-d) dir_arg=true
shift
continue;;
-m) chmodcmd="$chmodprog $2"
shift
shift
continue;;
-o) chowncmd="$chownprog $2"
shift
shift
continue;;
-g) chgrpcmd="$chgrpprog $2"
shift
shift
continue;;
-s) stripcmd="$stripprog"
shift
continue;;
-t=*) transformarg=`echo $1 | sed 's/-t=//'`
shift
continue;;
-b=*) transformbasename=`echo $1 | sed 's/-b=//'`
shift
continue;;
*) if [ x"$src" = x ]
then
src=$1
else
# this colon is to work around a 386BSD /bin/sh bug
:
dst=$1
fi
shift
continue;;
esac
done
if [ x"$src" = x ]
then
echo "install: no input file specified"
exit 1
else
true
fi
if [ x"$dir_arg" != x ]; then
dst=$src
src=""
if [ -d $dst ]; then
instcmd=:
chmodcmd=""
else
instcmd=mkdir
fi
else
# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if [ -f $src -o -d $src ]
then
true
else
echo "install: $src does not exist"
exit 1
fi
if [ x"$dst" = x ]
then
echo "install: no destination specified"
exit 1
else
true
fi
# If destination is a directory, append the input filename; if your system
# does not like double slashes in filenames, you may need to add some logic
if [ -d $dst ]
then
dst="$dst"/`basename $src`
else
true
fi
fi
## this sed command emulates the dirname command
dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
# Make sure that the destination directory exists.
# this part is taken from Noah Friedman's mkinstalldirs script
# Skip lots of stat calls in the usual case.
if [ ! -d "$dstdir" ]; then
defaultIFS='
'
IFS="${IFS-${defaultIFS}}"
oIFS="${IFS}"
# Some sh's can't handle IFS=/ for some reason.
IFS='%'
set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
IFS="${oIFS}"
pathcomp=''
while [ $# -ne 0 ] ; do
pathcomp="${pathcomp}${1}"
shift
if [ ! -d "${pathcomp}" ] ;
then
$mkdirprog "${pathcomp}"
else
true
fi
pathcomp="${pathcomp}/"
done
fi
if [ x"$dir_arg" != x ]
then
$doit $instcmd $dst &&
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
else
# If we're going to rename the final executable, determine the name now.
if [ x"$transformarg" = x ]
then
dstfile=`basename $dst`
else
dstfile=`basename $dst $transformbasename |
sed $transformarg`$transformbasename
fi
# don't allow the sed command to completely eliminate the filename
if [ x"$dstfile" = x ]
then
dstfile=`basename $dst`
else
true
fi
# Make a temp file name in the proper directory.
dsttmp=$dstdir/#inst.$$#
# Move or copy the file name to the temp name
$doit $instcmd $src $dsttmp &&
trap "rm -f ${dsttmp}" 0 &&
# and set any options; do chmod last to preserve setuid bits
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $instcmd $src $dsttmp" command.
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
# Now rename the file to the real destination.
$doit $rmcmd -f $dstdir/$dstfile &&
$doit $mvcmd $dsttmp $dstdir/$dstfile
fi &&
exit 0

7625
client/src/thirdparty/sqlite-3.4.2/libtool vendored Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,486 @@
###############################################################################
# The following macros should be defined before this script is
# invoked:
#
# TOP The toplevel directory of the source tree. This is the
# directory that contains this "Makefile.in" and the
# "configure.in" script.
#
# BCC C Compiler and options for use in building executables that
# will run on the platform that is doing the build.
#
# THREADLIB Specify any extra linker options needed to make the library
# thread safe
#
# OPTS Extra compiler command-line options.
#
# EXE The suffix to add to executable files. ".exe" for windows
# and "" for Unix.
#
# TCC C Compiler and options for use in building executables that
# will run on the target platform. This is usually the same
# as BCC, unless you are cross-compiling.
#
# AR Tools used to build a static library.
# RANLIB
#
# TCL_FLAGS Extra compiler options needed for programs that use the
# TCL library.
#
# LIBTCL Linker options needed to link against the TCL library.
#
# READLINE_FLAGS Compiler options needed for programs that use the
# readline() library.
#
# LIBREADLINE Linker options needed by programs using readline() must
# link against.
#
# NAWK Nawk compatible awk program. Older (obsolete?) solaris
# systems need this to avoid using the original AT&T AWK.
#
# Once the macros above are defined, the rest of this make script will
# build the SQLite library and testing tools.
################################################################################
# This is how we compile
#
TCCX = $(TCC) $(OPTS) -I. -I$(TOP)/src
# Object files for the SQLite library.
#
LIBOBJ+= alter.o analyze.o attach.o auth.o btmutex.o btree.o build.o \
callback.o complete.o date.o delete.o \
expr.o fault.o func.o hash.o insert.o journal.o loadext.o \
main.o malloc.o mem1.o mem2.o mem3.o mem4.o mutex.o mutex_os2.o \
mutex_unix.o mutex_w32.o \
opcodes.o os.o os_os2.o os_unix.o os_win.o \
pager.o parse.o pragma.o prepare.o printf.o random.o \
select.o table.o $(TCLOBJ) tokenize.o trigger.o \
update.o util.o vacuum.o \
vdbe.o vdbeapi.o vdbeaux.o vdbeblob.o vdbefifo.o vdbemem.o \
where.o utf.o legacy.o vtab.o
EXTOBJ = icu.o
EXTOBJ += fts1.o \
fts1_hash.o \
fts1_tokenizer1.o \
fts1_porter.o
EXTOBJ += fts2.o \
fts2_hash.o \
fts2_icu.o \
fts2_porter.o \
fts2_tokenizer.o \
fts2_tokenizer1.o
EXTOBJ += fts3.o \
fts3_hash.o \
fts3_icu.o \
fts3_porter.o \
fts3_tokenizer.o \
fts3_tokenizer1.o
# All of the source code files.
#
SRC = \
$(TOP)/src/alter.c \
$(TOP)/src/analyze.c \
$(TOP)/src/attach.c \
$(TOP)/src/auth.c \
$(TOP)/src/btmutex.c \
$(TOP)/src/btree.c \
$(TOP)/src/btree.h \
$(TOP)/src/btreeInt.h \
$(TOP)/src/build.c \
$(TOP)/src/callback.c \
$(TOP)/src/complete.c \
$(TOP)/src/date.c \
$(TOP)/src/delete.c \
$(TOP)/src/expr.c \
$(TOP)/src/fault.c \
$(TOP)/src/func.c \
$(TOP)/src/hash.c \
$(TOP)/src/hash.h \
$(TOP)/src/insert.c \
$(TOP)/src/journal.c \
$(TOP)/src/legacy.c \
$(TOP)/src/loadext.c \
$(TOP)/src/main.c \
$(TOP)/src/malloc.c \
$(TOP)/src/mem1.c \
$(TOP)/src/mem2.c \
$(TOP)/src/mem3.c \
$(TOP)/src/mem4.c \
$(TOP)/src/mutex.c \
$(TOP)/src/mutex.h \
$(TOP)/src/mutex_os2.c \
$(TOP)/src/mutex_unix.c \
$(TOP)/src/mutex_w32.c \
$(TOP)/src/os.c \
$(TOP)/src/os.h \
$(TOP)/src/os_common.h \
$(TOP)/src/os_os2.c \
$(TOP)/src/os_unix.c \
$(TOP)/src/os_win.c \
$(TOP)/src/pager.c \
$(TOP)/src/pager.h \
$(TOP)/src/parse.y \
$(TOP)/src/pragma.c \
$(TOP)/src/prepare.c \
$(TOP)/src/printf.c \
$(TOP)/src/random.c \
$(TOP)/src/select.c \
$(TOP)/src/shell.c \
$(TOP)/src/sqlite.h.in \
$(TOP)/src/sqlite3ext.h \
$(TOP)/src/sqliteInt.h \
$(TOP)/src/sqliteLimit.h \
$(TOP)/src/table.c \
$(TOP)/src/tclsqlite.c \
$(TOP)/src/tokenize.c \
$(TOP)/src/trigger.c \
$(TOP)/src/utf.c \
$(TOP)/src/update.c \
$(TOP)/src/util.c \
$(TOP)/src/vacuum.c \
$(TOP)/src/vdbe.c \
$(TOP)/src/vdbe.h \
$(TOP)/src/vdbeapi.c \
$(TOP)/src/vdbeaux.c \
$(TOP)/src/vdbeblob.c \
$(TOP)/src/vdbefifo.c \
$(TOP)/src/vdbemem.c \
$(TOP)/src/vdbeInt.h \
$(TOP)/src/vtab.c \
$(TOP)/src/where.c
# Source code for extensions
#
SRC += \
$(TOP)/ext/fts1/fts1.c \
$(TOP)/ext/fts1/fts1.h \
$(TOP)/ext/fts1/fts1_hash.c \
$(TOP)/ext/fts1/fts1_hash.h \
$(TOP)/ext/fts1/fts1_porter.c \
$(TOP)/ext/fts1/fts1_tokenizer.h \
$(TOP)/ext/fts1/fts1_tokenizer1.c
SRC += \
$(TOP)/ext/fts2/fts2.c \
$(TOP)/ext/fts2/fts2.h \
$(TOP)/ext/fts2/fts2_hash.c \
$(TOP)/ext/fts2/fts2_hash.h \
$(TOP)/ext/fts2/fts2_icu.c \
$(TOP)/ext/fts2/fts2_porter.c \
$(TOP)/ext/fts2/fts2_tokenizer.h \
$(TOP)/ext/fts2/fts2_tokenizer.c \
$(TOP)/ext/fts2/fts2_tokenizer1.c
SRC += \
$(TOP)/ext/fts3/fts3.c \
$(TOP)/ext/fts3/fts3.h \
$(TOP)/ext/fts3/fts3_hash.c \
$(TOP)/ext/fts3/fts3_hash.h \
$(TOP)/ext/fts3/fts3_icu.c \
$(TOP)/ext/fts3/fts3_porter.c \
$(TOP)/ext/fts3/fts3_tokenizer.h \
$(TOP)/ext/fts3/fts3_tokenizer.c \
$(TOP)/ext/fts3/fts3_tokenizer1.c
SRC += \
$(TOP)/ext/icu/icu.c
# Generated source code files
#
SRC += \
keywordhash.h \
opcodes.c \
opcodes.h \
parse.c \
parse.h \
sqlite3.h
# Source code to the test files.
#
TESTSRC = \
$(TOP)/src/test1.c \
$(TOP)/src/test2.c \
$(TOP)/src/test3.c \
$(TOP)/src/test4.c \
$(TOP)/src/test5.c \
$(TOP)/src/test6.c \
$(TOP)/src/test7.c \
$(TOP)/src/test8.c \
$(TOP)/src/test9.c \
$(TOP)/src/test_autoext.c \
$(TOP)/src/test_async.c \
$(TOP)/src/test_btree.c \
$(TOP)/src/test_config.c \
$(TOP)/src/test_devsym.c \
$(TOP)/src/test_hexio.c \
$(TOP)/src/test_malloc.c \
$(TOP)/src/test_md5.c \
$(TOP)/src/test_onefile.c \
$(TOP)/src/test_schema.c \
$(TOP)/src/test_server.c \
$(TOP)/src/test_tclvar.c \
$(TOP)/src/test_thread.c \
#TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c
#TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c
TESTSRC2 = \
$(TOP)/src/attach.c $(TOP)/src/btree.c $(TOP)/src/build.c $(TOP)/src/date.c \
$(TOP)/src/expr.c $(TOP)/src/func.c $(TOP)/src/insert.c $(TOP)/src/os.c \
$(TOP)/src/os_os2.c $(TOP)/src/os_unix.c $(TOP)/src/os_win.c \
$(TOP)/src/pager.c $(TOP)/src/pragma.c $(TOP)/src/prepare.c \
$(TOP)/src/printf.c $(TOP)/src/random.c \
$(TOP)/src/select.c $(TOP)/src/tokenize.c \
$(TOP)/src/utf.c $(TOP)/src/util.c $(TOP)/src/vdbeapi.c $(TOP)/src/vdbeaux.c \
$(TOP)/src/vdbe.c $(TOP)/src/vdbemem.c $(TOP)/src/where.c parse.c
# Header files used by all library source files.
#
HDR = \
$(TOP)/src/btree.h \
$(TOP)/src/btreeInt.h \
$(TOP)/src/hash.h \
keywordhash.h \
$(TOP)/src/mutex.h \
opcodes.h \
$(TOP)/src/os.h \
$(TOP)/src/os_common.h \
$(TOP)/src/pager.h \
parse.h \
sqlite3.h \
$(TOP)/src/sqlite3ext.h \
$(TOP)/src/sqliteInt.h \
$(TOP)/src/sqliteLimit.h \
$(TOP)/src/vdbe.h \
$(TOP)/src/vdbeInt.h
# Header files used by extensions
#
EXTHDR += \
$(TOP)/ext/fts1/fts1.h \
$(TOP)/ext/fts1/fts1_hash.h \
$(TOP)/ext/fts1/fts1_tokenizer.h
EXTHDR += \
$(TOP)/ext/fts2/fts2.h \
$(TOP)/ext/fts2/fts2_hash.h \
$(TOP)/ext/fts2/fts2_tokenizer.h
EXTHDR += \
$(TOP)/ext/fts3/fts3.h \
$(TOP)/ext/fts3/fts3_hash.h \
$(TOP)/ext/fts3/fts3_tokenizer.h
# This is the default Makefile target. The objects listed here
# are what get build when you type just "make" with no arguments.
#
all: sqlite3.h libsqlite3.a sqlite3$(EXE)
libsqlite3.a: $(LIBOBJ)
$(AR) libsqlite3.a $(LIBOBJ)
$(RANLIB) libsqlite3.a
sqlite3$(EXE): $(TOP)/src/shell.c libsqlite3.a sqlite3.h
$(TCCX) $(READLINE_FLAGS) -o sqlite3$(EXE) \
$(TOP)/src/shell.c \
libsqlite3.a $(LIBREADLINE) $(TLIBS) $(THREADLIB)
objects: $(LIBOBJ_ORIG)
# This target creates a directory named "tsrc" and fills it with
# copies of all of the C source code and header files needed to
# build on the target system. Some of the C source code and header
# files are automatically generated. This target takes care of
# all that automatic generation.
#
target_source: $(SRC)
rm -rf tsrc
mkdir tsrc
cp -f $(SRC) tsrc
rm tsrc/sqlite.h.in tsrc/parse.y
touch target_source
sqlite3.c: target_source $(TOP)/tool/mksqlite3c.tcl
tclsh $(TOP)/tool/mksqlite3c.tcl
cp sqlite3.c tclsqlite3.c
cat $(TOP)/src/tclsqlite.c >>tclsqlite3.c
fts2amal.c: target_source $(TOP)/ext/fts2/mkfts2amal.tcl
tclsh $(TOP)/ext/fts2/mkfts2amal.tcl
fts3amal.c: target_source $(TOP)/ext/fts3/mkfts3amal.tcl
tclsh $(TOP)/ext/fts3/mkfts3amal.tcl
# Rules to build the LEMON compiler generator
#
lemon: $(TOP)/tool/lemon.c $(TOP)/tool/lempar.c
$(BCC) -o lemon $(TOP)/tool/lemon.c
cp $(TOP)/tool/lempar.c .
# Rules to build individual *.o files from files in the src directory.
#
%.o: %.c $(HDR)
$(TCCX) -c $<
# Rules to build individual *.o files from generated *.c files. This
# applies to:
#
# parse.o
# opcodes.o
#
%.o: $(TOP)/src/%.c $(HDR)
$(TCCX) -c $<
tclsqlite.o: $(TOP)/src/tclsqlite.c $(HDR)
$(TCCX) $(TCL_FLAGS) -c $(TOP)/src/tclsqlite.c
# Rules to build opcodes.c and opcodes.h
#
opcodes.c: opcodes.h $(TOP)/mkopcodec.awk
sort -n -b -k 3 opcodes.h | $(NAWK) -f $(TOP)/mkopcodec.awk >opcodes.c
opcodes.h: parse.h $(TOP)/src/vdbe.c $(TOP)/mkopcodeh.awk
cat parse.h $(TOP)/src/vdbe.c |$(NAWK) -f $(TOP)/mkopcodeh.awk >opcodes.h
# Rules to build parse.c and parse.h - the outputs of lemon.
#
parse.h: parse.c
parse.c: $(TOP)/src/parse.y lemon $(TOP)/addopcodes.awk
cp $(TOP)/src/parse.y .
rm -f parse.h
./lemon $(OPTS) parse.y
mv parse.h parse.h.temp
awk -f $(TOP)/addopcodes.awk parse.h.temp >parse.h
sqlite3.h: $(TOP)/src/sqlite.h.in
sed -e s/--VERS--/`cat ${TOP}/VERSION`/ \
-e s/--VERSION-NUMBER--/`cat ${TOP}/VERSION | sed 's/[^0-9]/ /g' | $(NAWK) '{printf "%d%03d%03d",$$1,$$2,$$3}'`/ \
$(TOP)/src/sqlite.h.in >sqlite3.h
keywordhash.h: $(TOP)/tool/mkkeywordhash.c
$(BCC) -o mkkeywordhash $(OPTS) $(TOP)/tool/mkkeywordhash.c
./mkkeywordhash >keywordhash.h
# Rules to build the extension objects.
#
icu.o: $(TOP)/ext/icu/icu.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/icu/icu.c
fts2.o: $(TOP)/ext/fts2/fts2.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2.c
fts2_hash.o: $(TOP)/ext/fts2/fts2_hash.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_hash.c
fts2_icu.o: $(TOP)/ext/fts2/fts2_icu.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_icu.c
fts2_porter.o: $(TOP)/ext/fts2/fts2_porter.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_porter.c
fts2_tokenizer.o: $(TOP)/ext/fts2/fts2_tokenizer.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_tokenizer.c
fts2_tokenizer1.o: $(TOP)/ext/fts2/fts2_tokenizer1.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_tokenizer1.c
fts3.o: $(TOP)/ext/fts3/fts3.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3.c
fts3_hash.o: $(TOP)/ext/fts3/fts3_hash.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_hash.c
fts3_icu.o: $(TOP)/ext/fts3/fts3_icu.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_icu.c
fts3_porter.o: $(TOP)/ext/fts3/fts3_porter.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_porter.c
fts3_tokenizer.o: $(TOP)/ext/fts3/fts3_tokenizer.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_tokenizer.c
fts3_tokenizer1.o: $(TOP)/ext/fts3/fts3_tokenizer1.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_tokenizer1.c
# Rules for building test programs and for running tests
#
tclsqlite3: $(TOP)/src/tclsqlite.c libsqlite3.a
$(TCCX) $(TCL_FLAGS) -DTCLSH=1 -o tclsqlite3 \
$(TOP)/src/tclsqlite.c libsqlite3.a $(LIBTCL) $(THREADLIB)
# Rules to build the 'testfixture' application.
#
TESTFIXTURE_FLAGS = -DTCLSH=1 -DSQLITE_TEST=1 -DSQLITE_CRASH_TEST=1
TESTFIXTURE_FLAGS += -DSQLITE_SERVER=1 -DSQLITE_PRIVATE="" -DSQLITE_CORE
testfixture$(EXE): $(TESTSRC2) libsqlite3.a $(TESTSRC) $(TOP)/src/tclsqlite.c
$(TCCX) $(TCL_FLAGS) $(TESTFIXTURE_FLAGS) \
$(TESTSRC) $(TESTSRC2) $(TOP)/src/tclsqlite.c \
-o testfixture$(EXE) $(LIBTCL) $(THREADLIB) libsqlite3.a
amalgamation-testfixture$(EXE): sqlite3.c $(TESTSRC) $(TOP)/src/tclsqlite.c
$(TCCX) $(TCL_FLAGS) $(TESTFIXTURE_FLAGS) \
$(TESTSRC) $(TOP)/src/tclsqlite.c sqlite3.c \
-o testfixture$(EXE) $(LIBTCL) $(THREADLIB)
fts3-testfixture$(EXE): sqlite3.c fts3amal.c $(TESTSRC) $(TOP)/src/tclsqlite.c
$(TCCX) $(TCL_FLAGS) $(TESTFIXTURE_FLAGS) \
-DSQLITE_ENABLE_FTS3=1 \
$(TESTSRC) $(TOP)/src/tclsqlite.c sqlite3.c fts3amal.c \
-o testfixture$(EXE) $(LIBTCL) $(THREADLIB)
fulltest: testfixture$(EXE) sqlite3$(EXE)
./testfixture$(EXE) $(TOP)/test/all.test
soaktest: testfixture$(EXE) sqlite3$(EXE)
./testfixture$(EXE) $(TOP)/test/all.test -soak 1
test: testfixture$(EXE) sqlite3$(EXE)
./testfixture$(EXE) $(TOP)/test/quick.test
sqlite3_analyzer$(EXE): $(TOP)/src/tclsqlite.c sqlite3.c $(TESTSRC) \
$(TOP)/tool/spaceanal.tcl
sed \
-e '/^#/d' \
-e 's,\\,\\\\,g' \
-e 's,",\\",g' \
-e 's,^,",' \
-e 's,$$,\\n",' \
$(TOP)/tool/spaceanal.tcl >spaceanal_tcl.h
$(TCCX) $(TCL_FLAGS) $(TESTFIXTURE_FLAGS) \
-DTCLSH=2 -DSQLITE_TEST=1 -DSQLITE_DEBUG=1 -DSQLITE_PRIVATE="" \
$(TESTSRC) $(TOP)/src/tclsqlite.c sqlite3.c \
-o sqlite3_analyzer$(EXE) \
$(LIBTCL) $(THREADLIB)
TEST_EXTENSION = $(SHPREFIX)testloadext.$(SO)
$(TEST_EXTENSION): $(TOP)/src/test_loadext.c
$(MKSHLIB) $(TOP)/src/test_loadext.c -o $(TEST_EXTENSION)
extensiontest: testfixture$(EXE) $(TEST_EXTENSION)
./testfixture$(EXE) $(TOP)/test/loadext.test
# Standard install and cleanup targets
#
install: sqlite3 libsqlite3.a sqlite3.h
mv sqlite3 /usr/bin
mv libsqlite3.a /usr/lib
mv sqlite3.h /usr/include
clean:
rm -f *.o sqlite3 libsqlite3.a sqlite3.h opcodes.*
rm -f lemon lempar.c parse.* sqlite*.tar.gz mkkeywordhash keywordhash.h
rm -f $(PUBLISH)
rm -f *.da *.bb *.bbg gmon.out
rm -rf tsrc target_source
rm -f testloadext.dll libtestloadext.so
rm -f sqlite3.c fts?amal.c

View file

@ -0,0 +1,46 @@
#!/bin/sh
#
# This script is used to compile SQLite into a DLL.
#
# Two separate DLLs are generated. "sqlite3.dll" is the core
# library. "tclsqlite3.dll" contains the TCL bindings and is the
# library that is loaded into TCL in order to run SQLite.
#
make sqlite3.c
PATH=$PATH:/opt/mingw/bin
TCLDIR=/home/drh/tcltk/846/win/846win
TCLSTUBLIB=$TCLDIR/libtcl84stub.a
OPTS='-DUSE_TCL_STUBS=1 -DSQLITE_THREADSAFE=1 -DBUILD_sqlite=1 -DOS_WIN=1'
OPTS="$OPTS -DSQLITE_ENABLE_FTS3=1"
CC="i386-mingw32msvc-gcc -Os $OPTS -Itsrc -I$TCLDIR"
NM="i386-mingw32msvc-nm"
CMD="$CC -c sqlite3.c"
echo $CMD
$CMD
CMD="$CC -c tclsqlite3.c"
echo $CMD
$CMD
echo 'EXPORTS' >tclsqlite3.def
$NM tclsqlite3.o | grep ' T ' >temp1
grep '_Init$' temp1 >temp2
grep '_SafeInit$' temp1 >>temp2
grep ' T _sqlite3_' temp1 >>temp2
echo 'EXPORTS' >tclsqlite3.def
sed 's/^.* T _//' temp2 | sort | uniq >>tclsqlite3.def
i386-mingw32msvc-dllwrap \
--def tclsqlite3.def -v --export-all \
--driver-name i386-mingw32msvc-gcc \
--dlltool-name i386-mingw32msvc-dlltool \
--as i386-mingw32msvc-as \
--target i386-mingw32 \
-dllname tclsqlite3.dll -lmsvcrt tclsqlite3.o $TCLSTUBLIB
$NM sqlite3.o | grep ' T ' >temp1
echo 'EXPORTS' >sqlite3.def
grep ' _sqlite3_' temp1 | sed 's/^.* _//' >>sqlite3.def
i386-mingw32msvc-dllwrap \
--def sqlite3.def -v --export-all \
--driver-name i386-mingw32msvc-gcc \
--dlltool-name i386-mingw32msvc-dlltool \
--as i386-mingw32msvc-as \
--target i386-mingw32 \
-dllname sqlite3.dll -lmsvcrt sqlite3.o

View file

@ -0,0 +1,13 @@
#!/bin/sh
#
# This script is used to compile SQLite into a shared library on Linux.
#
# Two separate shared libraries are generated. "sqlite3.so" is the core
# library. "tclsqlite3.so" contains the TCL bindings and is the
# library that is loaded into TCL in order to run SQLite.
#
CFLAGS=-O2 -Wall
make fts2amal.c
echo gcc $CFLAGS -shared fts2amal.c -o fts2.so
gcc $CFLAGS -shared fts2amal.c -o fts2.so
strip fts2.so

View file

@ -0,0 +1,22 @@
#!/bin/sh
#
# This script is used to compile SQLite extensions into DLLs.
#
make fts2amal.c
PATH=$PATH:/opt/mingw/bin
OPTS='-DTHREADSAFE=1 -DBUILD_sqlite=1 -DOS_WIN=1'
CC="i386-mingw32msvc-gcc -O2 $OPTS -Itsrc"
NM="i386-mingw32msvc-nm"
CMD="$CC -c fts2amal.c"
echo $CMD
$CMD
echo 'EXPORTS' >fts2.def
echo 'sqlite3_extension_init' >>fts2.def
i386-mingw32msvc-dllwrap \
--def fts2.def -v --export-all \
--driver-name i386-mingw32msvc-gcc \
--dlltool-name i386-mingw32msvc-dlltool \
--as i386-mingw32msvc-as \
--target i386-mingw32 \
-dllname fts2.dll -lmsvcrt fts2amal.o
zip fts2dll.zip fts2.dll fts2.def

Some files were not shown because too many files have changed in this diff Show more