diff options
Diffstat (limited to 'src/runtime/c/sg/sqlite3Btree.c')
| -rw-r--r-- | src/runtime/c/sg/sqlite3Btree.c | 48708 |
1 files changed, 48708 insertions, 0 deletions
diff --git a/src/runtime/c/sg/sqlite3Btree.c b/src/runtime/c/sg/sqlite3Btree.c new file mode 100644 index 000000000..20d870918 --- /dev/null +++ b/src/runtime/c/sg/sqlite3Btree.c @@ -0,0 +1,48708 @@ +/****************************************************************************** +** This file is an amalgamation of many separate C source files from SQLite +** version 3.8.11.1. By combining all the individual C code files into this +** single large file, the entire code can be compiled as a single translation +** unit. This allows many compilers to do optimizations that would not be +** possible if the files were compiled separately. Performance improvements +** of 5% or more are commonly seen when SQLite is compiled as a single +** translation unit. +** +** This file is all you need to compile SQLite. To use SQLite in other +** programs, you need this file and the "sqliteBtree.h" header file that defines +** the programming interface to the SQLite library. (If you do not have +** the "sqliteBtree.h" header file at hand, you will find a copy embedded within +** the text of this file. Search for "Begin file sqliteBtree.h" to find the start +** of the embedded sqliteBtree.h header file.) +*/ +#define SQLITE_CORE 1 +#define SQLITE_AMALGAMATION 1 +#ifndef SQLITE_PRIVATE +# define SQLITE_PRIVATE static +#endif +/************** Begin file sqliteInt.h ***************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Internal interface definitions for SQLite. +** +*/ +#ifndef _SQLITEINT_H_ +#define _SQLITEINT_H_ + +/* +** Include the header file used to customize the compiler options for MSVC. +** This should be done first so that it can successfully prevent spurious +** compiler warnings due to subsequent content in this file and other files +** that are included by this file. +*/ +/************** Include msvc.h in the middle of sqliteInt.h ******************/ +/************** Begin file msvc.h ********************************************/ +/* +** 2015 January 12 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code that is specific to MSVC. +*/ +#ifndef _MSVC_H_ +#define _MSVC_H_ + +#if defined(_MSC_VER) +#pragma warning(disable : 4054) +#pragma warning(disable : 4055) +#pragma warning(disable : 4100) +#pragma warning(disable : 4127) +#pragma warning(disable : 4130) +#pragma warning(disable : 4152) +#pragma warning(disable : 4189) +#pragma warning(disable : 4206) +#pragma warning(disable : 4210) +#pragma warning(disable : 4232) +#pragma warning(disable : 4244) +#pragma warning(disable : 4305) +#pragma warning(disable : 4306) +#pragma warning(disable : 4702) +#pragma warning(disable : 4706) +#endif /* defined(_MSC_VER) */ + +#endif /* _MSVC_H_ */ + +/************** End of msvc.h ************************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ + +/* +** Special setup for VxWorks +*/ +/************** Include vxworks.h in the middle of sqliteInt.h ***************/ +/************** Begin file vxworks.h *****************************************/ +/* +** 2015-03-02 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code that is specific to Wind River's VxWorks +*/ +#if defined(__RTP__) || defined(_WRS_KERNEL) +/* This is VxWorks. Set up things specially for that OS +*/ +#include <vxWorks.h> +#include <pthread.h> /* amalgamator: dontcache */ +#define OS_VXWORKS 1 +#define SQLITE_OS_OTHER 0 +#define SQLITE_HOMEGROWN_RECURSIVE_MUTEX 1 +#define SQLITE_ENABLE_LOCKING_STYLE 0 +#define HAVE_UTIME 1 +#else +/* This is not VxWorks. */ +#define OS_VXWORKS 0 +#endif /* defined(_WRS_KERNEL) */ + +/************** End of vxworks.h *********************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ + +/* +** These #defines should enable >2GB file support on POSIX if the +** underlying operating system supports it. If the OS lacks +** large file support, or if the OS is windows, these should be no-ops. +** +** Ticket #2739: The _LARGEFILE_SOURCE macro must appear before any +** system #includes. Hence, this block of code must be the very first +** code in all source files. +** +** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch +** on the compiler command line. This is necessary if you are compiling +** on a recent machine (ex: Red Hat 7.2) but you want your code to work +** on an older machine (ex: Red Hat 6.0). If you compile on Red Hat 7.2 +** without this option, LFS is enable. But LFS does not exist in the kernel +** in Red Hat 6.0, so the code won't work. Hence, for maximum binary +** portability you should omit LFS. +** +** The previous paragraph was written in 2005. (This paragraph is written +** on 2008-11-28.) These days, all Linux kernels support large files, so +** you should probably leave LFS enabled. But some embedded platforms might +** lack LFS in which case the SQLITE_DISABLE_LFS macro might still be useful. +** +** Similar is true for Mac OS X. LFS is only supported on Mac OS X 9 and later. +*/ +#ifndef SQLITE_DISABLE_LFS +# define _LARGE_FILE 1 +# ifndef _FILE_OFFSET_BITS +# define _FILE_OFFSET_BITS 64 +# endif +# define _LARGEFILE_SOURCE 1 +#endif + +/* What version of GCC is being used. 0 means GCC is not being used */ +#ifdef __GNUC__ +# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__) +#else +# define GCC_VERSION 0 +#endif + +/* Needed for various definitions... */ +#if defined(__GNUC__) && !defined(_GNU_SOURCE) +# define _GNU_SOURCE +#endif + +#if defined(__OpenBSD__) && !defined(_BSD_SOURCE) +# define _BSD_SOURCE +#endif + +/* +** For MinGW, check to see if we can include the header file containing its +** version information, among other things. Normally, this internal MinGW +** header file would [only] be included automatically by other MinGW header +** files; however, the contained version information is now required by this +** header file to work around binary compatibility issues (see below) and +** this is the only known way to reliably obtain it. This entire #if block +** would be completely unnecessary if there was any other way of detecting +** MinGW via their preprocessor (e.g. if they customized their GCC to define +** some MinGW-specific macros). When compiling for MinGW, either the +** _HAVE_MINGW_H or _HAVE__MINGW_H (note the extra underscore) macro must be +** defined; otherwise, detection of conditions specific to MinGW will be +** disabled. +*/ +#if defined(_HAVE_MINGW_H) +# include "mingw.h" +#elif defined(_HAVE__MINGW_H) +# include "_mingw.h" +#endif + +/* +** For MinGW version 4.x (and higher), check to see if the _USE_32BIT_TIME_T +** define is required to maintain binary compatibility with the MSVC runtime +** library in use (e.g. for Windows XP). +*/ +#if !defined(_USE_32BIT_TIME_T) && !defined(_USE_64BIT_TIME_T) && \ + defined(_WIN32) && !defined(_WIN64) && \ + defined(__MINGW_MAJOR_VERSION) && __MINGW_MAJOR_VERSION >= 4 && \ + defined(__MSVCRT__) +# define _USE_32BIT_TIME_T +#endif + +#include "sqlite3Btree.h" + +#include <stdarg.h> /* Needed for the definition of va_list */ + +/* +** Provide the ability to override linkage features of the interface. +*/ +#ifndef SQLITE_EXTERN +# define SQLITE_EXTERN extern +#endif +#ifndef SQLITE_API +# define SQLITE_API +#endif +#ifndef SQLITE_CDECL +# define SQLITE_CDECL +#endif +#ifndef SQLITE_STDCALL +# define SQLITE_STDCALL +#endif + +/* +** CAPI3REF: Compile-Time Library Version Numbers +** +** ^(The [SQLITE_VERSION] C preprocessor macro in the sqlite3.h header +** evaluates to a string literal that is the SQLite version in the +** format "X.Y.Z" where X is the major version number (always 3 for +** SQLite3) and Y is the minor version number and Z is the release number.)^ +** ^(The [SQLITE_VERSION_NUMBER] C preprocessor macro resolves to an integer +** with the value (X*1000000 + Y*1000 + Z) where X, Y, and Z are the same +** numbers used in [SQLITE_VERSION].)^ +** The SQLITE_VERSION_NUMBER for any given release of SQLite will also +** be larger than the release from which it is derived. Either Y will +** be held constant and Z will be incremented or else Y will be incremented +** and Z will be reset to zero. +** +** Since version 3.6.18, SQLite source code has been stored in the +** <a href="http://www.fossil-scm.org/">Fossil configuration management +** system</a>. ^The SQLITE_SOURCE_ID macro evaluates to +** a string which identifies a particular check-in of SQLite +** within its configuration management system. ^The SQLITE_SOURCE_ID +** string contains the date and time of the check-in (UTC) and an SHA1 +** hash of the entire source tree. +** +** See also: [sqlite3_libversion()], +** [sqlite3_libversion_number()], [sqlite3_sourceid()], +** [sqlite_version()] and [sqlite_source_id()]. +*/ +#define SQLITE_VERSION "3.8.11.1" +#define SQLITE_VERSION_NUMBER 3008011 +#define SQLITE_SOURCE_ID "2015-07-29 20:00:57 cf538e2783e468bbc25e7cb2a9ee64d3e0e80b2f" + +/* +** CAPI3REF: Device Characteristics +** +** The xDeviceCharacteristics method of the [sqlite3_io_methods] +** object returns an integer which is a vector of these +** bit values expressing I/O characteristics of the mass storage +** device that holds the file that the [sqlite3_io_methods] +** refers to. +** +** The SQLITE_IOCAP_ATOMIC property means that all writes of +** any size are atomic. The SQLITE_IOCAP_ATOMICnnn values +** mean that writes of blocks that are nnn bytes in size and +** are aligned to an address which is an integer multiple of +** nnn are atomic. The SQLITE_IOCAP_SAFE_APPEND value means +** that when data is appended to a file, the data is appended +** first then the size of the file is extended, never the other +** way around. The SQLITE_IOCAP_SEQUENTIAL property means that +** information is written to disk in the same order as calls +** to xWrite(). The SQLITE_IOCAP_POWERSAFE_OVERWRITE property means that +** after reboot following a crash or power loss, the only bytes in a +** file that were written at the application level might have changed +** and that adjacent bytes, even bytes within the same sector are +** guaranteed to be unchanged. The SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN +** flag indicate that a file cannot be deleted when open. The +** SQLITE_IOCAP_IMMUTABLE flag indicates that the file is on +** read-only media and cannot be changed even by processes with +** elevated privileges. +*/ +#define SQLITE_IOCAP_ATOMIC 0x00000001 +#define SQLITE_IOCAP_ATOMIC512 0x00000002 +#define SQLITE_IOCAP_ATOMIC1K 0x00000004 +#define SQLITE_IOCAP_ATOMIC2K 0x00000008 +#define SQLITE_IOCAP_ATOMIC4K 0x00000010 +#define SQLITE_IOCAP_ATOMIC8K 0x00000020 +#define SQLITE_IOCAP_ATOMIC16K 0x00000040 +#define SQLITE_IOCAP_ATOMIC32K 0x00000080 +#define SQLITE_IOCAP_ATOMIC64K 0x00000100 +#define SQLITE_IOCAP_SAFE_APPEND 0x00000200 +#define SQLITE_IOCAP_SEQUENTIAL 0x00000400 +#define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN 0x00000800 +#define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 +#define SQLITE_IOCAP_IMMUTABLE 0x00002000 + +/* +** CAPI3REF: File Locking Levels +** +** SQLite uses one of these integer values as the second +** argument to calls it makes to the xLock() and xUnlock() methods +** of an [sqlite3_io_methods] object. +*/ +#define SQLITE_LOCK_NONE 0 +#define SQLITE_LOCK_SHARED 1 +#define SQLITE_LOCK_RESERVED 2 +#define SQLITE_LOCK_PENDING 3 +#define SQLITE_LOCK_EXCLUSIVE 4 + +/* +** CAPI3REF: Synchronization Type Flags +** +** When SQLite invokes the xSync() method of an +** [sqlite3_io_methods] object it uses a combination of +** these integer values as the second argument. +** +** When the SQLITE_SYNC_DATAONLY flag is used, it means that the +** sync operation only needs to flush data to mass storage. Inode +** information need not be flushed. If the lower four bits of the flag +** equal SQLITE_SYNC_NORMAL, that means to use normal fsync() semantics. +** If the lower four bits equal SQLITE_SYNC_FULL, that means +** to use Mac OS X style fullsync instead of fsync(). +** +** Do not confuse the SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL flags +** with the [PRAGMA synchronous]=NORMAL and [PRAGMA synchronous]=FULL +** settings. The [synchronous pragma] determines when calls to the +** xSync VFS method occur and applies uniformly across all platforms. +** The SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL flags determine how +** energetic or rigorous or forceful the sync operations are and +** only make a difference on Mac OSX for the default SQLite code. +** (Third-party VFS implementations might also make the distinction +** between SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL, but among the +** operating systems natively supported by SQLite, only Mac OSX +** cares about the difference.) +*/ +#define SQLITE_SYNC_NORMAL 0x00002 +#define SQLITE_SYNC_FULL 0x00003 +#define SQLITE_SYNC_DATAONLY 0x00010 + +/* +** CAPI3REF: OS Interface Open File Handle +** +** An [sqlite3_file] object represents an open file in the +** [sqlite3_vfs | OS interface layer]. Individual OS interface +** implementations will +** want to subclass this object by appending additional fields +** for their own use. The pMethods entry is a pointer to an +** [sqlite3_io_methods] object that defines methods for performing +** I/O operations on the open file. +*/ +typedef struct sqlite3_file sqlite3_file; +struct sqlite3_file { + const struct sqlite3_io_methods *pMethods; /* Methods for an open file */ +}; + +/* +** CAPI3REF: OS Interface File Virtual Methods Object +** +** Every file opened by the [sqlite3_vfs.xOpen] method populates an +** [sqlite3_file] object (or, more commonly, a subclass of the +** [sqlite3_file] object) with a pointer to an instance of this object. +** This object defines the methods used to perform various operations +** against the open file represented by the [sqlite3_file] object. +** +** If the [sqlite3_vfs.xOpen] method sets the sqlite3_file.pMethods element +** to a non-NULL pointer, then the sqlite3_io_methods.xClose method +** may be invoked even if the [sqlite3_vfs.xOpen] reported that it failed. The +** only way to prevent a call to xClose following a failed [sqlite3_vfs.xOpen] +** is for the [sqlite3_vfs.xOpen] to set the sqlite3_file.pMethods element +** to NULL. +** +** The flags argument to xSync may be one of [SQLITE_SYNC_NORMAL] or +** [SQLITE_SYNC_FULL]. The first choice is the normal fsync(). +** The second choice is a Mac OS X style fullsync. The [SQLITE_SYNC_DATAONLY] +** flag may be ORed in to indicate that only the data of the file +** and not its inode needs to be synced. +** +** The integer values to xLock() and xUnlock() are one of +** <ul> +** <li> [SQLITE_LOCK_NONE], +** <li> [SQLITE_LOCK_SHARED], +** <li> [SQLITE_LOCK_RESERVED], +** <li> [SQLITE_LOCK_PENDING], or +** <li> [SQLITE_LOCK_EXCLUSIVE]. +** </ul> +** xLock() increases the lock. xUnlock() decreases the lock. +** The xCheckReservedLock() method checks whether any database connection, +** either in this process or in some other process, is holding a RESERVED, +** PENDING, or EXCLUSIVE lock on the file. It returns true +** if such a lock exists and false otherwise. +** +** The xFileControl() method is a generic interface that allows custom +** VFS implementations to directly control an open file using the +** [sqlite3_file_control()] interface. The second "op" argument is an +** integer opcode. The third argument is a generic pointer intended to +** point to a structure that may contain arguments or space in which to +** write return values. Potential uses for xFileControl() might be +** functions to enable blocking locks with timeouts, to change the +** locking strategy (for example to use dot-file locks), to inquire +** about the status of a lock, or to break stale locks. The SQLite +** core reserves all opcodes less than 100 for its own use. +** A [file control opcodes | list of opcodes] less than 100 is available. +** Applications that define a custom xFileControl method should use opcodes +** greater than 100 to avoid conflicts. VFS implementations should +** return [SQLITE_NOTFOUND] for file control opcodes that they do not +** recognize. +** +** The xSectorSize() method returns the sector size of the +** device that underlies the file. The sector size is the +** minimum write that can be performed without disturbing +** other bytes in the file. The xDeviceCharacteristics() +** method returns a bit vector describing behaviors of the +** underlying device: +** +** <ul> +** <li> [SQLITE_IOCAP_ATOMIC] +** <li> [SQLITE_IOCAP_ATOMIC512] +** <li> [SQLITE_IOCAP_ATOMIC1K] +** <li> [SQLITE_IOCAP_ATOMIC2K] +** <li> [SQLITE_IOCAP_ATOMIC4K] +** <li> [SQLITE_IOCAP_ATOMIC8K] +** <li> [SQLITE_IOCAP_ATOMIC16K] +** <li> [SQLITE_IOCAP_ATOMIC32K] +** <li> [SQLITE_IOCAP_ATOMIC64K] +** <li> [SQLITE_IOCAP_SAFE_APPEND] +** <li> [SQLITE_IOCAP_SEQUENTIAL] +** </ul> +** +** The SQLITE_IOCAP_ATOMIC property means that all writes of +** any size are atomic. The SQLITE_IOCAP_ATOMICnnn values +** mean that writes of blocks that are nnn bytes in size and +** are aligned to an address which is an integer multiple of +** nnn are atomic. The SQLITE_IOCAP_SAFE_APPEND value means +** that when data is appended to a file, the data is appended +** first then the size of the file is extended, never the other +** way around. The SQLITE_IOCAP_SEQUENTIAL property means that +** information is written to disk in the same order as calls +** to xWrite(). +** +** If xRead() returns SQLITE_IOERR_SHORT_READ it must also fill +** in the unread portions of the buffer with zeros. A VFS that +** fails to zero-fill short reads might seem to work. However, +** failure to zero-fill short reads will eventually lead to +** database corruption. +*/ +typedef struct sqlite3_io_methods sqlite3_io_methods; +struct sqlite3_io_methods { + int iVersion; + int (*xClose)(sqlite3_file*); + int (*xRead)(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst); + int (*xWrite)(sqlite3_file*, const void*, int iAmt, sqlite3_int64 iOfst); + int (*xTruncate)(sqlite3_file*, sqlite3_int64 size); + int (*xSync)(sqlite3_file*, int flags); + int (*xFileSize)(sqlite3_file*, sqlite3_int64 *pSize); + int (*xLock)(sqlite3_file*, int); + int (*xUnlock)(sqlite3_file*, int); + int (*xCheckReservedLock)(sqlite3_file*, int *pResOut); + int (*xFileControl)(sqlite3_file*, int op, void *pArg); + int (*xSectorSize)(sqlite3_file*); + int (*xDeviceCharacteristics)(sqlite3_file*); + /* Methods above are valid for version 1 */ + int (*xShmMap)(sqlite3_file*, int iPg, int pgsz, int, void volatile**); + int (*xShmLock)(sqlite3_file*, int offset, int n, int flags); + void (*xShmBarrier)(sqlite3_file*); + int (*xShmUnmap)(sqlite3_file*, int deleteFlag); + /* Methods above are valid for version 2 */ + int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp); + int (*xUnfetch)(sqlite3_file*, sqlite3_int64 iOfst, void *p); + /* Methods above are valid for version 3 */ + /* Additional methods may be added in future releases */ +}; + +/* +** CAPI3REF: Standard File Control Opcodes +** KEYWORDS: {file control opcodes} {file control opcode} +** +** These integer constants are opcodes for the xFileControl method +** of the [sqlite3_io_methods] object and for the [sqlite3_file_control()] +** interface. +** +** <ul> +** <li>[[SQLITE_FCNTL_LOCKSTATE]] +** The [SQLITE_FCNTL_LOCKSTATE] opcode is used for debugging. This +** opcode causes the xFileControl method to write the current state of +** the lock (one of [SQLITE_LOCK_NONE], [SQLITE_LOCK_SHARED], +** [SQLITE_LOCK_RESERVED], [SQLITE_LOCK_PENDING], or [SQLITE_LOCK_EXCLUSIVE]) +** into an integer that the pArg argument points to. This capability +** is used during testing and is only available when the SQLITE_TEST +** compile-time option is used. +** +** <li>[[SQLITE_FCNTL_SIZE_HINT]] +** The [SQLITE_FCNTL_SIZE_HINT] opcode is used by SQLite to give the VFS +** layer a hint of how large the database file will grow to be during the +** current transaction. This hint is not guaranteed to be accurate but it +** is often close. The underlying VFS might choose to preallocate database +** file space based on this hint in order to help writes to the database +** file run faster. +** +** <li>[[SQLITE_FCNTL_CHUNK_SIZE]] +** The [SQLITE_FCNTL_CHUNK_SIZE] opcode is used to request that the VFS +** extends and truncates the database file in chunks of a size specified +** by the user. The fourth argument to [sqlite3_file_control()] should +** point to an integer (type int) containing the new chunk-size to use +** for the nominated database. Allocating database file space in large +** chunks (say 1MB at a time), may reduce file-system fragmentation and +** improve performance on some systems. +** +** <li>[[SQLITE_FCNTL_FILE_POINTER]] +** The [SQLITE_FCNTL_FILE_POINTER] opcode is used to obtain a pointer +** to the [sqlite3_file] object associated with a particular database +** connection. See the [sqlite3_file_control()] documentation for +** additional information. +** +** <li>[[SQLITE_FCNTL_SYNC_OMITTED]] +** No longer in use. +** +** <li>[[SQLITE_FCNTL_SYNC]] +** The [SQLITE_FCNTL_SYNC] opcode is generated internally by SQLite and +** sent to the VFS immediately before the xSync method is invoked on a +** database file descriptor. Or, if the xSync method is not invoked +** because the user has configured SQLite with +** [PRAGMA synchronous | PRAGMA synchronous=OFF] it is invoked in place +** of the xSync method. In most cases, the pointer argument passed with +** this file-control is NULL. However, if the database file is being synced +** as part of a multi-database commit, the argument points to a nul-terminated +** string containing the transactions master-journal file name. VFSes that +** do not need this signal should silently ignore this opcode. Applications +** should not call [sqlite3_file_control()] with this opcode as doing so may +** disrupt the operation of the specialized VFSes that do require it. +** +** <li>[[SQLITE_FCNTL_COMMIT_PHASETWO]] +** The [SQLITE_FCNTL_COMMIT_PHASETWO] opcode is generated internally by SQLite +** and sent to the VFS after a transaction has been committed immediately +** but before the database is unlocked. VFSes that do not need this signal +** should silently ignore this opcode. Applications should not call +** [sqlite3_file_control()] with this opcode as doing so may disrupt the +** operation of the specialized VFSes that do require it. +** +** <li>[[SQLITE_FCNTL_WIN32_AV_RETRY]] +** ^The [SQLITE_FCNTL_WIN32_AV_RETRY] opcode is used to configure automatic +** retry counts and intervals for certain disk I/O operations for the +** windows [VFS] in order to provide robustness in the presence of +** anti-virus programs. By default, the windows VFS will retry file read, +** file write, and file delete operations up to 10 times, with a delay +** of 25 milliseconds before the first retry and with the delay increasing +** by an additional 25 milliseconds with each subsequent retry. This +** opcode allows these two values (10 retries and 25 milliseconds of delay) +** to be adjusted. The values are changed for all database connections +** within the same process. The argument is a pointer to an array of two +** integers where the first integer i the new retry count and the second +** integer is the delay. If either integer is negative, then the setting +** is not changed but instead the prior value of that setting is written +** into the array entry, allowing the current retry settings to be +** interrogated. The zDbName parameter is ignored. +** +** <li>[[SQLITE_FCNTL_PERSIST_WAL]] +** ^The [SQLITE_FCNTL_PERSIST_WAL] opcode is used to set or query the +** persistent [WAL | Write Ahead Log] setting. By default, the auxiliary +** write ahead log and shared memory files used for transaction control +** are automatically deleted when the latest connection to the database +** closes. Setting persistent WAL mode causes those files to persist after +** close. Persisting the files is useful when other processes that do not +** have write permission on the directory containing the database file want +** to read the database file, as the WAL and shared memory files must exist +** in order for the database to be readable. The fourth parameter to +** [sqlite3_file_control()] for this opcode should be a pointer to an integer. +** That integer is 0 to disable persistent WAL mode or 1 to enable persistent +** WAL mode. If the integer is -1, then it is overwritten with the current +** WAL persistence setting. +** +** <li>[[SQLITE_FCNTL_POWERSAFE_OVERWRITE]] +** ^The [SQLITE_FCNTL_POWERSAFE_OVERWRITE] opcode is used to set or query the +** persistent "powersafe-overwrite" or "PSOW" setting. The PSOW setting +** determines the [SQLITE_IOCAP_POWERSAFE_OVERWRITE] bit of the +** xDeviceCharacteristics methods. The fourth parameter to +** [sqlite3_file_control()] for this opcode should be a pointer to an integer. +** That integer is 0 to disable zero-damage mode or 1 to enable zero-damage +** mode. If the integer is -1, then it is overwritten with the current +** zero-damage mode setting. +** +** <li>[[SQLITE_FCNTL_OVERWRITE]] +** ^The [SQLITE_FCNTL_OVERWRITE] opcode is invoked by SQLite after opening +** a write transaction to indicate that, unless it is rolled back for some +** reason, the entire database file will be overwritten by the current +** transaction. This is used by VACUUM operations. +** +** <li>[[SQLITE_FCNTL_VFSNAME]] +** ^The [SQLITE_FCNTL_VFSNAME] opcode can be used to obtain the names of +** all [VFSes] in the VFS stack. The names are of all VFS shims and the +** final bottom-level VFS are written into memory obtained from +** [sqlite3_malloc()] and the result is stored in the char* variable +** that the fourth parameter of [sqlite3_file_control()] points to. +** The caller is responsible for freeing the memory when done. As with +** all file-control actions, there is no guarantee that this will actually +** do anything. Callers should initialize the char* variable to a NULL +** pointer in case this file-control is not implemented. This file-control +** is intended for diagnostic use only. +** +** <li>[[SQLITE_FCNTL_PRAGMA]] +** ^Whenever a [PRAGMA] statement is parsed, an [SQLITE_FCNTL_PRAGMA] +** file control is sent to the open [sqlite3_file] object corresponding +** to the database file to which the pragma statement refers. ^The argument +** to the [SQLITE_FCNTL_PRAGMA] file control is an array of +** pointers to strings (char**) in which the second element of the array +** is the name of the pragma and the third element is the argument to the +** pragma or NULL if the pragma has no argument. ^The handler for an +** [SQLITE_FCNTL_PRAGMA] file control can optionally make the first element +** of the char** argument point to a string obtained from [sqlite3_mprintf()] +** or the equivalent and that string will become the result of the pragma or +** the error message if the pragma fails. ^If the +** [SQLITE_FCNTL_PRAGMA] file control returns [SQLITE_NOTFOUND], then normal +** [PRAGMA] processing continues. ^If the [SQLITE_FCNTL_PRAGMA] +** file control returns [SQLITE_OK], then the parser assumes that the +** VFS has handled the PRAGMA itself and the parser generates a no-op +** prepared statement if result string is NULL, or that returns a copy +** of the result string if the string is non-NULL. +** ^If the [SQLITE_FCNTL_PRAGMA] file control returns +** any result code other than [SQLITE_OK] or [SQLITE_NOTFOUND], that means +** that the VFS encountered an error while handling the [PRAGMA] and the +** compilation of the PRAGMA fails with an error. ^The [SQLITE_FCNTL_PRAGMA] +** file control occurs at the beginning of pragma statement analysis and so +** it is able to override built-in [PRAGMA] statements. +** +** <li>[[SQLITE_FCNTL_BUSYHANDLER]] +** ^The [SQLITE_FCNTL_BUSYHANDLER] +** file-control may be invoked by SQLite on the database file handle +** shortly after it is opened in order to provide a custom VFS with access +** to the connections busy-handler callback. The argument is of type (void **) +** - an array of two (void *) values. The first (void *) actually points +** to a function of type (int (*)(void *)). In order to invoke the connections +** busy-handler, this function should be invoked with the second (void *) in +** the array as the only argument. If it returns non-zero, then the operation +** should be retried. If it returns zero, the custom VFS should abandon the +** current operation. +** +** <li>[[SQLITE_FCNTL_TEMPFILENAME]] +** ^Application can invoke the [SQLITE_FCNTL_TEMPFILENAME] file-control +** to have SQLite generate a +** temporary filename using the same algorithm that is followed to generate +** temporary filenames for TEMP tables and other internal uses. The +** argument should be a char** which will be filled with the filename +** written into memory obtained from [sqlite3_malloc()]. The caller should +** invoke [sqlite3_free()] on the result to avoid a memory leak. +** +** <li>[[SQLITE_FCNTL_MMAP_SIZE]] +** The [SQLITE_FCNTL_MMAP_SIZE] file control is used to query or set the +** maximum number of bytes that will be used for memory-mapped I/O. +** The argument is a pointer to a value of type sqlite3_int64 that +** is an advisory maximum number of bytes in the file to memory map. The +** pointer is overwritten with the old value. The limit is not changed if +** the value originally pointed to is negative, and so the current limit +** can be queried by passing in a pointer to a negative number. This +** file-control is used internally to implement [PRAGMA mmap_size]. +** +** <li>[[SQLITE_FCNTL_TRACE]] +** The [SQLITE_FCNTL_TRACE] file control provides advisory information +** to the VFS about what the higher layers of the SQLite stack are doing. +** This file control is used by some VFS activity tracing [shims]. +** The argument is a zero-terminated string. Higher layers in the +** SQLite stack may generate instances of this file control if +** the [SQLITE_USE_FCNTL_TRACE] compile-time option is enabled. +** +** <li>[[SQLITE_FCNTL_HAS_MOVED]] +** The [SQLITE_FCNTL_HAS_MOVED] file control interprets its argument as a +** pointer to an integer and it writes a boolean into that integer depending +** on whether or not the file has been renamed, moved, or deleted since it +** was first opened. +** +** <li>[[SQLITE_FCNTL_WIN32_SET_HANDLE]] +** The [SQLITE_FCNTL_WIN32_SET_HANDLE] opcode is used for debugging. This +** opcode causes the xFileControl method to swap the file handle with the one +** pointed to by the pArg argument. This capability is used during testing +** and only needs to be supported when SQLITE_TEST is defined. +** +** <li>[[SQLITE_FCNTL_WAL_BLOCK]] +** The [SQLITE_FCNTL_WAL_BLOCK] is a signal to the VFS layer that it might +** be advantageous to block on the next WAL lock if the lock is not immediately +** available. The WAL subsystem issues this signal during rare +** circumstances in order to fix a problem with priority inversion. +** Applications should <em>not</em> use this file-control. +** +** <li>[[SQLITE_FCNTL_ZIPVFS]] +** The [SQLITE_FCNTL_ZIPVFS] opcode is implemented by zipvfs only. All other +** VFS should return SQLITE_NOTFOUND for this opcode. +** +** <li>[[SQLITE_FCNTL_RBU]] +** The [SQLITE_FCNTL_RBU] opcode is implemented by the special VFS used by +** the RBU extension only. All other VFS should return SQLITE_NOTFOUND for +** this opcode. +** </ul> +*/ +#define SQLITE_FCNTL_LOCKSTATE 1 +#define SQLITE_FCNTL_GET_LOCKPROXYFILE 2 +#define SQLITE_FCNTL_SET_LOCKPROXYFILE 3 +#define SQLITE_FCNTL_LAST_ERRNO 4 +#define SQLITE_FCNTL_SIZE_HINT 5 +#define SQLITE_FCNTL_CHUNK_SIZE 6 +#define SQLITE_FCNTL_FILE_POINTER 7 +#define SQLITE_FCNTL_SYNC_OMITTED 8 +#define SQLITE_FCNTL_WIN32_AV_RETRY 9 +#define SQLITE_FCNTL_PERSIST_WAL 10 +#define SQLITE_FCNTL_OVERWRITE 11 +#define SQLITE_FCNTL_VFSNAME 12 +#define SQLITE_FCNTL_POWERSAFE_OVERWRITE 13 +#define SQLITE_FCNTL_PRAGMA 14 +#define SQLITE_FCNTL_BUSYHANDLER 15 +#define SQLITE_FCNTL_TEMPFILENAME 16 +#define SQLITE_FCNTL_MMAP_SIZE 18 +#define SQLITE_FCNTL_TRACE 19 +#define SQLITE_FCNTL_HAS_MOVED 20 +#define SQLITE_FCNTL_SYNC 21 +#define SQLITE_FCNTL_COMMIT_PHASETWO 22 +#define SQLITE_FCNTL_WIN32_SET_HANDLE 23 +#define SQLITE_FCNTL_WAL_BLOCK 24 +#define SQLITE_FCNTL_ZIPVFS 25 +#define SQLITE_FCNTL_RBU 26 + +/* +** CAPI3REF: Mutex Handle +** +** The mutex module within SQLite defines [sqlite3_mutex] to be an +** abstract type for a mutex object. The SQLite core never looks +** at the internal representation of an [sqlite3_mutex]. It only +** deals with pointers to the [sqlite3_mutex] object. +** +** Mutexes are created using [sqlite3_mutex_alloc()]. +*/ +typedef struct sqlite3_mutex sqlite3_mutex; + +/* +** CAPI3REF: OS Interface Object +** +** An instance of the sqlite3_vfs object defines the interface between +** the SQLite core and the underlying operating system. The "vfs" +** in the name of the object stands for "virtual file system". See +** the [VFS | VFS documentation] for further information. +** +** The value of the iVersion field is initially 1 but may be larger in +** future versions of SQLite. Additional fields may be appended to this +** object when the iVersion value is increased. Note that the structure +** of the sqlite3_vfs object changes in the transaction between +** SQLite version 3.5.9 and 3.6.0 and yet the iVersion field was not +** modified. +** +** The szOsFile field is the size of the subclassed [sqlite3_file] +** structure used by this VFS. mxPathname is the maximum length of +** a pathname in this VFS. +** +** Registered sqlite3_vfs objects are kept on a linked list formed by +** the pNext pointer. The [sqlite3_vfs_register()] interface manages +** this list in a thread-safe way. The [sqlite3_vfs_find()] interface +** searches the list. Neither the application code nor the VFS +** implementation should use the pNext pointer. +** +** The pNext field is the only field in the sqlite3_vfs +** structure that SQLite will ever modify. SQLite will only access +** or modify this field while holding a particular static mutex. +** The application should never modify anything within the sqlite3_vfs +** object once the object has been registered. +** +** The zName field holds the name of the VFS module. The name must +** be unique across all VFS modules. +** +** [[sqlite3_vfs.xOpen]] +** ^SQLite guarantees that the zFilename parameter to xOpen +** is either a NULL pointer or string obtained +** from xFullPathname() with an optional suffix added. +** ^If a suffix is added to the zFilename parameter, it will +** consist of a single "-" character followed by no more than +** 11 alphanumeric and/or "-" characters. +** ^SQLite further guarantees that +** the string will be valid and unchanged until xClose() is +** called. Because of the previous sentence, +** the [sqlite3_file] can safely store a pointer to the +** filename if it needs to remember the filename for some reason. +** If the zFilename parameter to xOpen is a NULL pointer then xOpen +** must invent its own temporary name for the file. ^Whenever the +** xFilename parameter is NULL it will also be the case that the +** flags parameter will include [SQLITE_OPEN_DELETEONCLOSE]. +** +** The flags argument to xOpen() includes all bits set in +** the flags argument to [sqlite3_open_v2()]. Or if [sqlite3_open()] +** or [sqlite3_open16()] is used, then flags includes at least +** [SQLITE_OPEN_READWRITE] | [SQLITE_OPEN_CREATE]. +** If xOpen() opens a file read-only then it sets *pOutFlags to +** include [SQLITE_OPEN_READONLY]. Other bits in *pOutFlags may be set. +** +** ^(SQLite will also add one of the following flags to the xOpen() +** call, depending on the object being opened: +** +** <ul> +** <li> [SQLITE_OPEN_MAIN_DB] +** <li> [SQLITE_OPEN_MAIN_JOURNAL] +** <li> [SQLITE_OPEN_TEMP_DB] +** <li> [SQLITE_OPEN_TEMP_JOURNAL] +** <li> [SQLITE_OPEN_TRANSIENT_DB] +** <li> [SQLITE_OPEN_SUBJOURNAL] +** <li> [SQLITE_OPEN_MASTER_JOURNAL] +** <li> [SQLITE_OPEN_WAL] +** </ul>)^ +** +** The file I/O implementation can use the object type flags to +** change the way it deals with files. For example, an application +** that does not care about crash recovery or rollback might make +** the open of a journal file a no-op. Writes to this journal would +** also be no-ops, and any attempt to read the journal would return +** SQLITE_IOERR. Or the implementation might recognize that a database +** file will be doing page-aligned sector reads and writes in a random +** order and set up its I/O subsystem accordingly. +** +** SQLite might also add one of the following flags to the xOpen method: +** +** <ul> +** <li> [SQLITE_OPEN_DELETEONCLOSE] +** <li> [SQLITE_OPEN_EXCLUSIVE] +** </ul> +** +** The [SQLITE_OPEN_DELETEONCLOSE] flag means the file should be +** deleted when it is closed. ^The [SQLITE_OPEN_DELETEONCLOSE] +** will be set for TEMP databases and their journals, transient +** databases, and subjournals. +** +** ^The [SQLITE_OPEN_EXCLUSIVE] flag is always used in conjunction +** with the [SQLITE_OPEN_CREATE] flag, which are both directly +** analogous to the O_EXCL and O_CREAT flags of the POSIX open() +** API. The SQLITE_OPEN_EXCLUSIVE flag, when paired with the +** SQLITE_OPEN_CREATE, is used to indicate that file should always +** be created, and that it is an error if it already exists. +** It is <i>not</i> used to indicate the file should be opened +** for exclusive access. +** +** ^At least szOsFile bytes of memory are allocated by SQLite +** to hold the [sqlite3_file] structure passed as the third +** argument to xOpen. The xOpen method does not have to +** allocate the structure; it should just fill it in. Note that +** the xOpen method must set the sqlite3_file.pMethods to either +** a valid [sqlite3_io_methods] object or to NULL. xOpen must do +** this even if the open fails. SQLite expects that the sqlite3_file.pMethods +** element will be valid after xOpen returns regardless of the success +** or failure of the xOpen call. +** +** [[sqlite3_vfs.xAccess]] +** ^The flags argument to xAccess() may be [SQLITE_ACCESS_EXISTS] +** to test for the existence of a file, or [SQLITE_ACCESS_READWRITE] to +** test whether a file is readable and writable, or [SQLITE_ACCESS_READ] +** to test whether a file is at least readable. The file can be a +** directory. +** +** ^SQLite will always allocate at least mxPathname+1 bytes for the +** output buffer xFullPathname. The exact size of the output buffer +** is also passed as a parameter to both methods. If the output buffer +** is not large enough, [SQLITE_CANTOPEN] should be returned. Since this is +** handled as a fatal error by SQLite, vfs implementations should endeavor +** to prevent this by setting mxPathname to a sufficiently large value. +** +** The xRandomness(), xSleep(), xCurrentTime(), and xCurrentTimeInt64() +** interfaces are not strictly a part of the filesystem, but they are +** included in the VFS structure for completeness. +** The xRandomness() function attempts to return nBytes bytes +** of good-quality randomness into zOut. The return value is +** the actual number of bytes of randomness obtained. +** The xSleep() method causes the calling thread to sleep for at +** least the number of microseconds given. ^The xCurrentTime() +** method returns a Julian Day Number for the current date and time as +** a floating point value. +** ^The xCurrentTimeInt64() method returns, as an integer, the Julian +** Day Number multiplied by 86400000 (the number of milliseconds in +** a 24-hour day). +** ^SQLite will use the xCurrentTimeInt64() method to get the current +** date and time if that method is available (if iVersion is 2 or +** greater and the function pointer is not NULL) and will fall back +** to xCurrentTime() if xCurrentTimeInt64() is unavailable. +** +** ^The xSetSystemCall(), xGetSystemCall(), and xNestSystemCall() interfaces +** are not used by the SQLite core. These optional interfaces are provided +** by some VFSes to facilitate testing of the VFS code. By overriding +** system calls with functions under its control, a test program can +** simulate faults and error conditions that would otherwise be difficult +** or impossible to induce. The set of system calls that can be overridden +** varies from one VFS to another, and from one version of the same VFS to the +** next. Applications that use these interfaces must be prepared for any +** or all of these interfaces to be NULL or for their behavior to change +** from one release to the next. Applications must not attempt to access +** any of these methods if the iVersion of the VFS is less than 3. +*/ +typedef struct sqlite3_vfs sqlite3_vfs; +typedef void (*sqlite3_syscall_ptr)(void); +struct sqlite3_vfs { + int iVersion; /* Structure version number (currently 3) */ + int szOsFile; /* Size of subclassed sqlite3_file */ + int mxPathname; /* Maximum file pathname length */ + sqlite3_vfs *pNext; /* Next registered VFS */ + const char *zName; /* Name of this virtual file system */ + void *pAppData; /* Pointer to application-specific data */ + int (*xOpen)(sqlite3_vfs*, const char *zName, sqlite3_file*, + int flags, int *pOutFlags); + int (*xDelete)(sqlite3_vfs*, const char *zName, int syncDir); + int (*xAccess)(sqlite3_vfs*, const char *zName, int flags, int *pResOut); + int (*xFullPathname)(sqlite3_vfs*, const char *zName, int nOut, char *zOut); + void *(*xDlOpen)(sqlite3_vfs*, const char *zFilename); + void (*xDlError)(sqlite3_vfs*, int nByte, char *zErrMsg); + void (*(*xDlSym)(sqlite3_vfs*,void*, const char *zSymbol))(void); + void (*xDlClose)(sqlite3_vfs*, void*); + int (*xRandomness)(sqlite3_vfs*, int nByte, char *zOut); + int (*xSleep)(sqlite3_vfs*, int microseconds); + int (*xCurrentTime)(sqlite3_vfs*, double*); + int (*xGetLastError)(sqlite3_vfs*, int, char *); + /* + ** The methods above are in version 1 of the sqlite_vfs object + ** definition. Those that follow are added in version 2 or later + */ + int (*xCurrentTimeInt64)(sqlite3_vfs*, sqlite3_int64*); + /* + ** The methods above are in versions 1 and 2 of the sqlite_vfs object. + ** Those below are for version 3 and greater. + */ + int (*xSetSystemCall)(sqlite3_vfs*, const char *zName, sqlite3_syscall_ptr); + sqlite3_syscall_ptr (*xGetSystemCall)(sqlite3_vfs*, const char *zName); + const char *(*xNextSystemCall)(sqlite3_vfs*, const char *zName); + /* + ** The methods above are in versions 1 through 3 of the sqlite_vfs object. + ** New fields may be appended in figure versions. The iVersion + ** value will increment whenever this happens. + */ +}; + +/* +** CAPI3REF: Flags for the xAccess VFS method +** +** These integer constants can be used as the third parameter to +** the xAccess method of an [sqlite3_vfs] object. They determine +** what kind of permissions the xAccess method is looking for. +** With SQLITE_ACCESS_EXISTS, the xAccess method +** simply checks whether the file exists. +** With SQLITE_ACCESS_READWRITE, the xAccess method +** checks whether the named directory is both readable and writable +** (in other words, if files can be added, removed, and renamed within +** the directory). +** The SQLITE_ACCESS_READWRITE constant is currently used only by the +** [temp_store_directory pragma], though this could change in a future +** release of SQLite. +** With SQLITE_ACCESS_READ, the xAccess method +** checks whether the file is readable. The SQLITE_ACCESS_READ constant is +** currently unused, though it might be used in a future release of +** SQLite. +*/ +#define SQLITE_ACCESS_EXISTS 0 +#define SQLITE_ACCESS_READWRITE 1 /* Used by PRAGMA temp_store_directory */ +#define SQLITE_ACCESS_READ 2 /* Unused */ + +/* +** CAPI3REF: Flags for the xShmLock VFS method +** +** These integer constants define the various locking operations +** allowed by the xShmLock method of [sqlite3_io_methods]. The +** following are the only legal combinations of flags to the +** xShmLock method: +** +** <ul> +** <li> SQLITE_SHM_LOCK | SQLITE_SHM_SHARED +** <li> SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE +** <li> SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED +** <li> SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE +** </ul> +** +** When unlocking, the same SHARED or EXCLUSIVE flag must be supplied as +** was given on the corresponding lock. +** +** The xShmLock method can transition between unlocked and SHARED or +** between unlocked and EXCLUSIVE. It cannot transition between SHARED +** and EXCLUSIVE. +*/ +#define SQLITE_SHM_UNLOCK 1 +#define SQLITE_SHM_LOCK 2 +#define SQLITE_SHM_SHARED 4 +#define SQLITE_SHM_EXCLUSIVE 8 + +/* +** CAPI3REF: Maximum xShmLock index +** +** The xShmLock method on [sqlite3_io_methods] may use values +** between 0 and this upper bound as its "offset" argument. +** The SQLite core will never attempt to acquire or release a +** lock outside of this range +*/ +#define SQLITE_SHM_NLOCK 8 + +SQLITE_PRIVATE int sqlite3_os_init(void); +SQLITE_PRIVATE int sqlite3_os_end(void); + +/* +** CAPI3REF: Configuring The SQLite Library +** +** The sqlite3_config() interface is used to make global configuration +** changes to SQLite in order to tune SQLite to the specific needs of +** the application. The default configuration is recommended for most +** applications and so this routine is usually not necessary. It is +** provided to support rare applications with unusual needs. +** +** <b>The sqlite3_config() interface is not threadsafe. The application +** must ensure that no other SQLite interfaces are invoked by other +** threads while sqlite3_config() is running.</b> +** +** The sqlite3_config() interface +** may only be invoked prior to library initialization using +** [sqlite3BtreeInitialize()] or after shutdown by [sqlite3BtreeShutdown()]. +** ^If sqlite3_config() is called after [sqlite3BtreeInitialize()] and before +** [sqlite3BtreeShutdown()] then it will return SQLITE_MISUSE. +** Note, however, that ^sqlite3_config() can be called as part of the +** implementation of an application-defined [sqlite3_os_init()]. +** +** The first argument to sqlite3_config() is an integer +** [configuration option] that determines +** what property of SQLite is to be configured. Subsequent arguments +** vary depending on the [configuration option] +** in the first argument. +** +** ^When a configuration option is set, sqlite3_config() returns [SQLITE_OK]. +** ^If the option is unknown or SQLite is unable to set the option +** then this routine returns a non-zero [error code]. +*/ +SQLITE_PRIVATE int sqlite3_config(int, ...); + +/* +** CAPI3REF: Memory Allocation Routines +** +** An instance of this object defines the interface between SQLite +** and low-level memory allocation routines. +** +** This object is used in only one place in the SQLite interface. +** A pointer to an instance of this object is the argument to +** [sqlite3_config()] when the configuration option is +** [SQLITE_CONFIG_MALLOC] or [SQLITE_CONFIG_GETMALLOC]. +** By creating an instance of this object +** and passing it to [sqlite3_config]([SQLITE_CONFIG_MALLOC]) +** during configuration, an application can specify an alternative +** memory allocation subsystem for SQLite to use for all of its +** dynamic memory needs. +** +** Note that SQLite comes with several [built-in memory allocators] +** that are perfectly adequate for the overwhelming majority of applications +** and that this object is only useful to a tiny minority of applications +** with specialized memory allocation requirements. This object is +** also used during testing of SQLite in order to specify an alternative +** memory allocator that simulates memory out-of-memory conditions in +** order to verify that SQLite recovers gracefully from such +** conditions. +** +** The xMalloc, xRealloc, and xFree methods must work like the +** malloc(), realloc() and free() functions from the standard C library. +** ^SQLite guarantees that the second argument to +** xRealloc is always a value returned by a prior call to xRoundup. +** +** xSize should return the allocated size of a memory allocation +** previously obtained from xMalloc or xRealloc. The allocated size +** is always at least as big as the requested size but may be larger. +** +** The xRoundup method returns what would be the allocated size of +** a memory allocation given a particular requested size. Most memory +** allocators round up memory allocations at least to the next multiple +** of 8. Some allocators round up to a larger multiple or to a power of 2. +** Every memory allocation request coming in through [sqlite3_malloc()] +** or [sqlite3_realloc()] first calls xRoundup. If xRoundup returns 0, +** that causes the corresponding memory allocation to fail. +** +** The xInit method initializes the memory allocator. For example, +** it might allocate any require mutexes or initialize internal data +** structures. The xShutdown method is invoked (indirectly) by +** [sqlite3BtreeShutdown()] and should deallocate any resources acquired +** by xInit. The pAppData pointer is used as the only parameter to +** xInit and xShutdown. +** +** SQLite holds the [SQLITE_MUTEX_STATIC_MASTER] mutex when it invokes +** the xInit method, so the xInit method need not be threadsafe. The +** xShutdown method is only called from [sqlite3BtreeShutdown()] so it does +** not need to be threadsafe either. For all other methods, SQLite +** holds the [SQLITE_MUTEX_STATIC_MEM] mutex as long as the +** [SQLITE_CONFIG_MEMSTATUS] configuration option is turned on (which +** it is by default) and so the methods are automatically serialized. +** However, if [SQLITE_CONFIG_MEMSTATUS] is disabled, then the other +** methods must be threadsafe or else make their own arrangements for +** serialization. +** +** SQLite will never invoke xInit() more than once without an intervening +** call to xShutdown(). +*/ +typedef struct sqlite3_mem_methods sqlite3_mem_methods; +struct sqlite3_mem_methods { + void *(*xMalloc)(int); /* Memory allocation function */ + void (*xFree)(void*); /* Free a prior allocation */ + void *(*xRealloc)(void*,int); /* Resize an allocation */ + int (*xSize)(void*); /* Return the size of an allocation */ + int (*xRoundup)(int); /* Round up request size to allocation size */ + int (*xInit)(void*); /* Initialize the memory allocator */ + void (*xShutdown)(void*); /* Deinitialize the memory allocator */ + void *pAppData; /* Argument to xInit() and xShutdown() */ +}; + +/* +** CAPI3REF: Configuration Options +** KEYWORDS: {configuration option} +** +** These constants are the available integer configuration options that +** can be passed as the first argument to the [sqlite3_config()] interface. +** +** New configuration options may be added in future releases of SQLite. +** Existing configuration options might be discontinued. Applications +** should check the return code from [sqlite3_config()] to make sure that +** the call worked. The [sqlite3_config()] interface will return a +** non-zero [error code] if a discontinued or unsupported configuration option +** is invoked. +** +** <dl> +** [[SQLITE_CONFIG_SINGLETHREAD]] <dt>SQLITE_CONFIG_SINGLETHREAD</dt> +** <dd>There are no arguments to this option. ^This option sets the +** [threading mode] to Single-thread. In other words, it disables +** all mutexing and puts SQLite into a mode where it can only be used +** by a single thread. ^If SQLite is compiled with +** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then +** it is not possible to change the [threading mode] from its default +** value of Single-thread and so [sqlite3_config()] will return +** [SQLITE_ERROR] if called with the SQLITE_CONFIG_SINGLETHREAD +** configuration option.</dd> +** +** [[SQLITE_CONFIG_MULTITHREAD]] <dt>SQLITE_CONFIG_MULTITHREAD</dt> +** <dd>There are no arguments to this option. ^This option sets the +** [threading mode] to Multi-thread. In other words, it disables +** mutexing on [database connection] and [prepared statement] objects. +** The application is responsible for serializing access to +** [database connections] and [prepared statements]. But other mutexes +** are enabled so that SQLite will be safe to use in a multi-threaded +** environment as long as no two threads attempt to use the same +** [database connection] at the same time. ^If SQLite is compiled with +** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then +** it is not possible to set the Multi-thread [threading mode] and +** [sqlite3_config()] will return [SQLITE_ERROR] if called with the +** SQLITE_CONFIG_MULTITHREAD configuration option.</dd> +** +** [[SQLITE_CONFIG_SERIALIZED]] <dt>SQLITE_CONFIG_SERIALIZED</dt> +** <dd>There are no arguments to this option. ^This option sets the +** [threading mode] to Serialized. In other words, this option enables +** all mutexes including the recursive +** mutexes on [database connection] and [prepared statement] objects. +** In this mode (which is the default when SQLite is compiled with +** [SQLITE_THREADSAFE=1]) the SQLite library will itself serialize access +** to [database connections] and [prepared statements] so that the +** application is free to use the same [database connection] or the +** same [prepared statement] in different threads at the same time. +** ^If SQLite is compiled with +** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then +** it is not possible to set the Serialized [threading mode] and +** [sqlite3_config()] will return [SQLITE_ERROR] if called with the +** SQLITE_CONFIG_SERIALIZED configuration option.</dd> +** +** [[SQLITE_CONFIG_MALLOC]] <dt>SQLITE_CONFIG_MALLOC</dt> +** <dd> ^(The SQLITE_CONFIG_MALLOC option takes a single argument which is +** a pointer to an instance of the [sqlite3_mem_methods] structure. +** The argument specifies +** alternative low-level memory allocation routines to be used in place of +** the memory allocation routines built into SQLite.)^ ^SQLite makes +** its own private copy of the content of the [sqlite3_mem_methods] structure +** before the [sqlite3_config()] call returns.</dd> +** +** [[SQLITE_CONFIG_GETMALLOC]] <dt>SQLITE_CONFIG_GETMALLOC</dt> +** <dd> ^(The SQLITE_CONFIG_GETMALLOC option takes a single argument which +** is a pointer to an instance of the [sqlite3_mem_methods] structure. +** The [sqlite3_mem_methods] +** structure is filled with the currently defined memory allocation routines.)^ +** This option can be used to overload the default memory allocation +** routines with a wrapper that simulations memory allocation failure or +** tracks memory usage, for example. </dd> +** +** [[SQLITE_CONFIG_MEMSTATUS]] <dt>SQLITE_CONFIG_MEMSTATUS</dt> +** <dd> ^The SQLITE_CONFIG_MEMSTATUS option takes single argument of type int, +** interpreted as a boolean, which enables or disables the collection of +** memory allocation statistics. ^(When memory allocation statistics are +** disabled, the following SQLite interfaces become non-operational: +** <ul> +** <li> [sqlite3_memory_used()] +** <li> [sqlite3_memory_highwater()] +** <li> [sqlite3_soft_heap_limit64()] +** <li> [sqlite3_status64()] +** </ul>)^ +** ^Memory allocation statistics are enabled by default unless SQLite is +** compiled with [SQLITE_DEFAULT_MEMSTATUS]=0 in which case memory +** allocation statistics are disabled by default. +** </dd> +** +** [[SQLITE_CONFIG_SCRATCH]] <dt>SQLITE_CONFIG_SCRATCH</dt> +** <dd> ^The SQLITE_CONFIG_SCRATCH option specifies a static memory buffer +** that SQLite can use for scratch memory. ^(There are three arguments +** to SQLITE_CONFIG_SCRATCH: A pointer an 8-byte +** aligned memory buffer from which the scratch allocations will be +** drawn, the size of each scratch allocation (sz), +** and the maximum number of scratch allocations (N).)^ +** The first argument must be a pointer to an 8-byte aligned buffer +** of at least sz*N bytes of memory. +** ^SQLite will not use more than one scratch buffers per thread. +** ^SQLite will never request a scratch buffer that is more than 6 +** times the database page size. +** ^If SQLite needs needs additional +** scratch memory beyond what is provided by this configuration option, then +** [sqlite3_malloc()] will be used to obtain the memory needed.<p> +** ^When the application provides any amount of scratch memory using +** SQLITE_CONFIG_SCRATCH, SQLite avoids unnecessary large +** [sqlite3_malloc|heap allocations]. +** This can help [Robson proof|prevent memory allocation failures] due to heap +** fragmentation in low-memory embedded systems. +** </dd> +** +** [[SQLITE_CONFIG_PAGECACHE]] <dt>SQLITE_CONFIG_PAGECACHE</dt> +** <dd> ^The SQLITE_CONFIG_PAGECACHE option specifies a static memory buffer +** that SQLite can use for the database page cache with the default page +** cache implementation. +** This configuration should not be used if an application-define page +** cache implementation is loaded using the [SQLITE_CONFIG_PCACHE2] +** configuration option. +** ^There are three arguments to SQLITE_CONFIG_PAGECACHE: A pointer to +** 8-byte aligned +** memory, the size of each page buffer (sz), and the number of pages (N). +** The sz argument should be the size of the largest database page +** (a power of two between 512 and 65536) plus some extra bytes for each +** page header. ^The number of extra bytes needed by the page header +** can be determined using the [SQLITE_CONFIG_PCACHE_HDRSZ] option +** to [sqlite3_config()]. +** ^It is harmless, apart from the wasted memory, +** for the sz parameter to be larger than necessary. The first +** argument should pointer to an 8-byte aligned block of memory that +** is at least sz*N bytes of memory, otherwise subsequent behavior is +** undefined. +** ^SQLite will use the memory provided by the first argument to satisfy its +** memory needs for the first N pages that it adds to cache. ^If additional +** page cache memory is needed beyond what is provided by this option, then +** SQLite goes to [sqlite3_malloc()] for the additional storage space.</dd> +** +** [[SQLITE_CONFIG_HEAP]] <dt>SQLITE_CONFIG_HEAP</dt> +** <dd> ^The SQLITE_CONFIG_HEAP option specifies a static memory buffer +** that SQLite will use for all of its dynamic memory allocation needs +** beyond those provided for by [SQLITE_CONFIG_SCRATCH] and +** [SQLITE_CONFIG_PAGECACHE]. +** ^The SQLITE_CONFIG_HEAP option is only available if SQLite is compiled +** with either [SQLITE_ENABLE_MEMSYS3] or [SQLITE_ENABLE_MEMSYS5] and returns +** [SQLITE_ERROR] if invoked otherwise. +** ^There are three arguments to SQLITE_CONFIG_HEAP: +** An 8-byte aligned pointer to the memory, +** the number of bytes in the memory buffer, and the minimum allocation size. +** ^If the first pointer (the memory pointer) is NULL, then SQLite reverts +** to using its default memory allocator (the system malloc() implementation), +** undoing any prior invocation of [SQLITE_CONFIG_MALLOC]. ^If the +** memory pointer is not NULL then the alternative memory +** allocator is engaged to handle all of SQLites memory allocation needs. +** The first pointer (the memory pointer) must be aligned to an 8-byte +** boundary or subsequent behavior of SQLite will be undefined. +** The minimum allocation size is capped at 2**12. Reasonable values +** for the minimum allocation size are 2**5 through 2**8.</dd> +** +** [[SQLITE_CONFIG_MUTEX]] <dt>SQLITE_CONFIG_MUTEX</dt> +** <dd> ^(The SQLITE_CONFIG_MUTEX option takes a single argument which is a +** pointer to an instance of the [sqlite3_mutex_methods] structure. +** The argument specifies alternative low-level mutex routines to be used +** in place the mutex routines built into SQLite.)^ ^SQLite makes a copy of +** the content of the [sqlite3_mutex_methods] structure before the call to +** [sqlite3_config()] returns. ^If SQLite is compiled with +** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then +** the entire mutexing subsystem is omitted from the build and hence calls to +** [sqlite3_config()] with the SQLITE_CONFIG_MUTEX configuration option will +** return [SQLITE_ERROR].</dd> +** +** [[SQLITE_CONFIG_GETMUTEX]] <dt>SQLITE_CONFIG_GETMUTEX</dt> +** <dd> ^(The SQLITE_CONFIG_GETMUTEX option takes a single argument which +** is a pointer to an instance of the [sqlite3_mutex_methods] structure. The +** [sqlite3_mutex_methods] +** structure is filled with the currently defined mutex routines.)^ +** This option can be used to overload the default mutex allocation +** routines with a wrapper used to track mutex usage for performance +** profiling or testing, for example. ^If SQLite is compiled with +** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then +** the entire mutexing subsystem is omitted from the build and hence calls to +** [sqlite3_config()] with the SQLITE_CONFIG_GETMUTEX configuration option will +** return [SQLITE_ERROR].</dd> +** +** [[SQLITE_CONFIG_LOOKASIDE]] <dt>SQLITE_CONFIG_LOOKASIDE</dt> +** <dd> ^(The SQLITE_CONFIG_LOOKASIDE option takes two arguments that determine +** the default size of lookaside memory on each [database connection]. +** The first argument is the +** size of each lookaside buffer slot and the second is the number of +** slots allocated to each database connection.)^ ^(SQLITE_CONFIG_LOOKASIDE +** sets the <i>default</i> lookaside size. The [SQLITE_DBCONFIG_LOOKASIDE] +** option to [sqlite3_db_config()] can be used to change the lookaside +** configuration on individual connections.)^ </dd> +** +** [[SQLITE_CONFIG_PCACHE2]] <dt>SQLITE_CONFIG_PCACHE2</dt> +** <dd> ^(The SQLITE_CONFIG_PCACHE2 option takes a single argument which is +** a pointer to an [sqlite3_pcache_methods2] object. This object specifies +** the interface to a custom page cache implementation.)^ +** ^SQLite makes a copy of the [sqlite3_pcache_methods2] object.</dd> +** +** [[SQLITE_CONFIG_GETPCACHE2]] <dt>SQLITE_CONFIG_GETPCACHE2</dt> +** <dd> ^(The SQLITE_CONFIG_GETPCACHE2 option takes a single argument which +** is a pointer to an [sqlite3_pcache_methods2] object. SQLite copies of +** the current page cache implementation into that object.)^ </dd> +** +** [[SQLITE_CONFIG_LOG]] <dt>SQLITE_CONFIG_LOG</dt> +** <dd> The SQLITE_CONFIG_LOG option is used to configure the SQLite +** global [error log]. +** (^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a +** function with a call signature of void(*)(void*,int,const char*), +** and a pointer to void. ^If the function pointer is not NULL, it is +** invoked by [sqlite3_log()] to process each logging event. ^If the +** function pointer is NULL, the [sqlite3_log()] interface becomes a no-op. +** ^The void pointer that is the second argument to SQLITE_CONFIG_LOG is +** passed through as the first parameter to the application-defined logger +** function whenever that function is invoked. ^The second parameter to +** the logger function is a copy of the first parameter to the corresponding +** [sqlite3_log()] call and is intended to be a [result code] or an +** [extended result code]. ^The third parameter passed to the logger is +** log message after formatting via [sqlite3_snprintf()]. +** The SQLite logging interface is not reentrant; the logger function +** supplied by the application must not invoke any SQLite interface. +** In a multi-threaded application, the application-defined logger +** function must be threadsafe. </dd> +** +** [[SQLITE_CONFIG_URI]] <dt>SQLITE_CONFIG_URI +** <dd>^(The SQLITE_CONFIG_URI option takes a single argument of type int. +** If non-zero, then URI handling is globally enabled. If the parameter is zero, +** then URI handling is globally disabled.)^ ^If URI handling is globally +** enabled, all filenames passed to [sqlite3_open()], [sqlite3_open_v2()], +** [sqlite3_open16()] or +** specified as part of [ATTACH] commands are interpreted as URIs, regardless +** of whether or not the [SQLITE_OPEN_URI] flag is set when the database +** connection is opened. ^If it is globally disabled, filenames are +** only interpreted as URIs if the SQLITE_OPEN_URI flag is set when the +** database connection is opened. ^(By default, URI handling is globally +** disabled. The default value may be changed by compiling with the +** [SQLITE_USE_URI] symbol defined.)^ +** +** [[SQLITE_CONFIG_COVERING_INDEX_SCAN]] <dt>SQLITE_CONFIG_COVERING_INDEX_SCAN +** <dd>^The SQLITE_CONFIG_COVERING_INDEX_SCAN option takes a single integer +** argument which is interpreted as a boolean in order to enable or disable +** the use of covering indices for full table scans in the query optimizer. +** ^The default setting is determined +** by the [SQLITE_ALLOW_COVERING_INDEX_SCAN] compile-time option, or is "on" +** if that compile-time option is omitted. +** The ability to disable the use of covering indices for full table scans +** is because some incorrectly coded legacy applications might malfunction +** when the optimization is enabled. Providing the ability to +** disable the optimization allows the older, buggy application code to work +** without change even with newer versions of SQLite. +** +** [[SQLITE_CONFIG_PCACHE]] [[SQLITE_CONFIG_GETPCACHE]] +** <dt>SQLITE_CONFIG_PCACHE and SQLITE_CONFIG_GETPCACHE +** <dd> These options are obsolete and should not be used by new code. +** They are retained for backwards compatibility but are now no-ops. +** </dd> +** +** [[SQLITE_CONFIG_MMAP_SIZE]] +** <dt>SQLITE_CONFIG_MMAP_SIZE +** <dd>^SQLITE_CONFIG_MMAP_SIZE takes two 64-bit integer (sqlite3_int64) values +** that are the default mmap size limit (the default setting for +** [PRAGMA mmap_size]) and the maximum allowed mmap size limit. +** ^The default setting can be overridden by each database connection using +** either the [PRAGMA mmap_size] command, or by using the +** [SQLITE_FCNTL_MMAP_SIZE] file control. ^(The maximum allowed mmap size +** will be silently truncated if necessary so that it does not exceed the +** compile-time maximum mmap size set by the +** [SQLITE_MAX_MMAP_SIZE] compile-time option.)^ +** ^If either argument to this option is negative, then that argument is +** changed to its compile-time default. +** +** [[SQLITE_CONFIG_WIN32_HEAPSIZE]] +** <dt>SQLITE_CONFIG_WIN32_HEAPSIZE +** <dd>^The SQLITE_CONFIG_WIN32_HEAPSIZE option is only available if SQLite is +** compiled for Windows with the [SQLITE_WIN32_MALLOC] pre-processor macro +** defined. ^SQLITE_CONFIG_WIN32_HEAPSIZE takes a 32-bit unsigned integer value +** that specifies the maximum size of the created heap. +** +** [[SQLITE_CONFIG_PCACHE_HDRSZ]] +** <dt>SQLITE_CONFIG_PCACHE_HDRSZ +** <dd>^The SQLITE_CONFIG_PCACHE_HDRSZ option takes a single parameter which +** is a pointer to an integer and writes into that integer the number of extra +** bytes per page required for each page in [SQLITE_CONFIG_PAGECACHE]. +** The amount of extra space required can change depending on the compiler, +** target platform, and SQLite version. +** +** [[SQLITE_CONFIG_PMASZ]] +** <dt>SQLITE_CONFIG_PMASZ +** <dd>^The SQLITE_CONFIG_PMASZ option takes a single parameter which +** is an unsigned integer and sets the "Minimum PMA Size" for the multithreaded +** sorter to that integer. The default minimum PMA Size is set by the +** [SQLITE_SORTER_PMASZ] compile-time option. New threads are launched +** to help with sort operations when multithreaded sorting +** is enabled (using the [PRAGMA threads] command) and the amount of content +** to be sorted exceeds the page size times the minimum of the +** [PRAGMA cache_size] setting and this value. +** </dl> +*/ +#define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ +#define SQLITE_CONFIG_MULTITHREAD 2 /* nil */ +#define SQLITE_CONFIG_SERIALIZED 3 /* nil */ +#define SQLITE_CONFIG_MALLOC 4 /* sqlite3_mem_methods* */ +#define SQLITE_CONFIG_GETMALLOC 5 /* sqlite3_mem_methods* */ +#define SQLITE_CONFIG_SCRATCH 6 /* void*, int sz, int N */ +#define SQLITE_CONFIG_PAGECACHE 7 /* void*, int sz, int N */ +#define SQLITE_CONFIG_HEAP 8 /* void*, int nByte, int min */ +#define SQLITE_CONFIG_MEMSTATUS 9 /* boolean */ +#define SQLITE_CONFIG_MUTEX 10 /* sqlite3_mutex_methods* */ +#define SQLITE_CONFIG_GETMUTEX 11 /* sqlite3_mutex_methods* */ +/* previously SQLITE_CONFIG_CHUNKALLOC 12 which is now unused. */ +#define SQLITE_CONFIG_LOOKASIDE 13 /* int int */ +#define SQLITE_CONFIG_PCACHE 14 /* no-op */ +#define SQLITE_CONFIG_GETPCACHE 15 /* no-op */ +#define SQLITE_CONFIG_LOG 16 /* xFunc, void* */ +#define SQLITE_CONFIG_URI 17 /* int */ +#define SQLITE_CONFIG_PCACHE2 18 /* sqlite3_pcache_methods2* */ +#define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ +#define SQLITE_CONFIG_COVERING_INDEX_SCAN 20 /* int */ +#define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ +#define SQLITE_CONFIG_MMAP_SIZE 22 /* sqlite3_int64, sqlite3_int64 */ +#define SQLITE_CONFIG_WIN32_HEAPSIZE 23 /* int nByte */ +#define SQLITE_CONFIG_PCACHE_HDRSZ 24 /* int *psz */ +#define SQLITE_CONFIG_PMASZ 25 /* unsigned int szPma */ + +/* +** CAPI3REF: Formatted String Printing Functions +** +** These routines are work-alikes of the "printf()" family of functions +** from the standard C library. +** These routines understand most of the common K&R formatting options, +** plus some additional non-standard formats, detailed below. +** Note that some of the more obscure formatting options from recent +** C-library standards are omitted from this implementation. +** +** ^The sqlite3_mprintf() and sqlite3_vmprintf() routines write their +** results into memory obtained from [sqlite3_malloc()]. +** The strings returned by these two routines should be +** released by [sqlite3_free()]. ^Both routines return a +** NULL pointer if [sqlite3_malloc()] is unable to allocate enough +** memory to hold the resulting string. +** +** ^(The sqlite3_snprintf() routine is similar to "snprintf()" from +** the standard C library. The result is written into the +** buffer supplied as the second parameter whose size is given by +** the first parameter. Note that the order of the +** first two parameters is reversed from snprintf().)^ This is an +** historical accident that cannot be fixed without breaking +** backwards compatibility. ^(Note also that sqlite3_snprintf() +** returns a pointer to its buffer instead of the number of +** characters actually written into the buffer.)^ We admit that +** the number of characters written would be a more useful return +** value but we cannot change the implementation of sqlite3_snprintf() +** now without breaking compatibility. +** +** ^As long as the buffer size is greater than zero, sqlite3_snprintf() +** guarantees that the buffer is always zero-terminated. ^The first +** parameter "n" is the total size of the buffer, including space for +** the zero terminator. So the longest string that can be completely +** written will be n-1 characters. +** +** ^The sqlite3_vsnprintf() routine is a varargs version of sqlite3_snprintf(). +** +** These routines all implement some additional formatting +** options that are useful for constructing SQL statements. +** All of the usual printf() formatting options apply. In addition, there +** is are "%q", "%Q", "%w" and "%z" options. +** +** ^(The %q option works like %s in that it substitutes a nul-terminated +** string from the argument list. But %q also doubles every '\'' character. +** %q is designed for use inside a string literal.)^ By doubling each '\'' +** character it escapes that character and allows it to be inserted into +** the string. +** +** For example, assume the string variable zText contains text as follows: +** +** <blockquote><pre> +** char *zText = "It's a happy day!"; +** </pre></blockquote> +** +** One can use this text in an SQL statement as follows: +** +** <blockquote><pre> +** char *zSQL = sqlite3_mprintf("INSERT INTO table VALUES('%q')", zText); +** sqlite3_exec(db, zSQL, 0, 0, 0); +** sqlite3_free(zSQL); +** </pre></blockquote> +** +** Because the %q format string is used, the '\'' character in zText +** is escaped and the SQL generated is as follows: +** +** <blockquote><pre> +** INSERT INTO table1 VALUES('It''s a happy day!') +** </pre></blockquote> +** +** This is correct. Had we used %s instead of %q, the generated SQL +** would have looked like this: +** +** <blockquote><pre> +** INSERT INTO table1 VALUES('It's a happy day!'); +** </pre></blockquote> +** +** This second example is an SQL syntax error. As a general rule you should +** always use %q instead of %s when inserting text into a string literal. +** +** ^(The %Q option works like %q except it also adds single quotes around +** the outside of the total string. Additionally, if the parameter in the +** argument list is a NULL pointer, %Q substitutes the text "NULL" (without +** single quotes).)^ So, for example, one could say: +** +** <blockquote><pre> +** char *zSQL = sqlite3_mprintf("INSERT INTO table VALUES(%Q)", zText); +** sqlite3_exec(db, zSQL, 0, 0, 0); +** sqlite3_free(zSQL); +** </pre></blockquote> +** +** The code above will render a correct SQL statement in the zSQL +** variable even if the zText variable is a NULL pointer. +** +** ^(The "%w" formatting option is like "%q" except that it expects to +** be contained within double-quotes instead of single quotes, and it +** escapes the double-quote character instead of the single-quote +** character.)^ The "%w" formatting option is intended for safely inserting +** table and column names into a constructed SQL statement. +** +** ^(The "%z" formatting option works like "%s" but with the +** addition that after the string has been read and copied into +** the result, [sqlite3_free()] is called on the input string.)^ +*/ +SQLITE_PRIVATE char *sqlite3_mprintf(const char*,...); +SQLITE_PRIVATE char *sqlite3_vmprintf(const char*, va_list); +SQLITE_PRIVATE char *sqlite3_snprintf(int,char*,const char*, ...); +SQLITE_PRIVATE char *sqlite3_vsnprintf(int,char*,const char*, va_list); + +/* +** CAPI3REF: Memory Allocation Subsystem +** +** The SQLite core uses these three routines for all of its own +** internal memory allocation needs. "Core" in the previous sentence +** does not include operating-system specific VFS implementation. The +** Windows VFS uses native malloc() and free() for some operations. +** +** ^The sqlite3_malloc() routine returns a pointer to a block +** of memory at least N bytes in length, where N is the parameter. +** ^If sqlite3_malloc() is unable to obtain sufficient free +** memory, it returns a NULL pointer. ^If the parameter N to +** sqlite3_malloc() is zero or negative then sqlite3_malloc() returns +** a NULL pointer. +** +** ^The sqlite3_malloc64(N) routine works just like +** sqlite3_malloc(N) except that N is an unsigned 64-bit integer instead +** of a signed 32-bit integer. +** +** ^Calling sqlite3_free() with a pointer previously returned +** by sqlite3_malloc() or sqlite3_realloc() releases that memory so +** that it might be reused. ^The sqlite3_free() routine is +** a no-op if is called with a NULL pointer. Passing a NULL pointer +** to sqlite3_free() is harmless. After being freed, memory +** should neither be read nor written. Even reading previously freed +** memory might result in a segmentation fault or other severe error. +** Memory corruption, a segmentation fault, or other severe error +** might result if sqlite3_free() is called with a non-NULL pointer that +** was not obtained from sqlite3_malloc() or sqlite3_realloc(). +** +** ^The sqlite3_realloc(X,N) interface attempts to resize a +** prior memory allocation X to be at least N bytes. +** ^If the X parameter to sqlite3_realloc(X,N) +** is a NULL pointer then its behavior is identical to calling +** sqlite3_malloc(N). +** ^If the N parameter to sqlite3_realloc(X,N) is zero or +** negative then the behavior is exactly the same as calling +** sqlite3_free(X). +** ^sqlite3_realloc(X,N) returns a pointer to a memory allocation +** of at least N bytes in size or NULL if insufficient memory is available. +** ^If M is the size of the prior allocation, then min(N,M) bytes +** of the prior allocation are copied into the beginning of buffer returned +** by sqlite3_realloc(X,N) and the prior allocation is freed. +** ^If sqlite3_realloc(X,N) returns NULL and N is positive, then the +** prior allocation is not freed. +** +** ^The sqlite3_realloc64(X,N) interfaces works the same as +** sqlite3_realloc(X,N) except that N is a 64-bit unsigned integer instead +** of a 32-bit signed integer. +** +** ^The memory returned by sqlite3_malloc(), sqlite3_realloc(), +** sqlite3_malloc64(), and sqlite3_realloc64() +** is always aligned to at least an 8 byte boundary, or to a +** 4 byte boundary if the [SQLITE_4_BYTE_ALIGNED_MALLOC] compile-time +** option is used. +** +** In SQLite version 3.5.0 and 3.5.1, it was possible to define +** the SQLITE_OMIT_MEMORY_ALLOCATION which would cause the built-in +** implementation of these routines to be omitted. That capability +** is no longer provided. Only built-in memory allocators can be used. +** +** Prior to SQLite version 3.7.10, the Windows OS interface layer called +** the system malloc() and free() directly when converting +** filenames between the UTF-8 encoding used by SQLite +** and whatever filename encoding is used by the particular Windows +** installation. Memory allocation errors were detected, but +** they were reported back as [SQLITE_CANTOPEN] or +** [SQLITE_IOERR] rather than [SQLITE_NOMEM]. +** +** The pointer arguments to [sqlite3_free()] and [sqlite3_realloc()] +** must be either NULL or else pointers obtained from a prior +** invocation of [sqlite3_malloc()] or [sqlite3_realloc()] that have +** not yet been released. +** +** The application must not read or write any part of +** a block of memory after it has been released using +** [sqlite3_free()] or [sqlite3_realloc()]. +*/ +SQLITE_PRIVATE void *sqlite3_malloc(int); +SQLITE_PRIVATE void *sqlite3_malloc64(sqlite3_uint64); +SQLITE_PRIVATE void *sqlite3_realloc(void*, int); +SQLITE_PRIVATE void *sqlite3_realloc64(void*, sqlite3_uint64); +SQLITE_PRIVATE void sqlite3_free(void*); + +/* +** CAPI3REF: Run-Time Limit Categories +** KEYWORDS: {limit category} {*limit categories} +** +** These constants define various performance limits +** that can be lowered at run-time using [sqlite3_limit()]. +** The synopsis of the meanings of the various limits is shown below. +** Additional information is available at [limits | Limits in SQLite]. +** +** <dl> +** [[SQLITE_LIMIT_LENGTH]] ^(<dt>SQLITE_LIMIT_LENGTH</dt> +** <dd>The maximum size of any string or BLOB or table row, in bytes.<dd>)^ +** +** [[SQLITE_LIMIT_SQL_LENGTH]] ^(<dt>SQLITE_LIMIT_SQL_LENGTH</dt> +** <dd>The maximum length of an SQL statement, in bytes.</dd>)^ +** +** [[SQLITE_LIMIT_COLUMN]] ^(<dt>SQLITE_LIMIT_COLUMN</dt> +** <dd>The maximum number of columns in a table definition or in the +** result set of a [SELECT] or the maximum number of columns in an index +** or in an ORDER BY or GROUP BY clause.</dd>)^ +** +** [[SQLITE_LIMIT_EXPR_DEPTH]] ^(<dt>SQLITE_LIMIT_EXPR_DEPTH</dt> +** <dd>The maximum depth of the parse tree on any expression.</dd>)^ +** +** [[SQLITE_LIMIT_COMPOUND_SELECT]] ^(<dt>SQLITE_LIMIT_COMPOUND_SELECT</dt> +** <dd>The maximum number of terms in a compound SELECT statement.</dd>)^ +** +** [[SQLITE_LIMIT_VDBE_OP]] ^(<dt>SQLITE_LIMIT_VDBE_OP</dt> +** <dd>The maximum number of instructions in a virtual machine program +** used to implement an SQL statement. This limit is not currently +** enforced, though that might be added in some future release of +** SQLite.</dd>)^ +** +** [[SQLITE_LIMIT_FUNCTION_ARG]] ^(<dt>SQLITE_LIMIT_FUNCTION_ARG</dt> +** <dd>The maximum number of arguments on a function.</dd>)^ +** +** [[SQLITE_LIMIT_ATTACHED]] ^(<dt>SQLITE_LIMIT_ATTACHED</dt> +** <dd>The maximum number of [ATTACH | attached databases].)^</dd> +** +** [[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]] +** ^(<dt>SQLITE_LIMIT_LIKE_PATTERN_LENGTH</dt> +** <dd>The maximum length of the pattern argument to the [LIKE] or +** [GLOB] operators.</dd>)^ +** +** [[SQLITE_LIMIT_VARIABLE_NUMBER]] +** ^(<dt>SQLITE_LIMIT_VARIABLE_NUMBER</dt> +** <dd>The maximum index number of any [parameter] in an SQL statement.)^ +** +** [[SQLITE_LIMIT_TRIGGER_DEPTH]] ^(<dt>SQLITE_LIMIT_TRIGGER_DEPTH</dt> +** <dd>The maximum depth of recursion for triggers.</dd>)^ +** +** [[SQLITE_LIMIT_WORKER_THREADS]] ^(<dt>SQLITE_LIMIT_WORKER_THREADS</dt> +** <dd>The maximum number of auxiliary worker threads that a single +** [prepared statement] may start.</dd>)^ +** </dl> +*/ +#define SQLITE_LIMIT_LENGTH 0 +#define SQLITE_LIMIT_SQL_LENGTH 1 +#define SQLITE_LIMIT_COLUMN 2 +#define SQLITE_LIMIT_EXPR_DEPTH 3 +#define SQLITE_LIMIT_COMPOUND_SELECT 4 +#define SQLITE_LIMIT_VDBE_OP 5 +#define SQLITE_LIMIT_FUNCTION_ARG 6 +#define SQLITE_LIMIT_ATTACHED 7 +#define SQLITE_LIMIT_LIKE_PATTERN_LENGTH 8 +#define SQLITE_LIMIT_VARIABLE_NUMBER 9 +#define SQLITE_LIMIT_TRIGGER_DEPTH 10 +#define SQLITE_LIMIT_WORKER_THREADS 11 + +/* +** CAPI3REF: Name Of The Folder Holding Temporary Files +** +** ^(If this global variable is made to point to a string which is +** the name of a folder (a.k.a. directory), then all temporary files +** created by SQLite when using a built-in [sqlite3_vfs | VFS] +** will be placed in that directory.)^ ^If this variable +** is a NULL pointer, then SQLite performs a search for an appropriate +** temporary file directory. +** +** Applications are strongly discouraged from using this global variable. +** It is required to set a temporary folder on Windows Runtime (WinRT). +** But for all other platforms, it is highly recommended that applications +** neither read nor write this variable. This global variable is a relic +** that exists for backwards compatibility of legacy applications and should +** be avoided in new projects. +** +** It is not safe to read or modify this variable in more than one +** thread at a time. It is not safe to read or modify this variable +** if a [database connection] is being used at the same time in a separate +** thread. +** It is intended that this variable be set once +** as part of process initialization and before any SQLite interface +** routines have been called and that this variable remain unchanged +** thereafter. +** +** ^The [temp_store_directory pragma] may modify this variable and cause +** it to point to memory obtained from [sqlite3_malloc]. ^Furthermore, +** the [temp_store_directory pragma] always assumes that any string +** that this variable points to is held in memory obtained from +** [sqlite3_malloc] and the pragma may attempt to free that memory +** using [sqlite3_free]. +** Hence, if this variable is modified directly, either it should be +** made NULL or made to point to memory obtained from [sqlite3_malloc] +** or else the use of the [temp_store_directory pragma] should be avoided. +** Except when requested by the [temp_store_directory pragma], SQLite +** does not free the memory that sqlite3_temp_directory points to. If +** the application wants that memory to be freed, it must do +** so itself, taking care to only do so after all [database connection] +** objects have been destroyed. +** +** <b>Note to Windows Runtime users:</b> The temporary directory must be set +** prior to calling [sqlite3_open] or [sqlite3_open_v2]. Otherwise, various +** features that require the use of temporary files may fail. Here is an +** example of how to do this using C++ with the Windows Runtime: +** +** <blockquote><pre> +** LPCWSTR zPath = Windows::Storage::ApplicationData::Current-> +** TemporaryFolder->Path->Data(); +** char zPathBuf[MAX_PATH + 1]; +** memset(zPathBuf, 0, sizeof(zPathBuf)); +** WideCharToMultiByte(CP_UTF8, 0, zPath, -1, zPathBuf, sizeof(zPathBuf), +** NULL, NULL); +** sqlite3_temp_directory = sqlite3_mprintf("%s", zPathBuf); +** </pre></blockquote> +*/ +SQLITE_PRIVATE char *sqlite3_temp_directory; + +/* +** CAPI3REF: Virtual File System Objects +** +** A virtual filesystem (VFS) is an [sqlite3_vfs] object +** that SQLite uses to interact +** with the underlying operating system. Most SQLite builds come with a +** single default VFS that is appropriate for the host computer. +** New VFSes can be registered and existing VFSes can be unregistered. +** The following interfaces are provided. +** +** ^The sqlite3_vfs_find() interface returns a pointer to a VFS given its name. +** ^Names are case sensitive. +** ^Names are zero-terminated UTF-8 strings. +** ^If there is no match, a NULL pointer is returned. +** ^If zVfsName is NULL then the default VFS is returned. +** +** ^New VFSes are registered with sqlite3_vfs_register(). +** ^Each new VFS becomes the default VFS if the makeDflt flag is set. +** ^The same VFS can be registered multiple times without injury. +** ^To make an existing VFS into the default VFS, register it again +** with the makeDflt flag set. If two different VFSes with the +** same name are registered, the behavior is undefined. If a +** VFS is registered with a name that is NULL or an empty string, +** then the behavior is undefined. +** +*/ +SQLITE_PRIVATE sqlite3_vfs *sqlite3_vfs_find(const char *zVfsName); +SQLITE_PRIVATE int sqlite3_vfs_register(sqlite3_vfs*, int makeDflt); + +/* +** CAPI3REF: Mutexes +** +** The SQLite core uses these routines for thread +** synchronization. Though they are intended for internal +** use by SQLite, code that links against SQLite is +** permitted to use any of these routines. +** +** The SQLite source code contains multiple implementations +** of these mutex routines. An appropriate implementation +** is selected automatically at compile-time. The following +** implementations are available in the SQLite core: +** +** <ul> +** <li> SQLITE_MUTEX_PTHREADS +** <li> SQLITE_MUTEX_W32 +** <li> SQLITE_MUTEX_NOOP +** </ul> +** +** The SQLITE_MUTEX_NOOP implementation is a set of routines +** that does no real locking and is appropriate for use in +** a single-threaded application. The SQLITE_MUTEX_PTHREADS and +** SQLITE_MUTEX_W32 implementations are appropriate for use on Unix +** and Windows. +** +** If SQLite is compiled with the SQLITE_MUTEX_APPDEF preprocessor +** macro defined (with "-DSQLITE_MUTEX_APPDEF=1"), then no mutex +** implementation is included with the library. In this case the +** application must supply a custom mutex implementation using the +** [SQLITE_CONFIG_MUTEX] option of the sqlite3_config() function +** before calling sqlite3BtreeInitialize() or any other public sqlite3_ +** function that calls sqlite3BtreeInitialize(). +** +** ^The sqlite3_mutex_alloc() routine allocates a new +** mutex and returns a pointer to it. ^The sqlite3_mutex_alloc() +** routine returns NULL if it is unable to allocate the requested +** mutex. The argument to sqlite3_mutex_alloc() must one of these +** integer constants: +** +** <ul> +** <li> SQLITE_MUTEX_FAST +** <li> SQLITE_MUTEX_RECURSIVE +** <li> SQLITE_MUTEX_STATIC_MASTER +** <li> SQLITE_MUTEX_STATIC_MEM +** <li> SQLITE_MUTEX_STATIC_OPEN +** <li> SQLITE_MUTEX_STATIC_PRNG +** <li> SQLITE_MUTEX_STATIC_LRU +** <li> SQLITE_MUTEX_STATIC_PMEM +** <li> SQLITE_MUTEX_STATIC_APP1 +** <li> SQLITE_MUTEX_STATIC_APP2 +** <li> SQLITE_MUTEX_STATIC_APP3 +** <li> SQLITE_MUTEX_STATIC_VFS1 +** <li> SQLITE_MUTEX_STATIC_VFS2 +** <li> SQLITE_MUTEX_STATIC_VFS3 +** </ul> +** +** ^The first two constants (SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE) +** cause sqlite3_mutex_alloc() to create +** a new mutex. ^The new mutex is recursive when SQLITE_MUTEX_RECURSIVE +** is used but not necessarily so when SQLITE_MUTEX_FAST is used. +** The mutex implementation does not need to make a distinction +** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does +** not want to. SQLite will only request a recursive mutex in +** cases where it really needs one. If a faster non-recursive mutex +** implementation is available on the host platform, the mutex subsystem +** might return such a mutex in response to SQLITE_MUTEX_FAST. +** +** ^The other allowed parameters to sqlite3_mutex_alloc() (anything other +** than SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE) each return +** a pointer to a static preexisting mutex. ^Nine static mutexes are +** used by the current version of SQLite. Future versions of SQLite +** may add additional static mutexes. Static mutexes are for internal +** use by SQLite only. Applications that use SQLite mutexes should +** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or +** SQLITE_MUTEX_RECURSIVE. +** +** ^Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST +** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc() +** returns a different mutex on every call. ^For the static +** mutex types, the same mutex is returned on every call that has +** the same type number. +** +** ^The sqlite3_mutex_free() routine deallocates a previously +** allocated dynamic mutex. Attempting to deallocate a static +** mutex results in undefined behavior. +** +** ^The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt +** to enter a mutex. ^If another thread is already within the mutex, +** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return +** SQLITE_BUSY. ^The sqlite3_mutex_try() interface returns [SQLITE_OK] +** upon successful entry. ^(Mutexes created using +** SQLITE_MUTEX_RECURSIVE can be entered multiple times by the same thread. +** In such cases, the +** mutex must be exited an equal number of times before another thread +** can enter.)^ If the same thread tries to enter any mutex other +** than an SQLITE_MUTEX_RECURSIVE more than once, the behavior is undefined. +** +** ^(Some systems (for example, Windows 95) do not support the operation +** implemented by sqlite3_mutex_try(). On those systems, sqlite3_mutex_try() +** will always return SQLITE_BUSY. The SQLite core only ever uses +** sqlite3_mutex_try() as an optimization so this is acceptable +** behavior.)^ +** +** ^The sqlite3_mutex_leave() routine exits a mutex that was +** previously entered by the same thread. The behavior +** is undefined if the mutex is not currently entered by the +** calling thread or is not currently allocated. +** +** ^If the argument to sqlite3_mutex_enter(), sqlite3_mutex_try(), or +** sqlite3_mutex_leave() is a NULL pointer, then all three routines +** behave as no-ops. +** +** See also: [sqlite3_mutex_held()] and [sqlite3_mutex_notheld()]. +*/ +SQLITE_PRIVATE sqlite3_mutex *sqlite3_mutex_alloc(int); +SQLITE_PRIVATE void sqlite3_mutex_free(sqlite3_mutex*); +SQLITE_PRIVATE void sqlite3_mutex_enter(sqlite3_mutex*); +SQLITE_PRIVATE int sqlite3_mutex_try(sqlite3_mutex*); +SQLITE_PRIVATE void sqlite3_mutex_leave(sqlite3_mutex*); + +/* +** CAPI3REF: Mutex Methods Object +** +** An instance of this structure defines the low-level routines +** used to allocate and use mutexes. +** +** Usually, the default mutex implementations provided by SQLite are +** sufficient, however the application has the option of substituting a custom +** implementation for specialized deployments or systems for which SQLite +** does not provide a suitable implementation. In this case, the application +** creates and populates an instance of this structure to pass +** to sqlite3_config() along with the [SQLITE_CONFIG_MUTEX] option. +** Additionally, an instance of this structure can be used as an +** output variable when querying the system for the current mutex +** implementation, using the [SQLITE_CONFIG_GETMUTEX] option. +** +** ^The xMutexInit method defined by this structure is invoked as +** part of system initialization by the sqlite3BtreeInitialize() function. +** ^The xMutexInit routine is called by SQLite exactly once for each +** effective call to [sqlite3BtreeInitialize()]. +** +** ^The xMutexEnd method defined by this structure is invoked as +** part of system shutdown by the sqlite3BtreeShutdown() function. The +** implementation of this method is expected to release all outstanding +** resources obtained by the mutex methods implementation, especially +** those obtained by the xMutexInit method. ^The xMutexEnd() +** interface is invoked exactly once for each call to [sqlite3BtreeShutdown()]. +** +** ^(The remaining seven methods defined by this structure (xMutexAlloc, +** xMutexFree, xMutexEnter, xMutexTry, xMutexLeave, xMutexHeld and +** xMutexNotheld) implement the following interfaces (respectively): +** +** <ul> +** <li> [sqlite3_mutex_alloc()] </li> +** <li> [sqlite3_mutex_free()] </li> +** <li> [sqlite3_mutex_enter()] </li> +** <li> [sqlite3_mutex_try()] </li> +** <li> [sqlite3_mutex_leave()] </li> +** <li> [sqlite3_mutex_held()] </li> +** <li> [sqlite3_mutex_notheld()] </li> +** </ul>)^ +** +** The only difference is that the public sqlite3_XXX functions enumerated +** above silently ignore any invocations that pass a NULL pointer instead +** of a valid mutex handle. The implementations of the methods defined +** by this structure are not required to handle this case, the results +** of passing a NULL pointer instead of a valid mutex handle are undefined +** (i.e. it is acceptable to provide an implementation that segfaults if +** it is passed a NULL pointer). +** +** The xMutexInit() method must be threadsafe. It must be harmless to +** invoke xMutexInit() multiple times within the same process and without +** intervening calls to xMutexEnd(). Second and subsequent calls to +** xMutexInit() must be no-ops. +** +** xMutexInit() must not use SQLite memory allocation ([sqlite3_malloc()] +** and its associates). Similarly, xMutexAlloc() must not use SQLite memory +** allocation for a static mutex. ^However xMutexAlloc() may use SQLite +** memory allocation for a fast or recursive mutex. +** +** ^SQLite will invoke the xMutexEnd() method when [sqlite3BtreeShutdown()] is +** called, but only if the prior call to xMutexInit returned SQLITE_OK. +** If xMutexInit fails in any way, it is expected to clean up after itself +** prior to returning. +*/ +typedef struct sqlite3_mutex_methods sqlite3_mutex_methods; +struct sqlite3_mutex_methods { + int (*xMutexInit)(void); + int (*xMutexEnd)(void); + sqlite3_mutex *(*xMutexAlloc)(int); + void (*xMutexFree)(sqlite3_mutex *); + void (*xMutexEnter)(sqlite3_mutex *); + int (*xMutexTry)(sqlite3_mutex *); + void (*xMutexLeave)(sqlite3_mutex *); + int (*xMutexHeld)(sqlite3_mutex *); + int (*xMutexNotheld)(sqlite3_mutex *); +}; + +/* +** CAPI3REF: Mutex Types +** +** The [sqlite3_mutex_alloc()] interface takes a single argument +** which is one of these integer constants. +** +** The set of static mutexes may change from one SQLite release to the +** next. Applications that override the built-in mutex logic must be +** prepared to accommodate additional static mutexes. +*/ +#define SQLITE_MUTEX_FAST 0 +#define SQLITE_MUTEX_RECURSIVE 1 +#define SQLITE_MUTEX_STATIC_MASTER 2 +#define SQLITE_MUTEX_STATIC_MEM 3 /* sqlite3_malloc() */ +#define SQLITE_MUTEX_STATIC_MEM2 4 /* NOT USED */ +#define SQLITE_MUTEX_STATIC_OPEN 4 /* sqlite3BtreeOpen() */ +#define SQLITE_MUTEX_STATIC_PRNG 5 /* sqlite3_random() */ +#define SQLITE_MUTEX_STATIC_LRU 6 /* lru page list */ +#define SQLITE_MUTEX_STATIC_LRU2 7 /* NOT USED */ +#define SQLITE_MUTEX_STATIC_PMEM 7 /* sqlite3PageMalloc() */ +#define SQLITE_MUTEX_STATIC_APP1 8 /* For use by application */ +#define SQLITE_MUTEX_STATIC_APP2 9 /* For use by application */ +#define SQLITE_MUTEX_STATIC_APP3 10 /* For use by application */ +#define SQLITE_MUTEX_STATIC_VFS1 11 /* For use by built-in VFS */ +#define SQLITE_MUTEX_STATIC_VFS2 12 /* For use by extension VFS */ +#define SQLITE_MUTEX_STATIC_VFS3 13 /* For use by application VFS */ + +/* +** CAPI3REF: Status Parameters +** KEYWORDS: {status parameters} +** +** These integer constants designate various run-time status parameters +** that can be returned by [sqlite3_status()]. +** +** <dl> +** [[SQLITE_STATUS_MEMORY_USED]] ^(<dt>SQLITE_STATUS_MEMORY_USED</dt> +** <dd>This parameter is the current amount of memory checked out +** using [sqlite3_malloc()], either directly or indirectly. The +** figure includes calls made to [sqlite3_malloc()] by the application +** and internal memory usage by the SQLite library. Scratch memory +** controlled by [SQLITE_CONFIG_SCRATCH] and auxiliary page-cache +** memory controlled by [SQLITE_CONFIG_PAGECACHE] is not included in +** this parameter. The amount returned is the sum of the allocation +** sizes as reported by the xSize method in [sqlite3_mem_methods].</dd>)^ +** +** [[SQLITE_STATUS_MALLOC_SIZE]] ^(<dt>SQLITE_STATUS_MALLOC_SIZE</dt> +** <dd>This parameter records the largest memory allocation request +** handed to [sqlite3_malloc()] or [sqlite3_realloc()] (or their +** internal equivalents). Only the value returned in the +** *pHighwater parameter to [sqlite3_status()] is of interest. +** The value written into the *pCurrent parameter is undefined.</dd>)^ +** +** [[SQLITE_STATUS_MALLOC_COUNT]] ^(<dt>SQLITE_STATUS_MALLOC_COUNT</dt> +** <dd>This parameter records the number of separate memory allocations +** currently checked out.</dd>)^ +** +** [[SQLITE_STATUS_PAGECACHE_USED]] ^(<dt>SQLITE_STATUS_PAGECACHE_USED</dt> +** <dd>This parameter returns the number of pages used out of the +** [pagecache memory allocator] that was configured using +** [SQLITE_CONFIG_PAGECACHE]. The +** value returned is in pages, not in bytes.</dd>)^ +** +** [[SQLITE_STATUS_PAGECACHE_OVERFLOW]] +** ^(<dt>SQLITE_STATUS_PAGECACHE_OVERFLOW</dt> +** <dd>This parameter returns the number of bytes of page cache +** allocation which could not be satisfied by the [SQLITE_CONFIG_PAGECACHE] +** buffer and where forced to overflow to [sqlite3_malloc()]. The +** returned value includes allocations that overflowed because they +** where too large (they were larger than the "sz" parameter to +** [SQLITE_CONFIG_PAGECACHE]) and allocations that overflowed because +** no space was left in the page cache.</dd>)^ +** +** [[SQLITE_STATUS_PAGECACHE_SIZE]] ^(<dt>SQLITE_STATUS_PAGECACHE_SIZE</dt> +** <dd>This parameter records the largest memory allocation request +** handed to [pagecache memory allocator]. Only the value returned in the +** *pHighwater parameter to [sqlite3_status()] is of interest. +** The value written into the *pCurrent parameter is undefined.</dd>)^ +** +** [[SQLITE_STATUS_SCRATCH_USED]] ^(<dt>SQLITE_STATUS_SCRATCH_USED</dt> +** <dd>This parameter returns the number of allocations used out of the +** [scratch memory allocator] configured using +** [SQLITE_CONFIG_SCRATCH]. The value returned is in allocations, not +** in bytes. Since a single thread may only have one scratch allocation +** outstanding at time, this parameter also reports the number of threads +** using scratch memory at the same time.</dd>)^ +** +** [[SQLITE_STATUS_SCRATCH_OVERFLOW]] ^(<dt>SQLITE_STATUS_SCRATCH_OVERFLOW</dt> +** <dd>This parameter returns the number of bytes of scratch memory +** allocation which could not be satisfied by the [SQLITE_CONFIG_SCRATCH] +** buffer and where forced to overflow to [sqlite3_malloc()]. The values +** returned include overflows because the requested allocation was too +** larger (that is, because the requested allocation was larger than the +** "sz" parameter to [SQLITE_CONFIG_SCRATCH]) and because no scratch buffer +** slots were available. +** </dd>)^ +** +** [[SQLITE_STATUS_SCRATCH_SIZE]] ^(<dt>SQLITE_STATUS_SCRATCH_SIZE</dt> +** <dd>This parameter records the largest memory allocation request +** handed to [scratch memory allocator]. Only the value returned in the +** *pHighwater parameter to [sqlite3_status()] is of interest. +** The value written into the *pCurrent parameter is undefined.</dd>)^ +** +** [[SQLITE_STATUS_PARSER_STACK]] ^(<dt>SQLITE_STATUS_PARSER_STACK</dt> +** <dd>This parameter records the deepest parser stack. It is only +** meaningful if SQLite is compiled with [YYTRACKMAXSTACKDEPTH].</dd>)^ +** </dl> +** +** New status parameters may be added from time to time. +*/ +#define SQLITE_STATUS_MEMORY_USED 0 +#define SQLITE_STATUS_PAGECACHE_USED 1 +#define SQLITE_STATUS_PAGECACHE_OVERFLOW 2 +#define SQLITE_STATUS_SCRATCH_USED 3 +#define SQLITE_STATUS_SCRATCH_OVERFLOW 4 +#define SQLITE_STATUS_MALLOC_SIZE 5 +#define SQLITE_STATUS_PARSER_STACK 6 +#define SQLITE_STATUS_PAGECACHE_SIZE 7 +#define SQLITE_STATUS_SCRATCH_SIZE 8 +#define SQLITE_STATUS_MALLOC_COUNT 9 + +/* +** CAPI3REF: Status Parameters for database connections +** KEYWORDS: {SQLITE_DBSTATUS options} +** +** These constants are the available integer "verbs" that can be passed as +** the second argument to the [sqlite3_db_status()] interface. +** +** New verbs may be added in future releases of SQLite. Existing verbs +** might be discontinued. Applications should check the return code from +** [sqlite3_db_status()] to make sure that the call worked. +** The [sqlite3_db_status()] interface will return a non-zero error code +** if a discontinued or unsupported verb is invoked. +** +** <dl> +** [[SQLITE_DBSTATUS_LOOKASIDE_USED]] ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_USED</dt> +** <dd>This parameter returns the number of lookaside memory slots currently +** checked out.</dd>)^ +** +** [[SQLITE_DBSTATUS_LOOKASIDE_HIT]] ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_HIT</dt> +** <dd>This parameter returns the number malloc attempts that were +** satisfied using lookaside memory. Only the high-water value is meaningful; +** the current value is always zero.)^ +** +** [[SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE]] +** ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE</dt> +** <dd>This parameter returns the number malloc attempts that might have +** been satisfied using lookaside memory but failed due to the amount of +** memory requested being larger than the lookaside slot size. +** Only the high-water value is meaningful; +** the current value is always zero.)^ +** +** [[SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL]] +** ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL</dt> +** <dd>This parameter returns the number malloc attempts that might have +** been satisfied using lookaside memory but failed due to all lookaside +** memory already being in use. +** Only the high-water value is meaningful; +** the current value is always zero.)^ +** +** [[SQLITE_DBSTATUS_CACHE_USED]] ^(<dt>SQLITE_DBSTATUS_CACHE_USED</dt> +** <dd>This parameter returns the approximate number of bytes of heap +** memory used by all pager caches associated with the database connection.)^ +** ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_USED is always 0. +** +** [[SQLITE_DBSTATUS_SCHEMA_USED]] ^(<dt>SQLITE_DBSTATUS_SCHEMA_USED</dt> +** <dd>This parameter returns the approximate number of bytes of heap +** memory used to store the schema for all databases associated +** with the connection - main, temp, and any [ATTACH]-ed databases.)^ +** ^The full amount of memory used by the schemas is reported, even if the +** schema memory is shared with other database connections due to +** [shared cache mode] being enabled. +** ^The highwater mark associated with SQLITE_DBSTATUS_SCHEMA_USED is always 0. +** +** [[SQLITE_DBSTATUS_STMT_USED]] ^(<dt>SQLITE_DBSTATUS_STMT_USED</dt> +** <dd>This parameter returns the approximate number of bytes of heap +** and lookaside memory used by all prepared statements associated with +** the database connection.)^ +** ^The highwater mark associated with SQLITE_DBSTATUS_STMT_USED is always 0. +** </dd> +** +** [[SQLITE_DBSTATUS_CACHE_HIT]] ^(<dt>SQLITE_DBSTATUS_CACHE_HIT</dt> +** <dd>This parameter returns the number of pager cache hits that have +** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_HIT +** is always 0. +** </dd> +** +** [[SQLITE_DBSTATUS_CACHE_MISS]] ^(<dt>SQLITE_DBSTATUS_CACHE_MISS</dt> +** <dd>This parameter returns the number of pager cache misses that have +** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_MISS +** is always 0. +** </dd> +** +** [[SQLITE_DBSTATUS_CACHE_WRITE]] ^(<dt>SQLITE_DBSTATUS_CACHE_WRITE</dt> +** <dd>This parameter returns the number of dirty cache entries that have +** been written to disk. Specifically, the number of pages written to the +** wal file in wal mode databases, or the number of pages written to the +** database file in rollback mode databases. Any pages written as part of +** transaction rollback or database recovery operations are not included. +** If an IO or other error occurs while writing a page to disk, the effect +** on subsequent SQLITE_DBSTATUS_CACHE_WRITE requests is undefined.)^ ^The +** highwater mark associated with SQLITE_DBSTATUS_CACHE_WRITE is always 0. +** </dd> +** +** [[SQLITE_DBSTATUS_DEFERRED_FKS]] ^(<dt>SQLITE_DBSTATUS_DEFERRED_FKS</dt> +** <dd>This parameter returns zero for the current value if and only if +** all foreign key constraints (deferred or immediate) have been +** resolved.)^ ^The highwater mark is always 0. +** </dd> +** </dl> +*/ +#define SQLITE_DBSTATUS_LOOKASIDE_USED 0 +#define SQLITE_DBSTATUS_CACHE_USED 1 +#define SQLITE_DBSTATUS_SCHEMA_USED 2 +#define SQLITE_DBSTATUS_STMT_USED 3 +#define SQLITE_DBSTATUS_LOOKASIDE_HIT 4 +#define SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE 5 +#define SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL 6 +#define SQLITE_DBSTATUS_CACHE_HIT 7 +#define SQLITE_DBSTATUS_CACHE_MISS 8 +#define SQLITE_DBSTATUS_CACHE_WRITE 9 +#define SQLITE_DBSTATUS_DEFERRED_FKS 10 +#define SQLITE_DBSTATUS_MAX 10 /* Largest defined DBSTATUS */ + +/* +** CAPI3REF: Checkpoint Mode Values +** KEYWORDS: {checkpoint mode} +** +** These constants define all valid values for the "checkpoint mode" passed +** as the third parameter to the [sqlite3_wal_checkpoint_v2()] interface. +** See the [sqlite3_wal_checkpoint_v2()] documentation for details on the +** meaning of each of these checkpoint modes. +*/ +#define SQLITE_CHECKPOINT_PASSIVE 0 /* Do as much as possible w/o blocking */ +#define SQLITE_CHECKPOINT_FULL 1 /* Wait for writers, then checkpoint */ +#define SQLITE_CHECKPOINT_RESTART 2 /* Like FULL but wait for for readers */ +#define SQLITE_CHECKPOINT_TRUNCATE 3 /* Like RESTART but also truncate WAL */ + +/* +** CAPI3REF: Custom Page Cache Object +** +** The sqlite3_pcache type is opaque. It is implemented by +** the pluggable module. The SQLite core has no knowledge of +** its size or internal structure and never deals with the +** sqlite3_pcache object except by holding and passing pointers +** to the object. +** +** See [sqlite3_pcache_methods2] for additional information. +*/ +typedef struct sqlite3_pcache sqlite3_pcache; + +/* +** CAPI3REF: Custom Page Cache Object +** +** The sqlite3_pcache_page object represents a single page in the +** page cache. The page cache will allocate instances of this +** object. Various methods of the page cache use pointers to instances +** of this object as parameters or as their return value. +** +** See [sqlite3_pcache_methods2] for additional information. +*/ +typedef struct sqlite3_pcache_page sqlite3_pcache_page; +struct sqlite3_pcache_page { + void *pBuf; /* The content of the page */ + void *pExtra; /* Extra information associated with the page */ +}; + +/* +** CAPI3REF: Application Defined Page Cache. +** KEYWORDS: {page cache} +** +** ^(The [sqlite3_config]([SQLITE_CONFIG_PCACHE2], ...) interface can +** register an alternative page cache implementation by passing in an +** instance of the sqlite3_pcache_methods2 structure.)^ +** In many applications, most of the heap memory allocated by +** SQLite is used for the page cache. +** By implementing a +** custom page cache using this API, an application can better control +** the amount of memory consumed by SQLite, the way in which +** that memory is allocated and released, and the policies used to +** determine exactly which parts of a database file are cached and for +** how long. +** +** The alternative page cache mechanism is an +** extreme measure that is only needed by the most demanding applications. +** The built-in page cache is recommended for most uses. +** +** ^(The contents of the sqlite3_pcache_methods2 structure are copied to an +** internal buffer by SQLite within the call to [sqlite3_config]. Hence +** the application may discard the parameter after the call to +** [sqlite3_config()] returns.)^ +** +** [[the xInit() page cache method]] +** ^(The xInit() method is called once for each effective +** call to [sqlite3BtreeInitialize()])^ +** (usually only once during the lifetime of the process). ^(The xInit() +** method is passed a copy of the sqlite3_pcache_methods2.pArg value.)^ +** The intent of the xInit() method is to set up global data structures +** required by the custom page cache implementation. +** ^(If the xInit() method is NULL, then the +** built-in default page cache is used instead of the application defined +** page cache.)^ +** +** [[the xShutdown() page cache method]] +** ^The xShutdown() method is called by [sqlite3BtreeShutdown()]. +** It can be used to clean up +** any outstanding resources before process shutdown, if required. +** ^The xShutdown() method may be NULL. +** +** ^SQLite automatically serializes calls to the xInit method, +** so the xInit method need not be threadsafe. ^The +** xShutdown method is only called from [sqlite3BtreeShutdown()] so it does +** not need to be threadsafe either. All other methods must be threadsafe +** in multithreaded applications. +** +** ^SQLite will never invoke xInit() more than once without an intervening +** call to xShutdown(). +** +** [[the xCreate() page cache methods]] +** ^SQLite invokes the xCreate() method to construct a new cache instance. +** SQLite will typically create one cache instance for each open database file, +** though this is not guaranteed. ^The +** first parameter, szPage, is the size in bytes of the pages that must +** be allocated by the cache. ^szPage will always a power of two. ^The +** second parameter szExtra is a number of bytes of extra storage +** associated with each page cache entry. ^The szExtra parameter will +** a number less than 250. SQLite will use the +** extra szExtra bytes on each page to store metadata about the underlying +** database page on disk. The value passed into szExtra depends +** on the SQLite version, the target platform, and how SQLite was compiled. +** ^The third argument to xCreate(), bPurgeable, is true if the cache being +** created will be used to cache database pages of a file stored on disk, or +** false if it is used for an in-memory database. The cache implementation +** does not have to do anything special based with the value of bPurgeable; +** it is purely advisory. ^On a cache where bPurgeable is false, SQLite will +** never invoke xUnpin() except to deliberately delete a page. +** ^In other words, calls to xUnpin() on a cache with bPurgeable set to +** false will always have the "discard" flag set to true. +** ^Hence, a cache created with bPurgeable false will +** never contain any unpinned pages. +** +** [[the xCachesize() page cache method]] +** ^(The xCachesize() method may be called at any time by SQLite to set the +** suggested maximum cache-size (number of pages stored by) the cache +** instance passed as the first argument. This is the value configured using +** the SQLite "[PRAGMA cache_size]" command.)^ As with the bPurgeable +** parameter, the implementation is not required to do anything with this +** value; it is advisory only. +** +** [[the xPagecount() page cache methods]] +** The xPagecount() method must return the number of pages currently +** stored in the cache, both pinned and unpinned. +** +** [[the xFetch() page cache methods]] +** The xFetch() method locates a page in the cache and returns a pointer to +** an sqlite3_pcache_page object associated with that page, or a NULL pointer. +** The pBuf element of the returned sqlite3_pcache_page object will be a +** pointer to a buffer of szPage bytes used to store the content of a +** single database page. The pExtra element of sqlite3_pcache_page will be +** a pointer to the szExtra bytes of extra storage that SQLite has requested +** for each entry in the page cache. +** +** The page to be fetched is determined by the key. ^The minimum key value +** is 1. After it has been retrieved using xFetch, the page is considered +** to be "pinned". +** +** If the requested page is already in the page cache, then the page cache +** implementation must return a pointer to the page buffer with its content +** intact. If the requested page is not already in the cache, then the +** cache implementation should use the value of the createFlag +** parameter to help it determined what action to take: +** +** <table border=1 width=85% align=center> +** <tr><th> createFlag <th> Behavior when page is not already in cache +** <tr><td> 0 <td> Do not allocate a new page. Return NULL. +** <tr><td> 1 <td> Allocate a new page if it easy and convenient to do so. +** Otherwise return NULL. +** <tr><td> 2 <td> Make every effort to allocate a new page. Only return +** NULL if allocating a new page is effectively impossible. +** </table> +** +** ^(SQLite will normally invoke xFetch() with a createFlag of 0 or 1. SQLite +** will only use a createFlag of 2 after a prior call with a createFlag of 1 +** failed.)^ In between the to xFetch() calls, SQLite may +** attempt to unpin one or more cache pages by spilling the content of +** pinned pages to disk and synching the operating system disk cache. +** +** [[the xUnpin() page cache method]] +** ^xUnpin() is called by SQLite with a pointer to a currently pinned page +** as its second argument. If the third parameter, discard, is non-zero, +** then the page must be evicted from the cache. +** ^If the discard parameter is +** zero, then the page may be discarded or retained at the discretion of +** page cache implementation. ^The page cache implementation +** may choose to evict unpinned pages at any time. +** +** The cache must not perform any reference counting. A single +** call to xUnpin() unpins the page regardless of the number of prior calls +** to xFetch(). +** +** [[the xRekey() page cache methods]] +** The xRekey() method is used to change the key value associated with the +** page passed as the second argument. If the cache +** previously contains an entry associated with newKey, it must be +** discarded. ^Any prior cache entry associated with newKey is guaranteed not +** to be pinned. +** +** When SQLite calls the xTruncate() method, the cache must discard all +** existing cache entries with page numbers (keys) greater than or equal +** to the value of the iLimit parameter passed to xTruncate(). If any +** of these pages are pinned, they are implicitly unpinned, meaning that +** they can be safely discarded. +** +** [[the xDestroy() page cache method]] +** ^The xDestroy() method is used to delete a cache allocated by xCreate(). +** All resources associated with the specified cache should be freed. ^After +** calling the xDestroy() method, SQLite considers the [sqlite3_pcache*] +** handle invalid, and will not use it with any other sqlite3_pcache_methods2 +** functions. +** +** [[the xShrink() page cache method]] +** ^SQLite invokes the xShrink() method when it wants the page cache to +** free up as much of heap memory as possible. The page cache implementation +** is not obligated to free any memory, but well-behaved implementations should +** do their best. +*/ +typedef struct sqlite3_pcache_methods2 sqlite3_pcache_methods2; +struct sqlite3_pcache_methods2 { + int iVersion; + void *pArg; + int (*xInit)(void*); + void (*xShutdown)(void*); + sqlite3_pcache *(*xCreate)(int szPage, int szExtra, int bPurgeable); + void (*xCachesize)(sqlite3_pcache*, int nCachesize); + int (*xPagecount)(sqlite3_pcache*); + sqlite3_pcache_page *(*xFetch)(sqlite3_pcache*, unsigned key, int createFlag); + void (*xUnpin)(sqlite3_pcache*, sqlite3_pcache_page*, int discard); + void (*xRekey)(sqlite3_pcache*, sqlite3_pcache_page*, + unsigned oldKey, unsigned newKey); + void (*xTruncate)(sqlite3_pcache*, unsigned iLimit); + void (*xDestroy)(sqlite3_pcache*); + void (*xShrink)(sqlite3_pcache*); +}; + +/* +** CAPI3REF: Online Backup Object +** +** The sqlite3_backup object records state information about an ongoing +** online backup operation. ^The sqlite3_backup object is created by +** a call to [sqlite3_backup_init()] and is destroyed by a call to +** [sqlite3_backup_finish()]. +** +** See Also: [Using the SQLite Online Backup API] +*/ +typedef struct sqlite3_backup sqlite3_backup; + +/* +** CAPI3REF: Error Logging Interface +** +** ^The [sqlite3_log()] interface writes a message into the [error log] +** established by the [SQLITE_CONFIG_LOG] option to [sqlite3_config()]. +** ^If logging is enabled, the zFormat string and subsequent arguments are +** used with [sqlite3_snprintf()] to generate the final output string. +** +** The sqlite3_log() interface is intended for use by extensions such as +** virtual tables, collating functions, and SQL functions. While there is +** nothing to prevent an application from calling sqlite3_log(), doing so +** is considered bad form. +** +** The zFormat string must not be NULL. +** +** To avoid deadlocks and other threading problems, the sqlite3_log() routine +** will not use dynamically allocated memory. The log message is stored in +** a fixed-length buffer on the stack. If the log message is longer than +** a few hundred characters, it will be truncated to the length of the +** buffer. +*/ +SQLITE_PRIVATE void sqlite3_log(int iErrCode, const char *zFormat, ...); + +/* +** CAPI3REF: Text Encodings +** +** These constant define integer codes that represent the various +** text encodings supported by SQLite. +*/ +#define SQLITE_UTF8 1 /* IMP: R-37514-35566 */ +#define SQLITE_UTF16LE 2 /* IMP: R-03371-37637 */ +#define SQLITE_UTF16BE 3 /* IMP: R-51971-34154 */ +#define SQLITE_UTF16 4 /* Use native byte order */ +#define SQLITE_ANY 5 /* Deprecated */ +#define SQLITE_UTF16_ALIGNED 8 /* sqlite3_create_collation only */ + +/* +** Include the configuration header output by 'configure' if we're using the +** autoconf-based build +*/ +#ifdef _HAVE_SQLITE_CONFIG_H +#include "config.h" +#endif + +/************** Include sqliteLimit.h in the middle of sqliteInt.h ***********/ +/************** Begin file sqliteLimit.h *************************************/ +/* +** 2007 May 7 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file defines various limits of what SQLite can process. +*/ + +/* +** The maximum length of a TEXT or BLOB in bytes. This also +** limits the size of a row in a table or index. +** +** The hard limit is the ability of a 32-bit signed integer +** to count the size: 2^31-1 or 2147483647. +*/ +#ifndef SQLITE_MAX_LENGTH +# define SQLITE_MAX_LENGTH 1000000000 +#endif + +/* +** This is the maximum number of +** +** * Columns in a table +** * Columns in an index +** * Columns in a view +** * Terms in the SET clause of an UPDATE statement +** * Terms in the result set of a SELECT statement +** * Terms in the GROUP BY or ORDER BY clauses of a SELECT statement. +** * Terms in the VALUES clause of an INSERT statement +** +** The hard upper limit here is 32676. Most database people will +** tell you that in a well-normalized database, you usually should +** not have more than a dozen or so columns in any table. And if +** that is the case, there is no point in having more than a few +** dozen values in any of the other situations described above. +*/ +#ifndef SQLITE_MAX_COLUMN +# define SQLITE_MAX_COLUMN 2000 +#endif + +/* +** The maximum length of a single SQL statement in bytes. +** +** It used to be the case that setting this value to zero would +** turn the limit off. That is no longer true. It is not possible +** to turn this limit off. +*/ +#ifndef SQLITE_MAX_SQL_LENGTH +# define SQLITE_MAX_SQL_LENGTH 1000000000 +#endif + +/* +** The maximum depth of an expression tree. This is limited to +** some extent by SQLITE_MAX_SQL_LENGTH. But sometime you might +** want to place more severe limits on the complexity of an +** expression. +** +** A value of 0 used to mean that the limit was not enforced. +** But that is no longer true. The limit is now strictly enforced +** at all times. +*/ +#ifndef SQLITE_MAX_EXPR_DEPTH +# define SQLITE_MAX_EXPR_DEPTH 1000 +#endif + +/* +** The maximum number of terms in a compound SELECT statement. +** The code generator for compound SELECT statements does one +** level of recursion for each term. A stack overflow can result +** if the number of terms is too large. In practice, most SQL +** never has more than 3 or 4 terms. Use a value of 0 to disable +** any limit on the number of terms in a compount SELECT. +*/ +#ifndef SQLITE_MAX_COMPOUND_SELECT +# define SQLITE_MAX_COMPOUND_SELECT 500 +#endif + +/* +** The maximum number of opcodes in a VDBE program. +** Not currently enforced. +*/ +#ifndef SQLITE_MAX_VDBE_OP +# define SQLITE_MAX_VDBE_OP 25000 +#endif + +/* +** The maximum number of arguments to an SQL function. +*/ +#ifndef SQLITE_MAX_FUNCTION_ARG +# define SQLITE_MAX_FUNCTION_ARG 127 +#endif + +/* +** The suggested maximum number of in-memory pages to use for +** the main database table and for temporary tables. +** +** IMPLEMENTATION-OF: R-31093-59126 The default suggested cache size +** is 2000 pages. +** IMPLEMENTATION-OF: R-48205-43578 The default suggested cache size can be +** altered using the SQLITE_DEFAULT_CACHE_SIZE compile-time options. +*/ +#ifndef SQLITE_DEFAULT_CACHE_SIZE +# define SQLITE_DEFAULT_CACHE_SIZE 2000 +#endif + +/* +** The default number of frames to accumulate in the log file before +** checkpointing the database in WAL mode. +*/ +#ifndef SQLITE_DEFAULT_WAL_AUTOCHECKPOINT +# define SQLITE_DEFAULT_WAL_AUTOCHECKPOINT 1000 +#endif + +/* +** The maximum number of attached databases. This must be between 0 +** and 62. The upper bound on 62 is because a 64-bit integer bitmap +** is used internally to track attached databases. +*/ +#ifndef SQLITE_MAX_ATTACHED +# define SQLITE_MAX_ATTACHED 10 +#endif + + +/* +** The maximum value of a ?nnn wildcard that the parser will accept. +*/ +#ifndef SQLITE_MAX_VARIABLE_NUMBER +# define SQLITE_MAX_VARIABLE_NUMBER 999 +#endif + +/* Maximum page size. The upper bound on this value is 65536. This a limit +** imposed by the use of 16-bit offsets within each page. +** +** Earlier versions of SQLite allowed the user to change this value at +** compile time. This is no longer permitted, on the grounds that it creates +** a library that is technically incompatible with an SQLite library +** compiled with a different limit. If a process operating on a database +** with a page-size of 65536 bytes crashes, then an instance of SQLite +** compiled with the default page-size limit will not be able to rollback +** the aborted transaction. This could lead to database corruption. +*/ +#ifdef SQLITE_MAX_PAGE_SIZE +# undef SQLITE_MAX_PAGE_SIZE +#endif +#define SQLITE_MAX_PAGE_SIZE 65536 + + +/* +** The default size of a database page. +*/ +#ifndef SQLITE_DEFAULT_PAGE_SIZE +# define SQLITE_DEFAULT_PAGE_SIZE 1024 +#endif +#if SQLITE_DEFAULT_PAGE_SIZE>SQLITE_MAX_PAGE_SIZE +# undef SQLITE_DEFAULT_PAGE_SIZE +# define SQLITE_DEFAULT_PAGE_SIZE SQLITE_MAX_PAGE_SIZE +#endif + +/* +** Ordinarily, if no value is explicitly provided, SQLite creates databases +** with page size SQLITE_DEFAULT_PAGE_SIZE. However, based on certain +** device characteristics (sector-size and atomic write() support), +** SQLite may choose a larger value. This constant is the maximum value +** SQLite will choose on its own. +*/ +#ifndef SQLITE_MAX_DEFAULT_PAGE_SIZE +# define SQLITE_MAX_DEFAULT_PAGE_SIZE 8192 +#endif +#if SQLITE_MAX_DEFAULT_PAGE_SIZE>SQLITE_MAX_PAGE_SIZE +# undef SQLITE_MAX_DEFAULT_PAGE_SIZE +# define SQLITE_MAX_DEFAULT_PAGE_SIZE SQLITE_MAX_PAGE_SIZE +#endif + + +/* +** Maximum number of pages in one database file. +** +** This is really just the default value for the max_page_count pragma. +** This value can be lowered (or raised) at run-time using that the +** max_page_count macro. +*/ +#ifndef SQLITE_MAX_PAGE_COUNT +# define SQLITE_MAX_PAGE_COUNT 1073741823 +#endif + +/* +** Maximum length (in bytes) of the pattern in a LIKE or GLOB +** operator. +*/ +#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH +# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000 +#endif + +/* +** Maximum depth of recursion for triggers. +** +** A value of 1 means that a trigger program will not be able to itself +** fire any triggers. A value of 0 means that no trigger programs at all +** may be executed. +*/ +#ifndef SQLITE_MAX_TRIGGER_DEPTH +# define SQLITE_MAX_TRIGGER_DEPTH 1000 +#endif + +/************** End of sqliteLimit.h *****************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ + +/* Disable nuisance warnings on Borland compilers */ +#if defined(__BORLANDC__) +#pragma warn -rch /* unreachable code */ +#pragma warn -ccc /* Condition is always true or false */ +#pragma warn -aus /* Assigned value is never used */ +#pragma warn -csu /* Comparing signed and unsigned */ +#pragma warn -spa /* Suspicious pointer arithmetic */ +#endif + +/* +** Include standard header files as necessary +*/ +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif + +/* +** The following macros are used to cast pointers to integers and +** integers to pointers. The way you do this varies from one compiler +** to the next, so we have developed the following set of #if statements +** to generate appropriate macros for a wide range of compilers. +** +** The correct "ANSI" way to do this is to use the intptr_t type. +** Unfortunately, that typedef is not available on all compilers, or +** if it is available, it requires an #include of specific headers +** that vary from one machine to the next. +** +** Ticket #3860: The llvm-gcc-4.2 compiler from Apple chokes on +** the ((void*)&((char*)0)[X]) construct. But MSVC chokes on ((void*)(X)). +** So we have to define the macros in different ways depending on the +** compiler. +*/ +#if defined(__PTRDIFF_TYPE__) /* This case should work for GCC */ +# define SQLITE_INT_TO_PTR(X) ((void*)(__PTRDIFF_TYPE__)(X)) +# define SQLITE_PTR_TO_INT(X) ((int)(__PTRDIFF_TYPE__)(X)) +#elif !defined(__GNUC__) /* Works for compilers other than LLVM */ +# define SQLITE_INT_TO_PTR(X) ((void*)&((char*)0)[X]) +# define SQLITE_PTR_TO_INT(X) ((int)(((char*)X)-(char*)0)) +#elif defined(HAVE_STDINT_H) /* Use this case if we have ANSI headers */ +# define SQLITE_INT_TO_PTR(X) ((void*)(intptr_t)(X)) +# define SQLITE_PTR_TO_INT(X) ((int)(intptr_t)(X)) +#else /* Generates a warning - but it always works */ +# define SQLITE_INT_TO_PTR(X) ((void*)(X)) +# define SQLITE_PTR_TO_INT(X) ((int)(X)) +#endif + +/* +** A macro to hint to the compiler that a function should not be +** inlined. +*/ +#if defined(__GNUC__) +# define SQLITE_NOINLINE __attribute__((noinline)) +#elif defined(_MSC_VER) && _MSC_VER>=1310 +# define SQLITE_NOINLINE __declspec(noinline) +#else +# define SQLITE_NOINLINE +#endif + +/* +** Make sure that the compiler intrinsics we desire are enabled when +** compiling with an appropriate version of MSVC unless prevented by +** the SQLITE_DISABLE_INTRINSIC define. +*/ +#if !defined(SQLITE_DISABLE_INTRINSIC) +# if defined(_MSC_VER) && _MSC_VER>=1300 +# if !defined(_WIN32_WCE) +# include <intrin.h> +# pragma intrinsic(_byteswap_ushort) +# pragma intrinsic(_byteswap_ulong) +# pragma intrinsic(_ReadWriteBarrier) +# else +# include <cmnintrin.h> +# endif +# endif +#endif + +/* +** Powersafe overwrite is on by default. But can be turned off using +** the -DSQLITE_POWERSAFE_OVERWRITE=0 command-line option. +*/ +#ifndef SQLITE_POWERSAFE_OVERWRITE +# define SQLITE_POWERSAFE_OVERWRITE 1 +#endif + +/* +** EVIDENCE-OF: R-25715-37072 Memory allocation statistics are enabled by +** default unless SQLite is compiled with SQLITE_DEFAULT_MEMSTATUS=0 in +** which case memory allocation statistics are disabled by default. +*/ +#if !defined(SQLITE_DEFAULT_MEMSTATUS) +# define SQLITE_DEFAULT_MEMSTATUS 1 +#endif + +/* +** Exactly one of the following macros must be defined in order to +** specify which memory allocation subsystem to use. +** +** SQLITE_SYSTEM_MALLOC // Use normal system malloc() +** SQLITE_WIN32_MALLOC // Use Win32 native heap API +** SQLITE_ZERO_MALLOC // Use a stub allocator that always fails +** SQLITE_MEMDEBUG // Debugging version of system malloc() +** +** On Windows, if the SQLITE_WIN32_MALLOC_VALIDATE macro is defined and the +** assert() macro is enabled, each call into the Win32 native heap subsystem +** will cause HeapValidate to be called. If heap validation should fail, an +** assertion will be triggered. +** +** If none of the above are defined, then set SQLITE_SYSTEM_MALLOC as +** the default. +*/ +#if defined(SQLITE_SYSTEM_MALLOC) \ + + defined(SQLITE_WIN32_MALLOC) \ + + defined(SQLITE_ZERO_MALLOC) \ + + defined(SQLITE_MEMDEBUG)>1 +# error "Two or more of the following compile-time configuration options\ + are defined but at most one is allowed:\ + SQLITE_SYSTEM_MALLOC, SQLITE_WIN32_MALLOC, SQLITE_MEMDEBUG,\ + SQLITE_ZERO_MALLOC" +#endif +#if defined(SQLITE_SYSTEM_MALLOC) \ + + defined(SQLITE_WIN32_MALLOC) \ + + defined(SQLITE_ZERO_MALLOC) \ + + defined(SQLITE_MEMDEBUG)==0 +# define SQLITE_SYSTEM_MALLOC 1 +#endif + +/* +** We need to define _XOPEN_SOURCE as follows in order to enable +** recursive mutexes on most Unix systems and fchmod() on OpenBSD. +** But _XOPEN_SOURCE define causes problems for Mac OS X, so omit +** it. +*/ +#if !defined(_XOPEN_SOURCE) && !defined(__DARWIN__) && !defined(__APPLE__) +# define _XOPEN_SOURCE 600 +#endif + +/* +** NDEBUG and SQLITE_DEBUG are opposites. It should always be true that +** defined(NDEBUG)==!defined(SQLITE_DEBUG). If this is not currently true, +** make it true by defining or undefining NDEBUG. +** +** Setting NDEBUG makes the code smaller and faster by disabling the +** assert() statements in the code. So we want the default action +** to be for NDEBUG to be set and NDEBUG to be undefined only if SQLITE_DEBUG +** is set. Thus NDEBUG becomes an opt-in rather than an opt-out +** feature. +*/ +#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) +# define NDEBUG 1 +#endif +#if defined(NDEBUG) && defined(SQLITE_DEBUG) +# undef NDEBUG +#endif + +/* +** The testcase() macro is used to aid in coverage testing. When +** doing coverage testing, the condition inside the argument to +** testcase() must be evaluated both true and false in order to +** get full branch coverage. The testcase() macro is inserted +** to help ensure adequate test coverage in places where simple +** condition/decision coverage is inadequate. For example, testcase() +** can be used to make sure boundary values are tested. For +** bitmask tests, testcase() can be used to make sure each bit +** is significant and used at least once. On switch statements +** where multiple cases go to the same block of code, testcase() +** can insure that all cases are evaluated. +** +*/ +#ifdef SQLITE_COVERAGE_TEST +SQLITE_PRIVATE void sqlite3Coverage(int); +# define testcase(X) if( X ){ sqlite3Coverage(__LINE__); } +#else +# define testcase(X) +#endif + +/* +** The TESTONLY macro is used to enclose variable declarations or +** other bits of code that are needed to support the arguments +** within testcase() and assert() macros. +*/ +#if !defined(NDEBUG) || defined(SQLITE_COVERAGE_TEST) +# define TESTONLY(X) X +#else +# define TESTONLY(X) +#endif + +/* +** Sometimes we need a small amount of code such as a variable initialization +** to setup for a later assert() statement. We do not want this code to +** appear when assert() is disabled. The following macro is therefore +** used to contain that setup code. The "VVA" acronym stands for +** "Verification, Validation, and Accreditation". In other words, the +** code within VVA_ONLY() will only run during verification processes. +*/ +#ifndef NDEBUG +# define VVA_ONLY(X) X +#else +# define VVA_ONLY(X) +#endif + +/* +** The ALWAYS and NEVER macros surround boolean expressions which +** are intended to always be true or false, respectively. Such +** expressions could be omitted from the code completely. But they +** are included in a few cases in order to enhance the resilience +** of SQLite to unexpected behavior - to make the code "self-healing" +** or "ductile" rather than being "brittle" and crashing at the first +** hint of unplanned behavior. +** +** In other words, ALWAYS and NEVER are added for defensive code. +** +** When doing coverage testing ALWAYS and NEVER are hard-coded to +** be true and false so that the unreachable code they specify will +** not be counted as untested code. +*/ +#if defined(SQLITE_COVERAGE_TEST) +# define ALWAYS(X) (1) +# define NEVER(X) (0) +#elif !defined(NDEBUG) +# define ALWAYS(X) ((X)?1:(assert(0),0)) +# define NEVER(X) ((X)?(assert(0),1):0) +#else +# define ALWAYS(X) (X) +# define NEVER(X) (X) +#endif + +/* +** Declarations used for tracing the operating system interfaces. +*/ +#if defined(SQLITE_FORCE_OS_TRACE) || defined(SQLITE_TEST) || \ + (defined(SQLITE_DEBUG) && SQLITE_OS_WIN) + extern int sqlite3OSTrace; +# define OSTRACE(X) if( sqlite3OSTrace ) sqlite3DebugPrintf X +# define SQLITE_HAVE_OS_TRACE +#else +# define OSTRACE(X) +# undef SQLITE_HAVE_OS_TRACE +#endif + +/* +** Is the sqlite3ErrName() function needed in the build? Currently, +** it is needed by "mutex_w32.c" (when debugging), "os_win.c" (when +** OSTRACE is enabled), and by several "test*.c" files (which are +** compiled using SQLITE_TEST). +*/ +#if defined(SQLITE_HAVE_OS_TRACE) || defined(SQLITE_TEST) || \ + (defined(SQLITE_DEBUG) && SQLITE_OS_WIN) +# define SQLITE_NEED_ERR_NAME +#else +# undef SQLITE_NEED_ERR_NAME +#endif + +/* +** The macro unlikely() is a hint that surrounds a boolean +** expression that is usually false. Macro likely() surrounds +** a boolean expression that is usually true. These hints could, +** in theory, be used by the compiler to generate better code, but +** currently they are just comments for human readers. +*/ +#define likely(X) (X) +#define unlikely(X) (X) + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stddef.h> + +/* +** The "file format" number is an integer that is incremented whenever +** the VDBE-level file format changes. The following macros define the +** the default file format for new databases and the maximum file format +** that the library can read. +*/ +#define SQLITE_MAX_FILE_FORMAT 4 +#ifndef SQLITE_DEFAULT_FILE_FORMAT +# define SQLITE_DEFAULT_FILE_FORMAT 4 +#endif + +/* +** The default initial allocation for the pagecache when using separate +** pagecaches for each database connection. A positive number is the +** number of pages. A negative number N translations means that a buffer +** of -1024*N bytes is allocated and used for as many pages as it will hold. +*/ +#ifndef SQLITE_DEFAULT_PCACHE_INITSZ +# define SQLITE_DEFAULT_PCACHE_INITSZ 100 +#endif + +/* +** GCC does not define the offsetof() macro so we'll have to do it +** ourselves. +*/ +#ifndef offsetof +#define offsetof(STRUCTURE,FIELD) ((int)((char*)&((STRUCTURE*)0)->FIELD)) +#endif + +/* +** Macros to compute minimum and maximum of two numbers. +*/ +#define MIN(A,B) ((A)<(B)?(A):(B)) +#define MAX(A,B) ((A)>(B)?(A):(B)) + +/* +** Swap two objects of type TYPE. +*/ +#define SWAP(TYPE,A,B) {TYPE t=A; A=B; B=t;} + +/* +** SQLITE_MAX_U32 is a u64 constant that is the maximum u64 value +** that can be stored in a u32 without loss of data. The value +** is 0x00000000ffffffff. But because of quirks of some compilers, we +** have to specify the value in the less intuitive manner shown: +*/ +#define SQLITE_MAX_U32 ((((u64)1)<<32)-1) + +/* +** Set the SQLITE_PTRSIZE macro to the number of bytes in a pointer +*/ +#ifndef SQLITE_PTRSIZE +# if defined(__SIZEOF_POINTER__) +# define SQLITE_PTRSIZE __SIZEOF_POINTER__ +# elif defined(i386) || defined(__i386__) || defined(_M_IX86) || \ + defined(_M_ARM) || defined(__arm__) || defined(__x86) +# define SQLITE_PTRSIZE 4 +# else +# define SQLITE_PTRSIZE 8 +# endif +#endif + +/* +** Constants for the largest and smallest possible 64-bit signed integers. +** These macros are designed to work correctly on both 32-bit and 64-bit +** compilers. +*/ +#define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) +#define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) + +/* +** Round up a number to the next larger multiple of 8. This is used +** to force 8-byte alignment on 64-bit architectures. +*/ +#define ROUND8(x) (((x)+7)&~7) + +/* +** Round down to the nearest multiple of 8 +*/ +#define ROUNDDOWN8(x) ((x)&~7) + +/* +** Disable MMAP on platforms where it is known to not work +*/ +#if defined(__OpenBSD__) || defined(__QNXNTO__) +# undef SQLITE_MAX_MMAP_SIZE +# define SQLITE_MAX_MMAP_SIZE 0 +#endif + +/* +** Default maximum size of memory used by memory-mapped I/O in the VFS +*/ +#ifdef __APPLE__ +# include <TargetConditionals.h> +# if TARGET_OS_IPHONE +# undef SQLITE_MAX_MMAP_SIZE +# define SQLITE_MAX_MMAP_SIZE 0 +# endif +#endif +#ifndef SQLITE_MAX_MMAP_SIZE +# if defined(__linux__) \ + || defined(_WIN32) \ + || (defined(__APPLE__) && defined(__MACH__)) \ + || defined(__sun) \ + || defined(__FreeBSD__) \ + || defined(__DragonFly__) +# define SQLITE_MAX_MMAP_SIZE 0x7fff0000 /* 2147418112 */ +# else +# define SQLITE_MAX_MMAP_SIZE 0 +# endif +# define SQLITE_MAX_MMAP_SIZE_xc 1 /* exclude from ctime.c */ +#endif + +/* +** The default MMAP_SIZE is zero on all platforms. Or, even if a larger +** default MMAP_SIZE is specified at compile-time, make sure that it does +** not exceed the maximum mmap size. +*/ +#ifndef SQLITE_DEFAULT_MMAP_SIZE +# define SQLITE_DEFAULT_MMAP_SIZE 0 +# define SQLITE_DEFAULT_MMAP_SIZE_xc 1 /* Exclude from ctime.c */ +#endif +#if SQLITE_DEFAULT_MMAP_SIZE>SQLITE_MAX_MMAP_SIZE +# undef SQLITE_DEFAULT_MMAP_SIZE +# define SQLITE_DEFAULT_MMAP_SIZE SQLITE_MAX_MMAP_SIZE +#endif + +/* +** An instance of the following structure is used to store the busy-handler +** callback for a given sqlite handle. +** +** The sqlite.busyHandler member of the sqlite struct contains the busy +** callback for the database handle. Each pager opened via the sqlite +** handle is passed a pointer to sqlite.busyHandler. The busy-handler +** callback is currently invoked only from within pager.c. +*/ +typedef struct BusyHandler BusyHandler; +struct BusyHandler { + int (*xFunc)(void *,int); /* The busy callback */ + void *pArg; /* First arg to busy callback */ + int nBusy; /* Incremented with each busy call */ +}; + +/* +** The root-page of the master database table. +*/ +#define MASTER_ROOT 1 + +/* +** A convenience macro that returns the number of elements in +** an array. +*/ +#define ArraySize(X) ((int)(sizeof(X)/sizeof(X[0]))) + +/* +** When SQLITE_OMIT_WSD is defined, it means that the target platform does +** not support Writable Static Data (WSD) such as global and static variables. +** All variables must either be on the stack or dynamically allocated from +** the heap. When WSD is unsupported, the variable declarations scattered +** throughout the SQLite code must become constants instead. The SQLITE_WSD +** macro is used for this purpose. And instead of referencing the variable +** directly, we use its constant as a key to lookup the run-time allocated +** buffer that holds real variable. The constant is also the initializer +** for the run-time allocated buffer. +** +** In the usual case where WSD is supported, the SQLITE_WSD and GLOBAL +** macros become no-ops and have zero performance impact. +*/ +#ifdef SQLITE_OMIT_WSD + #define SQLITE_WSD const + #define GLOBAL(t,v) (*(t*)sqlite3_wsd_find((void*)&(v), sizeof(v))) + #define sqlite3GlobalConfig GLOBAL(struct Sqlite3Config, sqlite3Config) +SQLITE_PRIVATE int sqlite3_wsd_init(int N, int J); +SQLITE_PRIVATE void *sqlite3_wsd_find(void *K, int L); +#else + #define SQLITE_WSD + #define GLOBAL(t,v) v + #define sqlite3GlobalConfig sqlite3Config +#endif + +/* +** The following macros are used to suppress compiler warnings and to +** make it clear to human readers when a function parameter is deliberately +** left unused within the body of a function. This usually happens when +** a function is called via a function pointer. For example the +** implementation of an SQL aggregate step callback may not use the +** parameter indicating the number of arguments passed to the aggregate, +** if it knows that this is enforced elsewhere. +** +** When a function parameter is not used at all within the body of a function, +** it is generally named "NotUsed" or "NotUsed2" to make things even clearer. +** However, these macros may also be used to suppress warnings related to +** parameters that may or may not be used depending on compilation options. +** For example those parameters only used in assert() statements. In these +** cases the parameters are named as per the usual conventions. +*/ +#define UNUSED_PARAMETER(x) (void)(x) +#define UNUSED_PARAMETER2(x,y) UNUSED_PARAMETER(x),UNUSED_PARAMETER(y) + +typedef struct Bitvec Bitvec; +typedef struct CollSeq CollSeq; +typedef struct PrintfArguments PrintfArguments; +typedef struct Savepoint Savepoint; +typedef struct StrAccum StrAccum; + +/************** Include pager.h in the middle of sqliteInt.h *****************/ +/************** Begin file pager.h *******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This header file defines the interface that the sqlite page cache +** subsystem. The page cache subsystem reads and writes a file a page +** at a time and provides a journal for rollback. +*/ + +#ifndef _PAGER_H_ +#define _PAGER_H_ + +/* +** Default maximum size for persistent journal files. A negative +** value means no limit. This value may be overridden using the +** sqlite3PagerJournalSizeLimit() API. See also "PRAGMA journal_size_limit". +*/ +#ifndef SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT + #define SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT -1 +#endif + +/* +** The type used to represent a page number. The first page in a file +** is called page 1. 0 is used to represent "not a page". +*/ +typedef u32 Pgno; + +/* +** Each open file is managed by a separate instance of the "Pager" structure. +*/ +typedef struct Pager Pager; + +/* +** Handle type for pages. +*/ +typedef struct PgHdr DbPage; + +/* +** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is +** reserved for working around a windows/posix incompatibility). It is +** used in the journal to signify that the remainder of the journal file +** is devoted to storing a master journal name - there are no more pages to +** roll back. See comments for function writeMasterJournal() in pager.c +** for details. +*/ +#define PAGER_MJ_PGNO(x) ((Pgno)((PENDING_BYTE/((x)->pageSize))+1)) + +/* +** Allowed values for the flags parameter to sqlite3PagerOpen(). +** +** NOTE: These values must match the corresponding BTREE_ values in btree.h. +*/ +#define PAGER_OMIT_JOURNAL 0x0001 /* Do not use a rollback journal */ +#define PAGER_MEMORY 0x0002 /* In-memory database */ + +/* +** Valid values for the second argument to sqlite3PagerLockingMode(). +*/ +#define PAGER_LOCKINGMODE_QUERY -1 +#define PAGER_LOCKINGMODE_NORMAL 0 +#define PAGER_LOCKINGMODE_EXCLUSIVE 1 + +/* +** Numeric constants that encode the journalmode. +*/ +#define PAGER_JOURNALMODE_QUERY (-1) /* Query the value of journalmode */ +#define PAGER_JOURNALMODE_DELETE 0 /* Commit by deleting journal file */ +#define PAGER_JOURNALMODE_PERSIST 1 /* Commit by zeroing journal header */ +#define PAGER_JOURNALMODE_OFF 2 /* Journal omitted. */ +#define PAGER_JOURNALMODE_TRUNCATE 3 /* Commit by truncating journal */ +#define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ +#define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ + +/* +** Flags that make up the mask passed to sqlite3PagerAcquire(). +*/ +#define PAGER_GET_NOCONTENT 0x01 /* Do not load data from disk */ +#define PAGER_GET_READONLY 0x02 /* Read-only page is acceptable */ + +/* +** Flags for sqlite3PagerSetFlags() +*/ +#define PAGER_SYNCHRONOUS_OFF 0x01 /* PRAGMA synchronous=OFF */ +#define PAGER_SYNCHRONOUS_NORMAL 0x02 /* PRAGMA synchronous=NORMAL */ +#define PAGER_SYNCHRONOUS_FULL 0x03 /* PRAGMA synchronous=FULL */ +#define PAGER_SYNCHRONOUS_MASK 0x03 /* Mask for three values above */ +#define PAGER_FULLFSYNC 0x04 /* PRAGMA fullfsync=ON */ +#define PAGER_CKPT_FULLFSYNC 0x08 /* PRAGMA checkpoint_fullfsync=ON */ +#define PAGER_CACHESPILL 0x10 /* PRAGMA cache_spill=ON */ +#define PAGER_FLAGS_MASK 0x1c /* All above except SYNCHRONOUS */ + +/* +** The remainder of this file contains the declarations of the functions +** that make up the Pager sub-system API. See source code comments for +** a detailed description of each routine. +*/ + +/* Open and close a Pager connection. */ +SQLITE_PRIVATE int sqlite3PagerOpen( + sqlite3_vfs*, + Pager **ppPager, + const char*, + int, + int, + int, + void(*)(DbPage*) +); +SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager); +SQLITE_PRIVATE int sqlite3PagerReadFileheader(Pager*, int, unsigned char*); + +/* Functions used to configure a Pager object. */ +SQLITE_PRIVATE void sqlite3PagerSetBusyhandler(Pager*, int(*)(void *), void *); +SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager*, u32*, int); +#ifdef SQLITE_HAS_CODEC +SQLITE_PRIVATE void sqlite3PagerAlignReserve(Pager*,Pager*); +#endif +SQLITE_PRIVATE int sqlite3PagerMaxPageCount(Pager*, int); +SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager*, int); +SQLITE_PRIVATE void sqlite3PagerSetMmapLimit(Pager *, sqlite3_int64); +SQLITE_PRIVATE void sqlite3PagerSetFlags(Pager*,unsigned); +#if 0 +SQLITE_PRIVATE int sqlite3PagerLockingMode(Pager *, int); +SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *, int); +SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager*); +SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager*); +SQLITE_PRIVATE i64 sqlite3PagerJournalSizeLimit(Pager *, i64); +#endif + +/* Functions used to obtain and release page references. */ +SQLITE_PRIVATE int sqlite3PagerAcquire(Pager *pPager, Pgno pgno, DbPage **ppPage, int clrFlag); +#define sqlite3PagerGet(A,B,C) sqlite3PagerAcquire(A,B,C,0) +SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno); +SQLITE_PRIVATE void sqlite3PagerRef(DbPage*); +SQLITE_PRIVATE void sqlite3PagerUnref(DbPage*); +SQLITE_PRIVATE void sqlite3PagerUnrefNotNull(DbPage*); + +/* Operations on page references. */ +SQLITE_PRIVATE int sqlite3PagerWrite(DbPage*); +SQLITE_PRIVATE void sqlite3PagerDontWrite(DbPage*); +SQLITE_PRIVATE int sqlite3PagerMovepage(Pager*,DbPage*,Pgno,int); +SQLITE_PRIVATE int sqlite3PagerPageRefcount(DbPage*); +SQLITE_PRIVATE void *sqlite3PagerGetData(DbPage *); +SQLITE_PRIVATE void *sqlite3PagerGetExtra(DbPage *); + +/* Functions used to manage pager transactions and savepoints. */ +SQLITE_PRIVATE void sqlite3PagerPagecount(Pager*, int*); +SQLITE_PRIVATE int sqlite3PagerBegin(Pager*, int exFlag, int); +SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne(Pager*,const char *zMaster, int); +SQLITE_PRIVATE int sqlite3PagerExclusiveLock(Pager*); +SQLITE_PRIVATE int sqlite3PagerSync(Pager *pPager, const char *zMaster); +SQLITE_PRIVATE int sqlite3PagerCommitPhaseTwo(Pager*); +SQLITE_PRIVATE int sqlite3PagerRollback(Pager*); +SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int n); +SQLITE_PRIVATE int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint); +SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager); + +#ifndef SQLITE_OMIT_WAL +SQLITE_PRIVATE int sqlite3PagerCheckpoint(Pager *pPager, int, int*, int*); +SQLITE_PRIVATE int sqlite3PagerWalSupported(Pager *pPager); +#if 0 +SQLITE_PRIVATE int sqlite3PagerWalCallback(Pager *pPager); +#endif +SQLITE_PRIVATE int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen); +#if 0 +SQLITE_PRIVATE int sqlite3PagerCloseWal(Pager *pPager); +#endif +#endif + +#ifdef SQLITE_ENABLE_ZIPVFS +SQLITE_PRIVATE int sqlite3PagerWalFramesize(Pager *pPager); +#endif + +/* Functions used to query pager state and configuration. */ +SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager*); +SQLITE_PRIVATE u32 sqlite3PagerDataVersion(Pager*); +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3PagerRefcount(Pager*); +#endif +#if 0 +SQLITE_PRIVATE int sqlite3PagerMemUsed(Pager*); +#endif +SQLITE_PRIVATE const char *sqlite3PagerFilename(Pager*, int); +SQLITE_PRIVATE const sqlite3_vfs *sqlite3PagerVfs(Pager*); +#ifdef SQLITE_DIRECT_OVERFLOW_READ +SQLITE_PRIVATE sqlite3_file *sqlite3PagerFile(Pager*); +#endif +SQLITE_PRIVATE const char *sqlite3PagerJournalname(Pager*); +SQLITE_PRIVATE int sqlite3PagerNosync(Pager*); +SQLITE_PRIVATE void *sqlite3PagerTempSpace(Pager*); +#if 0 +SQLITE_PRIVATE void sqlite3PagerCacheStat(Pager *, int, int, int *); +SQLITE_PRIVATE void sqlite3PagerClearCache(Pager *); +#endif +SQLITE_PRIVATE int sqlite3SectorSize(sqlite3_file *); + +/* Functions used to truncate the database file. */ +SQLITE_PRIVATE void sqlite3PagerTruncateImage(Pager*,Pgno); + +SQLITE_PRIVATE void sqlite3PagerRekey(DbPage*, Pgno, u16); + +#if defined(SQLITE_HAS_CODEC) && !defined(SQLITE_OMIT_WAL) +SQLITE_PRIVATE void *sqlite3PagerCodec(DbPage *); +#endif + +/* Functions to support testing and debugging. */ +#if !defined(NDEBUG) || defined(SQLITE_TEST) +SQLITE_PRIVATE Pgno sqlite3PagerPagenumber(DbPage*); +SQLITE_PRIVATE int sqlite3PagerIswriteable(DbPage*); +#endif +#ifdef SQLITE_TEST +SQLITE_PRIVATE int *sqlite3PagerStats(Pager*); +SQLITE_PRIVATE void sqlite3PagerRefdump(Pager*); + void disable_simulated_io_errors(void); + void enable_simulated_io_errors(void); +#else +# define disable_simulated_io_errors() +# define enable_simulated_io_errors() +#endif + +#endif /* _PAGER_H_ */ + +/************** End of pager.h ***********************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ +/************** Include pcache.h in the middle of sqliteInt.h ****************/ +/************** Begin file pcache.h ******************************************/ +/* +** 2008 August 05 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This header file defines the interface that the sqlite page cache +** subsystem. +*/ + +#ifndef _PCACHE_H_ + +typedef struct PgHdr PgHdr; +typedef struct PCache PCache; + +/* +** Every page in the cache is controlled by an instance of the following +** structure. +*/ +struct PgHdr { + sqlite3_pcache_page *pPage; /* Pcache object page handle */ + void *pData; /* Page data */ + void *pExtra; /* Extra content */ + PgHdr *pDirty; /* Transient list of dirty pages */ + Pager *pPager; /* The pager this page is part of */ + Pgno pgno; /* Page number for this page */ +#ifdef SQLITE_CHECK_PAGES + u32 pageHash; /* Hash of page content */ +#endif + u16 flags; /* PGHDR flags defined below */ + + /********************************************************************** + ** Elements above are public. All that follows is private to pcache.c + ** and should not be accessed by other modules. + */ + i16 nRef; /* Number of users of this page */ + PCache *pCache; /* Cache that owns this page */ + + PgHdr *pDirtyNext; /* Next element in list of dirty pages */ + PgHdr *pDirtyPrev; /* Previous element in list of dirty pages */ +}; + +/* Bit values for PgHdr.flags */ +#define PGHDR_CLEAN 0x001 /* Page not on the PCache.pDirty list */ +#define PGHDR_DIRTY 0x002 /* Page is on the PCache.pDirty list */ +#define PGHDR_WRITEABLE 0x004 /* Journaled and ready to modify */ +#define PGHDR_NEED_SYNC 0x008 /* Fsync the rollback journal before + ** writing this page to the database */ +#define PGHDR_NEED_READ 0x010 /* Content is unread */ +#define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */ +#define PGHDR_MMAP 0x040 /* This is an mmap page object */ + +/* Initialize and shutdown the page cache subsystem */ +SQLITE_PRIVATE int sqlite3PcacheInitialize(void); +SQLITE_PRIVATE void sqlite3PcacheShutdown(void); + +/* Page cache buffer management: +** These routines implement SQLITE_CONFIG_PAGECACHE. +*/ +SQLITE_PRIVATE void sqlite3PCacheBufferSetup(void *, int sz, int n); + +/* Create a new pager cache. +** Under memory stress, invoke xStress to try to make pages clean. +** Only clean and unpinned pages can be reclaimed. +*/ +SQLITE_PRIVATE int sqlite3PcacheOpen( + int szPage, /* Size of every page */ + int szExtra, /* Extra space associated with each page */ + int bPurgeable, /* True if pages are on backing store */ + int (*xStress)(void*, PgHdr*), /* Call to try to make pages clean */ + void *pStress, /* Argument to xStress */ + PCache *pToInit /* Preallocated space for the PCache */ +); + +/* Modify the page-size after the cache has been created. */ +SQLITE_PRIVATE int sqlite3PcacheSetPageSize(PCache *, int); + +/* Return the size in bytes of a PCache object. Used to preallocate +** storage space. +*/ +SQLITE_PRIVATE int sqlite3PcacheSize(void); + +/* One release per successful fetch. Page is pinned until released. +** Reference counted. +*/ +SQLITE_PRIVATE sqlite3_pcache_page *sqlite3PcacheFetch(PCache*, Pgno, int createFlag); +SQLITE_PRIVATE int sqlite3PcacheFetchStress(PCache*, Pgno, sqlite3_pcache_page**); +SQLITE_PRIVATE PgHdr *sqlite3PcacheFetchFinish(PCache*, Pgno, sqlite3_pcache_page *pPage); +SQLITE_PRIVATE void sqlite3PcacheRelease(PgHdr*); + +SQLITE_PRIVATE void sqlite3PcacheDrop(PgHdr*); /* Remove page from cache */ +SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr*); /* Make sure page is marked dirty */ +SQLITE_PRIVATE void sqlite3PcacheMakeClean(PgHdr*); /* Mark a single page as clean */ +SQLITE_PRIVATE void sqlite3PcacheCleanAll(PCache*); /* Mark all dirty list pages as clean */ + +/* Change a page number. Used by incr-vacuum. */ +SQLITE_PRIVATE void sqlite3PcacheMove(PgHdr*, Pgno); + +/* Remove all pages with pgno>x. Reset the cache if x==0 */ +SQLITE_PRIVATE void sqlite3PcacheTruncate(PCache*, Pgno x); + +/* Get a list of all dirty pages in the cache, sorted by page number */ +SQLITE_PRIVATE PgHdr *sqlite3PcacheDirtyList(PCache*); + +/* Reset and close the cache object */ +SQLITE_PRIVATE void sqlite3PcacheClose(PCache*); + +/* Clear flags from pages of the page cache */ +SQLITE_PRIVATE void sqlite3PcacheClearSyncFlags(PCache *); + +/* Discard the contents of the cache */ +SQLITE_PRIVATE void sqlite3PcacheClear(PCache*); + +/* Return the total number of outstanding page references */ +SQLITE_PRIVATE int sqlite3PcacheRefCount(PCache*); + +/* Increment the reference count of an existing page */ +SQLITE_PRIVATE void sqlite3PcacheRef(PgHdr*); + +SQLITE_PRIVATE int sqlite3PcachePageRefcount(PgHdr*); + +#ifdef SQLITE_TEST +/* Return the total number of pages stored in the cache */ +SQLITE_PRIVATE int sqlite3PcachePagecount(PCache*); +#endif + +#if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG) +/* Iterate through all dirty pages currently stored in the cache. This +** interface is only available if SQLITE_CHECK_PAGES is defined when the +** library is built. +*/ +SQLITE_PRIVATE void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHdr *)); +#endif + +/* Set and get the suggested cache-size for the specified pager-cache. +** +** If no global maximum is configured, then the system attempts to limit +** the total number of pages cached by purgeable pager-caches to the sum +** of the suggested cache-sizes. +*/ +SQLITE_PRIVATE void sqlite3PcacheSetCachesize(PCache *, int); +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3PcacheGetCachesize(PCache *); +#endif + +#if 0 +/* Free up as much memory as possible from the page cache */ +SQLITE_PRIVATE void sqlite3PcacheShrink(PCache*); +#endif + +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT +/* Try to return memory used by the pcache module to the main memory heap */ +SQLITE_PRIVATE int sqlite3PcacheReleaseMemory(int); +#endif + +#ifdef SQLITE_TEST +SQLITE_PRIVATE void sqlite3PcacheStats(int*,int*,int*,int*); +#endif + +SQLITE_PRIVATE void sqlite3PCacheSetDefault(void); + +/* Return the header size */ +SQLITE_PRIVATE int sqlite3HeaderSizePcache(void); +SQLITE_PRIVATE int sqlite3HeaderSizePcache1(void); + +#endif /* _PCACHE_H_ */ + +/************** End of pcache.h **********************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ + +/************** Include os.h in the middle of sqliteInt.h ********************/ +/************** Begin file os.h **********************************************/ +/* +** 2001 September 16 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This header file (together with is companion C source-code file +** "os.c") attempt to abstract the underlying operating system so that +** the SQLite library will work on both POSIX and windows systems. +** +** This header file is #include-ed by sqliteInt.h and thus ends up +** being included by every source file. +*/ +#ifndef _SQLITE_OS_H_ +#define _SQLITE_OS_H_ + +/* +** Attempt to automatically detect the operating system and setup the +** necessary pre-processor macros for it. +*/ +/************** Include os_setup.h in the middle of os.h *********************/ +/************** Begin file os_setup.h ****************************************/ +/* +** 2013 November 25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains pre-processor directives related to operating system +** detection and/or setup. +*/ +#ifndef _OS_SETUP_H_ +#define _OS_SETUP_H_ + +/* +** Figure out if we are dealing with Unix, Windows, or some other operating +** system. +** +** After the following block of preprocess macros, all of SQLITE_OS_UNIX, +** SQLITE_OS_WIN, and SQLITE_OS_OTHER will defined to either 1 or 0. One of +** the three will be 1. The other two will be 0. +*/ +#if defined(SQLITE_OS_OTHER) +# if SQLITE_OS_OTHER==1 +# undef SQLITE_OS_UNIX +# define SQLITE_OS_UNIX 0 +# undef SQLITE_OS_WIN +# define SQLITE_OS_WIN 0 +# else +# undef SQLITE_OS_OTHER +# endif +#endif +#if !defined(SQLITE_OS_UNIX) && !defined(SQLITE_OS_OTHER) +# define SQLITE_OS_OTHER 0 +# ifndef SQLITE_OS_WIN +# if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || \ + defined(__MINGW32__) || defined(__BORLANDC__) +# define SQLITE_OS_WIN 1 +# define SQLITE_OS_UNIX 0 +# else +# define SQLITE_OS_WIN 0 +# define SQLITE_OS_UNIX 1 +# endif +# else +# define SQLITE_OS_UNIX 0 +# endif +#else +# ifndef SQLITE_OS_WIN +# define SQLITE_OS_WIN 0 +# endif +#endif + +#endif /* _OS_SETUP_H_ */ + +/************** End of os_setup.h ********************************************/ +/************** Continuing where we left off in os.h *************************/ + +/* If the SET_FULLSYNC macro is not defined above, then make it +** a no-op +*/ +#ifndef SET_FULLSYNC +# define SET_FULLSYNC(x,y) +#endif + +/* +** The default size of a disk sector +*/ +#ifndef SQLITE_DEFAULT_SECTOR_SIZE +# define SQLITE_DEFAULT_SECTOR_SIZE 4096 +#endif + +/* +** Temporary files are named starting with this prefix followed by 16 random +** alphanumeric characters, and no file extension. They are stored in the +** OS's standard temporary file directory, and are deleted prior to exit. +** If sqlite is being embedded in another program, you may wish to change the +** prefix to reflect your program's name, so that if your program exits +** prematurely, old temporary files can be easily identified. This can be done +** using -DSQLITE_TEMP_FILE_PREFIX=myprefix_ on the compiler command line. +** +** 2006-10-31: The default prefix used to be "sqlite_". But then +** Mcafee started using SQLite in their anti-virus product and it +** started putting files with the "sqlite" name in the c:/temp folder. +** This annoyed many windows users. Those users would then do a +** Google search for "sqlite", find the telephone numbers of the +** developers and call to wake them up at night and complain. +** For this reason, the default name prefix is changed to be "sqlite" +** spelled backwards. So the temp files are still identified, but +** anybody smart enough to figure out the code is also likely smart +** enough to know that calling the developer will not help get rid +** of the file. +*/ +#ifndef SQLITE_TEMP_FILE_PREFIX +# define SQLITE_TEMP_FILE_PREFIX "etilqs_" +#endif + +/* +** The following values may be passed as the second argument to +** sqlite3OsLock(). The various locks exhibit the following semantics: +** +** SHARED: Any number of processes may hold a SHARED lock simultaneously. +** RESERVED: A single process may hold a RESERVED lock on a file at +** any time. Other processes may hold and obtain new SHARED locks. +** PENDING: A single process may hold a PENDING lock on a file at +** any one time. Existing SHARED locks may persist, but no new +** SHARED locks may be obtained by other processes. +** EXCLUSIVE: An EXCLUSIVE lock precludes all other locks. +** +** PENDING_LOCK may not be passed directly to sqlite3OsLock(). Instead, a +** process that requests an EXCLUSIVE lock may actually obtain a PENDING +** lock. This can be upgraded to an EXCLUSIVE lock by a subsequent call to +** sqlite3OsLock(). +*/ +#define NO_LOCK 0 +#define SHARED_LOCK 1 +#define RESERVED_LOCK 2 +#define PENDING_LOCK 3 +#define EXCLUSIVE_LOCK 4 + +/* +** File Locking Notes: (Mostly about windows but also some info for Unix) +** +** We cannot use LockFileEx() or UnlockFileEx() on Win95/98/ME because +** those functions are not available. So we use only LockFile() and +** UnlockFile(). +** +** LockFile() prevents not just writing but also reading by other processes. +** A SHARED_LOCK is obtained by locking a single randomly-chosen +** byte out of a specific range of bytes. The lock byte is obtained at +** random so two separate readers can probably access the file at the +** same time, unless they are unlucky and choose the same lock byte. +** An EXCLUSIVE_LOCK is obtained by locking all bytes in the range. +** There can only be one writer. A RESERVED_LOCK is obtained by locking +** a single byte of the file that is designated as the reserved lock byte. +** A PENDING_LOCK is obtained by locking a designated byte different from +** the RESERVED_LOCK byte. +** +** On WinNT/2K/XP systems, LockFileEx() and UnlockFileEx() are available, +** which means we can use reader/writer locks. When reader/writer locks +** are used, the lock is placed on the same range of bytes that is used +** for probabilistic locking in Win95/98/ME. Hence, the locking scheme +** will support two or more Win95 readers or two or more WinNT readers. +** But a single Win95 reader will lock out all WinNT readers and a single +** WinNT reader will lock out all other Win95 readers. +** +** The following #defines specify the range of bytes used for locking. +** SHARED_SIZE is the number of bytes available in the pool from which +** a random byte is selected for a shared lock. The pool of bytes for +** shared locks begins at SHARED_FIRST. +** +** The same locking strategy and +** byte ranges are used for Unix. This leaves open the possibility of having +** clients on win95, winNT, and unix all talking to the same shared file +** and all locking correctly. To do so would require that samba (or whatever +** tool is being used for file sharing) implements locks correctly between +** windows and unix. I'm guessing that isn't likely to happen, but by +** using the same locking range we are at least open to the possibility. +** +** Locking in windows is manditory. For this reason, we cannot store +** actual data in the bytes used for locking. The pager never allocates +** the pages involved in locking therefore. SHARED_SIZE is selected so +** that all locks will fit on a single page even at the minimum page size. +** PENDING_BYTE defines the beginning of the locks. By default PENDING_BYTE +** is set high so that we don't have to allocate an unused page except +** for very large databases. But one should test the page skipping logic +** by setting PENDING_BYTE low and running the entire regression suite. +** +** Changing the value of PENDING_BYTE results in a subtly incompatible +** file format. Depending on how it is changed, you might not notice +** the incompatibility right away, even running a full regression test. +** The default location of PENDING_BYTE is the first byte past the +** 1GB boundary. +** +*/ +#ifdef SQLITE_OMIT_WSD +# define PENDING_BYTE (0x40000000) +#else +# define PENDING_BYTE sqlite3PendingByte +#endif +#define RESERVED_BYTE (PENDING_BYTE+1) +#define SHARED_FIRST (PENDING_BYTE+2) +#define SHARED_SIZE 510 + +/* +** Wrapper around OS specific sqlite3_os_init() function. +*/ +SQLITE_PRIVATE int sqlite3OsInit(void); + +/* +** Functions for accessing sqlite3_file methods +*/ +SQLITE_PRIVATE int sqlite3OsClose(sqlite3_file*); +SQLITE_PRIVATE int sqlite3OsRead(sqlite3_file*, void*, int amt, i64 offset); +SQLITE_PRIVATE int sqlite3OsWrite(sqlite3_file*, const void*, int amt, i64 offset); +SQLITE_PRIVATE int sqlite3OsTruncate(sqlite3_file*, i64 size); +SQLITE_PRIVATE int sqlite3OsSync(sqlite3_file*, int); +SQLITE_PRIVATE int sqlite3OsFileSize(sqlite3_file*, i64 *pSize); +SQLITE_PRIVATE int sqlite3OsLock(sqlite3_file*, int); +SQLITE_PRIVATE int sqlite3OsUnlock(sqlite3_file*, int); +SQLITE_PRIVATE int sqlite3OsCheckReservedLock(sqlite3_file *id, int *pResOut); +SQLITE_PRIVATE int sqlite3OsFileControl(sqlite3_file*,int,void*); +SQLITE_PRIVATE void sqlite3OsFileControlHint(sqlite3_file*,int,void*); +#define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0 +SQLITE_PRIVATE int sqlite3OsSectorSize(sqlite3_file *id); +SQLITE_PRIVATE int sqlite3OsDeviceCharacteristics(sqlite3_file *id); +SQLITE_PRIVATE int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **); +SQLITE_PRIVATE int sqlite3OsShmLock(sqlite3_file *id, int, int, int); +SQLITE_PRIVATE void sqlite3OsShmBarrier(sqlite3_file *id); +SQLITE_PRIVATE int sqlite3OsShmUnmap(sqlite3_file *id, int); +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64, int, void **); +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *, i64, void *); + + +/* +** Functions for accessing sqlite3_vfs methods +*/ +SQLITE_PRIVATE int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *); +SQLITE_PRIVATE int sqlite3OsDelete(sqlite3_vfs *, const char *, int); +SQLITE_PRIVATE int sqlite3OsAccess(sqlite3_vfs *, const char *, int, int *pResOut); +SQLITE_PRIVATE int sqlite3OsFullPathname(sqlite3_vfs *, const char *, int, char *); +SQLITE_PRIVATE int sqlite3OsRandomness(sqlite3_vfs *, int, char *); +SQLITE_PRIVATE int sqlite3OsSleep(sqlite3_vfs *, int); + +#endif /* _SQLITE_OS_H_ */ + +/************** End of os.h **************************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ +/************** Include mutex.h in the middle of sqliteInt.h *****************/ +/************** Begin file mutex.h *******************************************/ +/* +** 2007 August 28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains the common header for all mutex implementations. +** The sqliteInt.h header #includes this file so that it is available +** to all source files. We break it out in an effort to keep the code +** better organized. +** +** NOTE: source files should *not* #include this header file directly. +** Source files should #include the sqliteInt.h file and let that file +** include this one indirectly. +*/ + + +/* +** Figure out what version of the code to use. The choices are +** +** SQLITE_MUTEX_OMIT No mutex logic. Not even stubs. The +** mutexes implementation cannot be overridden +** at start-time. +** +** SQLITE_MUTEX_NOOP For single-threaded applications. No +** mutual exclusion is provided. But this +** implementation can be overridden at +** start-time. +** +** SQLITE_MUTEX_PTHREADS For multi-threaded applications on Unix. +** +** SQLITE_MUTEX_W32 For multi-threaded applications on Win32. +*/ +#if !SQLITE_THREADSAFE +# define SQLITE_MUTEX_OMIT +#endif +#if SQLITE_THREADSAFE && !defined(SQLITE_MUTEX_NOOP) +# if SQLITE_OS_UNIX +# define SQLITE_MUTEX_PTHREADS +# elif SQLITE_OS_WIN +# define SQLITE_MUTEX_W32 +# else +# define SQLITE_MUTEX_NOOP +# endif +#endif + +#ifdef SQLITE_MUTEX_OMIT +/* +** If this is a no-op implementation, implement everything as macros. +*/ +#define sqlite3_mutex_alloc(X) ((sqlite3_mutex*)8) +#define sqlite3_mutex_free(X) +#define sqlite3_mutex_enter(X) +#define sqlite3_mutex_try(X) SQLITE_OK +#define sqlite3_mutex_leave(X) +#define sqlite3_mutex_held(X) ((void)(X),1) +#define sqlite3_mutex_notheld(X) ((void)(X),1) +#define sqlite3MutexAlloc(X) ((sqlite3_mutex*)8) +#define sqlite3MutexInit() SQLITE_OK +#define sqlite3MutexEnd() +#define MUTEX_LOGIC(X) +#else +#define MUTEX_LOGIC(X) X +#endif /* defined(SQLITE_MUTEX_OMIT) */ + +/************** End of mutex.h ***********************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ + +/* +** The number of different kinds of things that can be limited +** using the sqlite3_limit() interface. +*/ +#define SQLITE_N_LIMIT (SQLITE_LIMIT_WORKER_THREADS+1) + +/* +** Possible values for the sqlite.magic field. +** The numbers are obtained at random and have no special meaning, other +** than being distinct from one another. +*/ +#define SQLITE_MAGIC_OPEN 0xa029a697 /* Database is open */ +#define SQLITE_MAGIC_CLOSED 0x9f3c2d33 /* Database is closed */ +#define SQLITE_MAGIC_SICK 0x4b771290 /* Error and awaiting close */ +#define SQLITE_MAGIC_BUSY 0xf03b7906 /* Database currently in use */ +#define SQLITE_MAGIC_ERROR 0xb5357930 /* An SQLITE_MISUSE error occurred */ +#define SQLITE_MAGIC_ZOMBIE 0x64cffc7f /* Close with last statement close */ + +/* +** The SQLITE_*_BKPT macros are substitutes for the error codes with +** the same name but without the _BKPT suffix. These macros invoke +** routines that report the line-number on which the error originated +** using sqlite3_log(). The routines also provide a convenient place +** to set a debugger breakpoint. +*/ +SQLITE_PRIVATE int sqlite3CorruptError(int); +SQLITE_PRIVATE int sqlite3MisuseError(int); +SQLITE_PRIVATE int sqlite3CantopenError(int); +#define SQLITE_CORRUPT_BKPT sqlite3CorruptError(__LINE__) +#define SQLITE_MISUSE_BKPT sqlite3MisuseError(__LINE__) +#define SQLITE_CANTOPEN_BKPT sqlite3CantopenError(__LINE__) + +#include <ctype.h> + +# define sqlite3Toupper(x) toupper((unsigned char)(x)) +# define sqlite3Isspace(x) isspace((unsigned char)(x)) +# define sqlite3Isalnum(x) isalnum((unsigned char)(x)) +# define sqlite3Isalpha(x) isalpha((unsigned char)(x)) +# define sqlite3Isdigit(x) isdigit((unsigned char)(x)) +# define sqlite3Isxdigit(x) isxdigit((unsigned char)(x)) +# define sqlite3Tolower(x) tolower((unsigned char)(x)) + +/* +** Internal function prototypes +*/ +SQLITE_PRIVATE int sqlite3Strlen30(const char*); + +SQLITE_PRIVATE int sqlite3MallocInit(void); +SQLITE_PRIVATE void sqlite3MallocEnd(void); +SQLITE_PRIVATE void *sqlite3Malloc(u64); +SQLITE_PRIVATE void *sqlite3MallocZero(u64); +SQLITE_PRIVATE void *sqlite3DbMallocZero(Btree*, u64); +SQLITE_PRIVATE void *sqlite3DbMallocRaw(Btree*, u64); +SQLITE_PRIVATE void *sqlite3Realloc(void*, u64); +SQLITE_PRIVATE void *sqlite3DbReallocOrFree(Btree*, void *, u64); +SQLITE_PRIVATE void *sqlite3DbRealloc(Btree*, void *, u64); +SQLITE_PRIVATE void sqlite3DbFree(Btree*, void*); +SQLITE_PRIVATE int sqlite3MallocSize(void*); +SQLITE_PRIVATE int sqlite3DbMallocSize(Btree*, void*); +SQLITE_PRIVATE void *sqlite3ScratchMalloc(int); +SQLITE_PRIVATE void sqlite3ScratchFree(void*); +SQLITE_PRIVATE void *sqlite3PageMalloc(int); +SQLITE_PRIVATE void sqlite3PageFree(void*); +SQLITE_PRIVATE void sqlite3MemSetDefault(void); +SQLITE_PRIVATE int sqlite3HeapNearlyFull(void); + +/* +** On systems with ample stack space and that support alloca(), make +** use of alloca() to obtain space for large automatic objects. By default, +** obtain space from malloc(). +** +** The alloca() routine never returns NULL. This will cause code paths +** that deal with sqlite3StackAlloc() failures to be unreachable. +*/ +#ifdef SQLITE_USE_ALLOCA +# define sqlite3StackAllocRaw(D,N) alloca(N) +# define sqlite3StackAllocZero(D,N) memset(alloca(N), 0, N) +# define sqlite3StackFree(D,P) +#else +# define sqlite3StackAllocRaw(D,N) sqlite3DbMallocRaw(D,N) +# define sqlite3StackAllocZero(D,N) sqlite3DbMallocZero(D,N) +# define sqlite3StackFree(D,P) sqlite3DbFree(D,P) +#endif + +#ifndef SQLITE_MUTEX_OMIT +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void); +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3NoopMutex(void); +SQLITE_PRIVATE sqlite3_mutex *sqlite3MutexAlloc(int); +SQLITE_PRIVATE int sqlite3MutexInit(void); +SQLITE_PRIVATE int sqlite3MutexEnd(void); +#endif +#if !defined(SQLITE_MUTEX_OMIT) && !defined(SQLITE_MUTEX_NOOP) +SQLITE_PRIVATE void sqlite3MemoryBarrier(void); +#else +# define sqlite3MemoryBarrier() +#endif + +#ifndef SQLITE_OMIT_FLOATING_POINT +SQLITE_PRIVATE int sqlite3IsNaN(double); +#else +# define sqlite3IsNaN(X) 0 +#endif + +/* +** An instance of the following structure holds information about SQL +** functions arguments that are the parameters to the printf() function. +*/ +struct PrintfArguments { + int nArg; /* Total number of arguments */ + int nUsed; /* Number of arguments used so far */ + Mem **apArg; /* The argument values */ +}; + +#define SQLITE_PRINTF_INTERNAL 0x01 +#define SQLITE_PRINTF_SQLFUNC 0x02 + +SQLITE_PRIVATE void sqlite3StrAccumInit(StrAccum*, Btree* pBtree, char*, int, int); +SQLITE_PRIVATE void sqlite3StrAccumAppend(StrAccum*,const char*,int); +SQLITE_PRIVATE void sqlite3AppendChar(StrAccum*,int,char); +SQLITE_PRIVATE char *sqlite3StrAccumFinish(StrAccum*); +SQLITE_PRIVATE void sqlite3StrAccumReset(StrAccum*); + +SQLITE_PRIVATE int sqlite3SafetyCheckSickOrOk(Btree*); + +SQLITE_PRIVATE int sqlite3InvokeBusyHandler(BusyHandler*); + +SQLITE_PRIVATE void sqlite3BackupRestart(sqlite3_backup *); +SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *, Pgno, const u8 *); + +typedef int (*RecordCompare)(int,const void*,UnpackedRecord*); + +#define SQLITE_RecoveryMode 0x00010000 /* Ignore schema errors */ +#define SQLITE_CellSizeCk 0x10000000 /* Check btree cell sizes on load */ + +/* +** The following are used as the second parameter to sqlite3Savepoint(), +** and as the P1 argument to the OP_Savepoint instruction. +*/ +#define SAVEPOINT_BEGIN 0 +#define SAVEPOINT_RELEASE 1 +#define SAVEPOINT_ROLLBACK 2 + +/* +** A "Collating Sequence" is defined by an instance of the following +** structure. Conceptually, a collating sequence consists of a name and +** a comparison routine that defines the order of that sequence. +** +** If CollSeq.xCmp is NULL, it means that the +** collating sequence is undefined. Indices built on an undefined +** collating sequence may not be read or written. +*/ +struct CollSeq { + char *zName; /* Name of the collating sequence, UTF-8 encoded */ + u8 enc; /* Text encoding handled by xCmp() */ + void *pUser; /* First argument to xCmp() */ + int (*xCmp)(void*,int, const void*, int, const void*); + void (*xDel)(void*); /* Destructor for pUser */ +}; + +/* +** An instance of the following structure is passed as the first +** argument to sqlite3VdbeKeyCompare and is used to control the +** comparison of the two index keys. +** +** Note that aSortOrder[] and aColl[] have nField+1 slots. There +** are nField slots for the columns of an index then one extra slot +** for the rowid at the end. +*/ +struct KeyInfo { + u32 nRef; /* Number of references to this KeyInfo object */ + u8 enc; /* Text encoding - one of the SQLITE_UTF* values */ + u16 nField; /* Number of key columns in the index */ + u16 nXField; /* Number of columns beyond the key columns */ + Btree *pBtree; /* The database connection */ + u8 *aSortOrder; /* Sort order for each column. */ + CollSeq *aColl[1]; /* Collating sequence for each term of the key */ +}; + +/* +** An objected used to accumulate the text of a string where we +** do not necessarily know how big the string will be in the end. +*/ +struct StrAccum { + Btree *pBtree; /* Optional database for lookaside. Can be NULL */ + char *zBase; /* A base allocation. Not from malloc. */ + char *zText; /* The string collected so far */ + int nChar; /* Length of the string so far */ + int nAlloc; /* Amount of space allocated in zText */ + int mxAlloc; /* Maximum allowed allocation. 0 for no malloc usage */ + u8 accError; /* STRACCUM_NOMEM or STRACCUM_TOOBIG */ +}; +#define STRACCUM_NOMEM 1 +#define STRACCUM_TOOBIG 2 + +/************** Include btreeInt.h in the middle of sqliteInt.h **************/ +/************** Begin file btreeInt.h ****************************************/ +/* +** 2004 April 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file implements an external (disk-based) database using BTrees. +** For a detailed discussion of BTrees, refer to +** +** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3: +** "Sorting And Searching", pages 473-480. Addison-Wesley +** Publishing Company, Reading, Massachusetts. +** +** The basic idea is that each page of the file contains N database +** entries and N+1 pointers to subpages. +** +** ---------------------------------------------------------------- +** | Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N-1) | Ptr(N) | +** ---------------------------------------------------------------- +** +** All of the keys on the page that Ptr(0) points to have values less +** than Key(0). All of the keys on page Ptr(1) and its subpages have +** values greater than Key(0) and less than Key(1). All of the keys +** on Ptr(N) and its subpages have values greater than Key(N-1). And +** so forth. +** +** Finding a particular key requires reading O(log(M)) pages from the +** disk where M is the number of entries in the tree. +** +** In this implementation, a single file can hold one or more separate +** BTrees. Each BTree is identified by the index of its root page. The +** key and data for any entry are combined to form the "payload". A +** fixed amount of payload can be carried directly on the database +** page. If the payload is larger than the preset amount then surplus +** bytes are stored on overflow pages. The payload for an entry +** and the preceding pointer are combined to form a "Cell". Each +** page has a small header which contains the Ptr(N) pointer and other +** information such as the size of key and data. +** +** FORMAT DETAILS +** +** The file is divided into pages. The first page is called page 1, +** the second is page 2, and so forth. A page number of zero indicates +** "no such page". The page size can be any power of 2 between 512 and 65536. +** Each page can be either a btree page, a freelist page, an overflow +** page, or a pointer-map page. +** +** The first page is always a btree page. The first 100 bytes of the first +** page contain a special header (the "file header") that describes the file. +** The format of the file header is as follows: +** +** OFFSET SIZE DESCRIPTION +** 0 16 Header string: "SQLite format 3\000" +** 16 2 Page size in bytes. (1 means 65536) +** 18 1 File format write version +** 19 1 File format read version +** 20 1 Bytes of unused space at the end of each page +** 21 1 Max embedded payload fraction (must be 64) +** 22 1 Min embedded payload fraction (must be 32) +** 23 1 Min leaf payload fraction (must be 32) +** 24 4 File change counter +** 28 4 Reserved for future use +** 32 4 First freelist page +** 36 4 Number of freelist pages in the file +** 40 60 15 4-byte meta values passed to higher layers +** +** 40 4 Schema cookie +** 44 4 File format of schema layer +** 48 4 Size of page cache +** 52 4 Largest root-page (auto/incr_vacuum) +** 56 4 1=UTF-8 2=UTF16le 3=UTF16be +** 60 4 User version +** 64 4 Incremental vacuum mode +** 68 4 Application-ID +** 72 20 unused +** 92 4 The version-valid-for number +** 96 4 SQLITE_VERSION_NUMBER +** +** All of the integer values are big-endian (most significant byte first). +** +** The file change counter is incremented when the database is changed +** This counter allows other processes to know when the file has changed +** and thus when they need to flush their cache. +** +** The max embedded payload fraction is the amount of the total usable +** space in a page that can be consumed by a single cell for standard +** B-tree (non-LEAFDATA) tables. A value of 255 means 100%. The default +** is to limit the maximum cell size so that at least 4 cells will fit +** on one page. Thus the default max embedded payload fraction is 64. +** +** If the payload for a cell is larger than the max payload, then extra +** payload is spilled to overflow pages. Once an overflow page is allocated, +** as many bytes as possible are moved into the overflow pages without letting +** the cell size drop below the min embedded payload fraction. +** +** The min leaf payload fraction is like the min embedded payload fraction +** except that it applies to leaf nodes in a LEAFDATA tree. The maximum +** payload fraction for a LEAFDATA tree is always 100% (or 255) and it +** not specified in the header. +** +** Each btree pages is divided into three sections: The header, the +** cell pointer array, and the cell content area. Page 1 also has a 100-byte +** file header that occurs before the page header. +** +** |----------------| +** | file header | 100 bytes. Page 1 only. +** |----------------| +** | page header | 8 bytes for leaves. 12 bytes for interior nodes +** |----------------| +** | cell pointer | | 2 bytes per cell. Sorted order. +** | array | | Grows downward +** | | v +** |----------------| +** | unallocated | +** | space | +** |----------------| ^ Grows upwards +** | cell content | | Arbitrary order interspersed with freeblocks. +** | area | | and free space fragments. +** |----------------| +** +** The page headers looks like this: +** +** OFFSET SIZE DESCRIPTION +** 0 1 Flags. 1: intkey, 2: zerodata, 4: leafdata, 8: leaf +** 1 2 byte offset to the first freeblock +** 3 2 number of cells on this page +** 5 2 first byte of the cell content area +** 7 1 number of fragmented free bytes +** 8 4 Right child (the Ptr(N) value). Omitted on leaves. +** +** The flags define the format of this btree page. The leaf flag means that +** this page has no children. The zerodata flag means that this page carries +** only keys and no data. The intkey flag means that the key is an integer +** which is stored in the key size entry of the cell header rather than in +** the payload area. +** +** The cell pointer array begins on the first byte after the page header. +** The cell pointer array contains zero or more 2-byte numbers which are +** offsets from the beginning of the page to the cell content in the cell +** content area. The cell pointers occur in sorted order. The system strives +** to keep free space after the last cell pointer so that new cells can +** be easily added without having to defragment the page. +** +** Cell content is stored at the very end of the page and grows toward the +** beginning of the page. +** +** Unused space within the cell content area is collected into a linked list of +** freeblocks. Each freeblock is at least 4 bytes in size. The byte offset +** to the first freeblock is given in the header. Freeblocks occur in +** increasing order. Because a freeblock must be at least 4 bytes in size, +** any group of 3 or fewer unused bytes in the cell content area cannot +** exist on the freeblock chain. A group of 3 or fewer free bytes is called +** a fragment. The total number of bytes in all fragments is recorded. +** in the page header at offset 7. +** +** SIZE DESCRIPTION +** 2 Byte offset of the next freeblock +** 2 Bytes in this freeblock +** +** Cells are of variable length. Cells are stored in the cell content area at +** the end of the page. Pointers to the cells are in the cell pointer array +** that immediately follows the page header. Cells is not necessarily +** contiguous or in order, but cell pointers are contiguous and in order. +** +** Cell content makes use of variable length integers. A variable +** length integer is 1 to 9 bytes where the lower 7 bits of each +** byte are used. The integer consists of all bytes that have bit 8 set and +** the first byte with bit 8 clear. The most significant byte of the integer +** appears first. A variable-length integer may not be more than 9 bytes long. +** As a special case, all 8 bytes of the 9th byte are used as data. This +** allows a 64-bit integer to be encoded in 9 bytes. +** +** 0x00 becomes 0x00000000 +** 0x7f becomes 0x0000007f +** 0x81 0x00 becomes 0x00000080 +** 0x82 0x00 becomes 0x00000100 +** 0x80 0x7f becomes 0x0000007f +** 0x8a 0x91 0xd1 0xac 0x78 becomes 0x12345678 +** 0x81 0x81 0x81 0x81 0x01 becomes 0x10204081 +** +** Variable length integers are used for rowids and to hold the number of +** bytes of key and data in a btree cell. +** +** The content of a cell looks like this: +** +** SIZE DESCRIPTION +** 4 Page number of the left child. Omitted if leaf flag is set. +** var Number of bytes of data. Omitted if the zerodata flag is set. +** var Number of bytes of key. Or the key itself if intkey flag is set. +** * Payload +** 4 First page of the overflow chain. Omitted if no overflow +** +** Overflow pages form a linked list. Each page except the last is completely +** filled with data (pagesize - 4 bytes). The last page can have as little +** as 1 byte of data. +** +** SIZE DESCRIPTION +** 4 Page number of next overflow page +** * Data +** +** Freelist pages come in two subtypes: trunk pages and leaf pages. The +** file header points to the first in a linked list of trunk page. Each trunk +** page points to multiple leaf pages. The content of a leaf page is +** unspecified. A trunk page looks like this: +** +** SIZE DESCRIPTION +** 4 Page number of next trunk page +** 4 Number of leaf pointers on this page +** * zero or more pages numbers of leaves +*/ +/* #include "sqliteInt.h" */ + + +/* The following value is the maximum cell size assuming a maximum page +** size give above. +*/ +#define MX_CELL_SIZE(pBt) ((int)(pBt->pageSize-8)) + +/* The maximum number of cells on a single page of the database. This +** assumes a minimum cell size of 6 bytes (4 bytes for the cell itself +** plus 2 bytes for the index to the cell in the page header). Such +** small cells will be rare, but they are possible. +*/ +#define MX_CELL(pBt) ((pBt->pageSize-8)/6) + +/* Forward declarations */ +typedef struct MemPage MemPage; +typedef struct BtLock BtLock; +typedef struct CellInfo CellInfo; + +/* +** This is a magic string that appears at the beginning of every +** SQLite database in order to identify the file as a real database. +** +** You can change this value at compile-time by specifying a +** -DSQLITE_FILE_HEADER="..." on the compiler command-line. The +** header must be exactly 16 bytes including the zero-terminator so +** the string itself should be 15 characters long. If you change +** the header, then your custom library will not be able to read +** databases generated by the standard tools and the standard tools +** will not be able to read databases created by your custom library. +*/ +#ifndef SQLITE_FILE_HEADER /* 123456789 123456 */ +# define SQLITE_FILE_HEADER "SQLite Btree v3" + +#endif + +/* +** Page type flags. An ORed combination of these flags appear as the +** first byte of on-disk image of every BTree page. +*/ +#define PTF_INTKEY 0x01 +#define PTF_ZERODATA 0x02 +#define PTF_LEAFDATA 0x04 +#define PTF_LEAF 0x08 + +/* +** As each page of the file is loaded into memory, an instance of the following +** structure is appended and initialized to zero. This structure stores +** information about the page that is decoded from the raw file page. +** +** The pParent field points back to the parent page. This allows us to +** walk up the BTree from any leaf to the root. Care must be taken to +** unref() the parent page pointer when this page is no longer referenced. +** The pageDestructor() routine handles that chore. +** +** Access to all fields of this structure is controlled by the mutex +** stored in MemPage.pBt->mutex. +*/ +struct MemPage { + u8 isInit; /* True if previously initialized. MUST BE FIRST! */ + u8 nOverflow; /* Number of overflow cell bodies in aCell[] */ + u8 intKey; /* True if table b-trees. False for index b-trees */ + u8 intKeyLeaf; /* True if the leaf of an intKey table */ + u8 noPayload; /* True if internal intKey page (thus w/o data) */ + u8 leaf; /* True if a leaf page */ + u8 hdrOffset; /* 100 for page 1. 0 otherwise */ + u8 childPtrSize; /* 0 if leaf==1. 4 if leaf==0 */ + u8 max1bytePayload; /* min(maxLocal,127) */ + u8 bBusy; /* Prevent endless loops on corrupt database files */ + u16 maxLocal; /* Copy of BtShared.maxLocal or BtShared.maxLeaf */ + u16 minLocal; /* Copy of BtShared.minLocal or BtShared.minLeaf */ + u16 cellOffset; /* Index in aData of first cell pointer */ + u16 nFree; /* Number of free bytes on the page */ + u16 nCell; /* Number of cells on this page, local and ovfl */ + u16 maskPage; /* Mask for page offset */ + u16 aiOvfl[5]; /* Insert the i-th overflow cell before the aiOvfl-th + ** non-overflow cell */ + u8 *apOvfl[5]; /* Pointers to the body of overflow cells */ + BtShared *pBt; /* Pointer to BtShared that this page is part of */ + u8 *aData; /* Pointer to disk image of the page data */ + u8 *aDataEnd; /* One byte past the end of usable data */ + u8 *aCellIdx; /* The cell index area */ + u8 *aDataOfst; /* Same as aData for leaves. aData+4 for interior */ + DbPage *pDbPage; /* Pager page handle */ + u16 (*xCellSize)(MemPage*,u8*); /* cellSizePtr method */ + void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */ + Pgno pgno; /* Page number for this page */ +}; + +/* +** The in-memory image of a disk page has the auxiliary information appended +** to the end. EXTRA_SIZE is the number of bytes of space needed to hold +** that extra information. +*/ +#define EXTRA_SIZE sizeof(MemPage) + +/* +** A linked list of the following structures is stored at BtShared.pLock. +** Locks are added (or upgraded from READ_LOCK to WRITE_LOCK) when a cursor +** is opened on the table with root page BtShared.iTable. Locks are removed +** from this list when a transaction is committed or rolled back, or when +** a btree handle is closed. +*/ +struct BtLock { + Btree *pBtree; /* Btree handle holding this lock */ + Pgno iTable; /* Root page of table */ + u8 eLock; /* READ_LOCK or WRITE_LOCK */ + BtLock *pNext; /* Next in BtShared.pLock list */ +}; + +/* Candidate values for BtLock.eLock */ +#define READ_LOCK 1 +#define WRITE_LOCK 2 + +/* A Btree handle +** +** A database connection contains a pointer to an instance of +** this object for every database file that it has open. This structure +** is opaque to the database connection. The database connection cannot +** see the internals of this structure and only deals with pointers to +** this structure. +** +** For some database files, the same underlying database cache might be +** shared between multiple connections. In that case, each connection +** has it own instance of this object. But each instance of this object +** points to the same BtShared object. The database cache and the +** schema associated with the database file are all contained within +** the BtShared object. +** +** All fields in this structure are accessed under sqlite3.mutex. +** The pBt pointer itself may not be changed while there exists cursors +** in the referenced BtShared that point back to this Btree since those +** cursors have to go through this Btree to find their BtShared and +** they often do so without holding sqlite3.mutex. +*/ +struct Btree { + sqlite3_mutex *mutex; /* Connection mutex */ + int flags; /* Miscellaneous flags */ + i64 szMmap; /* Default mmap_size setting */ + u8 enc; /* Text encoding */ + u8 mallocFailed; /* True if we have seen a malloc failure */ + u32 magic; /* Magic number for detect library misuse */ + int nVdbeRead; /* Number of active VDBEs that read or write */ + BusyHandler busyHandler; /* Busy callback */ + int nSavepoint; /* Number of non-transaction savepoints */ + int *pnBytesFreed; /* If not NULL, increment this in DbFree() */ + BtShared *pBt; /* Sharable content of this btree */ + u8 inTrans; /* TRANS_NONE, TRANS_READ or TRANS_WRITE */ + u8 sharable; /* True if we can share pBt with another db */ + u8 locked; /* True if db currently has pBt locked */ + u8 hasIncrblobCur; /* True if there are one or more Incrblob cursors */ + int wantToLock; /* Number of nested calls to sqlite3BtreeEnter() */ + int nBackup; /* Number of backup operations reading this btree */ + u32 iDataVersion; /* Combines with pBt->pPager->iDataVersion */ + Btree *pNext; /* List of other sharable Btrees from the same db */ + Btree *pPrev; /* Back pointer of the same list */ +#ifndef SQLITE_OMIT_SHARED_CACHE + BtLock lock; /* Object used to lock page 1 */ +#endif +}; + +/* +** Btree.inTrans may take one of the following values. +** +** If the shared-data extension is enabled, there may be multiple users +** of the Btree structure. At most one of these may open a write transaction, +** but any number may have active read transactions. +*/ +#define TRANS_NONE 0 +#define TRANS_READ 1 +#define TRANS_WRITE 2 + +/* +** An instance of this object represents a single database file. +** +** A single database file can be in use at the same time by two +** or more database connections. When two or more connections are +** sharing the same database file, each connection has it own +** private Btree object for the file and each of those Btrees points +** to this one BtShared object. BtShared.nRef is the number of +** connections currently sharing this database file. +** +** Fields in this structure are accessed under the BtShared.mutex +** mutex, except for nRef and pNext which are accessed under the +** global SQLITE_MUTEX_STATIC_MASTER mutex. The pPager field +** may not be modified once it is initially set as long as nRef>0. +** The pSchema field may be set once under BtShared.mutex and +** thereafter is unchanged as long as nRef>0. +** +** isPending: +** +** If a BtShared client fails to obtain a write-lock on a database +** table (because there exists one or more read-locks on the table), +** the shared-cache enters 'pending-lock' state and isPending is +** set to true. +** +** The shared-cache leaves the 'pending lock' state when either of +** the following occur: +** +** 1) The current writer (BtShared.pWriter) concludes its transaction, OR +** 2) The number of locks held by other connections drops to zero. +** +** while in the 'pending-lock' state, no connection may start a new +** transaction. +** +** This feature is included to help prevent writer-starvation. +*/ +struct BtShared { + Pager *pPager; /* The page cache */ + Btree *pBt; /* Database connection currently using this Btree */ + BtCursor *pCursor; /* A list of all open cursors */ + MemPage *pPage1; /* First page of the database */ + u8 openFlags; /* Flags to sqlite3BtreeOpen() */ +#ifndef SQLITE_OMIT_AUTOVACUUM + u8 autoVacuum; /* True if auto-vacuum is enabled */ + u8 incrVacuum; /* True if incr-vacuum is enabled */ + u8 bDoTruncate; /* True to truncate db on commit */ +#endif + u8 inTransaction; /* Transaction state */ + u8 max1bytePayload; /* Maximum first byte of cell for a 1-byte payload */ +#ifdef SQLITE_HAS_CODEC + u8 optimalReserve; /* Desired amount of reserved space per page */ +#endif + u16 btsFlags; /* Boolean parameters. See BTS_* macros below */ + u16 maxLocal; /* Maximum local payload in non-LEAFDATA tables */ + u16 minLocal; /* Minimum local payload in non-LEAFDATA tables */ + u16 maxLeaf; /* Maximum local payload in a LEAFDATA table */ + u16 minLeaf; /* Minimum local payload in a LEAFDATA table */ + u32 pageSize; /* Total number of bytes on a page */ + u32 file_format; /* File Format */ + u32 usableSize; /* Number of usable bytes on each page */ + int nTransaction; /* Number of open transactions (read + write) */ + u32 nPage; /* Number of pages in the database */ + void *pSchema; /* Pointer to space allocated by sqlite3BtreeSchema() */ + void (*xFreeSchema)(void*); /* Destructor for BtShared.pSchema */ + sqlite3_mutex *mutex; /* Non-recursive mutex required to access this object */ + Bitvec *pHasContent; /* Set of pages moved to free-list this transaction */ +#ifndef SQLITE_OMIT_SHARED_CACHE + int nRef; /* Number of references to this structure */ + BtShared *pNext; /* Next on a list of sharable BtShared structs */ + BtLock *pLock; /* List of locks held on this shared-btree struct */ + Btree *pWriter; /* Btree with currently open write transaction */ +#endif + u8 *pTmpSpace; /* Temp space sufficient to hold a single cell */ +}; + +/* +** Allowed values for BtShared.btsFlags +*/ +#define BTS_READ_ONLY 0x0001 /* Underlying file is readonly */ +#define BTS_PAGESIZE_FIXED 0x0002 /* Page size can no longer be changed */ +#define BTS_SECURE_DELETE 0x0004 /* PRAGMA secure_delete is enabled */ +#define BTS_INITIALLY_EMPTY 0x0008 /* Database was empty at trans start */ +#define BTS_NO_WAL 0x0010 /* Do not open write-ahead-log files */ +#define BTS_EXCLUSIVE 0x0020 /* pWriter has an exclusive lock */ +#define BTS_PENDING 0x0040 /* Waiting for read-locks to clear */ + +/* +** An instance of the following structure is used to hold information +** about a cell. The parseCellPtr() function fills in this structure +** based on information extract from the raw disk page. +*/ +struct CellInfo { + i64 nKey; /* The key for INTKEY tables, or nPayload otherwise */ + u8 *pPayload; /* Pointer to the start of payload */ + u32 nPayload; /* Bytes of payload */ + u16 nLocal; /* Amount of payload held locally, not on overflow */ + u16 iOverflow; /* Offset to overflow page number. Zero if no overflow */ + u16 nSize; /* Size of the cell content on the main b-tree page */ +}; + +/* +** Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than +** this will be declared corrupt. This value is calculated based on a +** maximum database size of 2^31 pages a minimum fanout of 2 for a +** root-node and 3 for all other internal nodes. +** +** If a tree that appears to be taller than this is encountered, it is +** assumed that the database is corrupt. +*/ +#define BTCURSOR_MAX_DEPTH 20 + +/* +** A cursor is a pointer to a particular entry within a particular +** b-tree within a database file. +** +** The entry is identified by its MemPage and the index in +** MemPage.aCell[] of the entry. +** +** A single database file can be shared by two more database connections, +** but cursors cannot be shared. Each cursor is associated with a +** particular database connection identified BtCursor.pBtree.db. +** +** Fields in this structure are accessed under the BtShared.mutex +** found at self->pBt->mutex. +** +** skipNext meaning: +** eState==SKIPNEXT && skipNext>0: Next sqlite3BtreeNext() is no-op. +** eState==SKIPNEXT && skipNext<0: Next sqlite3BtreePrevious() is no-op. +** eState==FAULT: Cursor fault with skipNext as error code. +*/ +struct BtCursor { + Btree *pBtree; /* The Btree to which this cursor belongs */ + BtShared *pBt; /* The BtShared this cursor points to */ + BtCursor *pNext; /* Forms a linked list of all cursors */ + Pgno *aOverflow; /* Cache of overflow page locations */ + CellInfo info; /* A parse of the cell we are pointing at */ + i64 nKey; /* Size of pKey, or last integer key */ + void *pKey; /* Saved key that was cursor last known position */ + Pgno pgnoRoot; /* The root page of this tree */ + int nOvflAlloc; /* Allocated size of aOverflow[] array */ + int skipNext; /* Prev() is noop if negative. Next() is noop if positive. + ** Error code if eState==CURSOR_FAULT */ + u8 curFlags; /* zero or more BTCF_* flags defined below */ + u8 curPagerFlags; /* Flags to send to sqlite3PagerAcquire() */ + u8 eState; /* One of the CURSOR_XXX constants (see below) */ + u8 hints; /* As configured by CursorSetHints() */ + i8 iPage; /* Index of current page in apPage */ + u8 curIntKey; /* Value of apPage[0]->intKey */ + struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */ + void *padding1; /* Make object size a multiple of 16 */ + u16 aiIdx[BTCURSOR_MAX_DEPTH]; /* Current index in apPage[i] */ + MemPage *apPage[BTCURSOR_MAX_DEPTH]; /* Pages from root to current page */ +}; + +/* +** Legal values for BtCursor.curFlags +*/ +#define BTCF_WriteFlag 0x01 /* True if a write cursor */ +#define BTCF_ValidNKey 0x02 /* True if info.nKey is valid */ +#define BTCF_ValidOvfl 0x04 /* True if aOverflow is valid */ +#define BTCF_AtLast 0x08 /* Cursor is pointing ot the last entry */ +#define BTCF_Incrblob 0x10 /* True if an incremental I/O handle */ +#define BTCF_Multiple 0x20 /* Maybe another cursor on the same btree */ + +/* +** Potential values for BtCursor.eState. +** +** CURSOR_INVALID: +** Cursor does not point to a valid entry. This can happen (for example) +** because the table is empty or because BtreeCursorFirst() has not been +** called. +** +** CURSOR_VALID: +** Cursor points to a valid entry. getPayload() etc. may be called. +** +** CURSOR_SKIPNEXT: +** Cursor is valid except that the Cursor.skipNext field is non-zero +** indicating that the next sqlite3BtreeNext() or sqlite3BtreePrevious() +** operation should be a no-op. +** +** CURSOR_REQUIRESEEK: +** The table that this cursor was opened on still exists, but has been +** modified since the cursor was last used. The cursor position is saved +** in variables BtCursor.pKey and BtCursor.nKey. When a cursor is in +** this state, restoreCursorPosition() can be called to attempt to +** seek the cursor to the saved position. +** +** CURSOR_FAULT: +** An unrecoverable error (an I/O error or a malloc failure) has occurred +** on a different connection that shares the BtShared cache with this +** cursor. The error has left the cache in an inconsistent state. +** Do nothing else with this cursor. Any attempt to use the cursor +** should return the error code stored in BtCursor.skipNext +*/ +#define CURSOR_INVALID 0 +#define CURSOR_VALID 1 +#define CURSOR_SKIPNEXT 2 +#define CURSOR_REQUIRESEEK 3 +#define CURSOR_FAULT 4 + +/* +** The database page the PENDING_BYTE occupies. This page is never used. +*/ +# define PENDING_BYTE_PAGE(pBt) PAGER_MJ_PGNO(pBt) + +/* +** These macros define the location of the pointer-map entry for a +** database page. The first argument to each is the number of usable +** bytes on each page of the database (often 1024). The second is the +** page number to look up in the pointer map. +** +** PTRMAP_PAGENO returns the database page number of the pointer-map +** page that stores the required pointer. PTRMAP_PTROFFSET returns +** the offset of the requested map entry. +** +** If the pgno argument passed to PTRMAP_PAGENO is a pointer-map page, +** then pgno is returned. So (pgno==PTRMAP_PAGENO(pgsz, pgno)) can be +** used to test if pgno is a pointer-map page. PTRMAP_ISPAGE implements +** this test. +*/ +#define PTRMAP_PAGENO(pBt, pgno) ptrmapPageno(pBt, pgno) +#define PTRMAP_PTROFFSET(pgptrmap, pgno) (5*(pgno-pgptrmap-1)) +#define PTRMAP_ISPAGE(pBt, pgno) (PTRMAP_PAGENO((pBt),(pgno))==(pgno)) + +/* +** The pointer map is a lookup table that identifies the parent page for +** each child page in the database file. The parent page is the page that +** contains a pointer to the child. Every page in the database contains +** 0 or 1 parent pages. (In this context 'database page' refers +** to any page that is not part of the pointer map itself.) Each pointer map +** entry consists of a single byte 'type' and a 4 byte parent page number. +** The PTRMAP_XXX identifiers below are the valid types. +** +** The purpose of the pointer map is to facility moving pages from one +** position in the file to another as part of autovacuum. When a page +** is moved, the pointer in its parent must be updated to point to the +** new location. The pointer map is used to locate the parent page quickly. +** +** PTRMAP_ROOTPAGE: The database page is a root-page. The page-number is not +** used in this case. +** +** PTRMAP_FREEPAGE: The database page is an unused (free) page. The page-number +** is not used in this case. +** +** PTRMAP_OVERFLOW1: The database page is the first page in a list of +** overflow pages. The page number identifies the page that +** contains the cell with a pointer to this overflow page. +** +** PTRMAP_OVERFLOW2: The database page is the second or later page in a list of +** overflow pages. The page-number identifies the previous +** page in the overflow page list. +** +** PTRMAP_BTREE: The database page is a non-root btree page. The page number +** identifies the parent page in the btree. +*/ +#define PTRMAP_ROOTPAGE 1 +#define PTRMAP_FREEPAGE 2 +#define PTRMAP_OVERFLOW1 3 +#define PTRMAP_OVERFLOW2 4 +#define PTRMAP_BTREE 5 + +/* A bunch of assert() statements to check the transaction state variables +** of handle p (type Btree*) are internally consistent. +*/ +#define btreeIntegrity(p) \ + assert( p->pBt->inTransaction!=TRANS_NONE || p->pBt->nTransaction==0 ); \ + assert( p->pBt->inTransaction>=p->inTrans ); + + +/* +** The ISAUTOVACUUM macro is used within balance_nonroot() to determine +** if the database supports auto-vacuum or not. Because it is used +** within an expression that is an argument to another macro +** (sqliteMallocRaw), it is not possible to use conditional compilation. +** So, this macro is defined instead. +*/ +#ifndef SQLITE_OMIT_AUTOVACUUM +#define ISAUTOVACUUM (pBt->autoVacuum) +#else +#define ISAUTOVACUUM 0 +#endif + + +/* +** This structure is passed around through all the sanity checking routines +** in order to keep track of some global state information. +** +** The aRef[] array is allocated so that there is 1 bit for each page in +** the database. As the integrity-check proceeds, for each page used in +** the database the corresponding bit is set. This allows integrity-check to +** detect pages that are used twice and orphaned pages (both of which +** indicate corruption). +*/ +typedef struct IntegrityCk IntegrityCk; +struct IntegrityCk { + BtShared *pBt; /* The tree being checked out */ + Pager *pPager; /* The associated pager. Also accessible by pBt->pPager */ + u8 *aPgRef; /* 1 bit per page in the db (see above) */ + Pgno nPage; /* Number of pages in the database */ + int mxErr; /* Stop accumulating errors when this reaches zero */ + int nErr; /* Number of messages written to zErrMsg so far */ + int mallocFailed; /* A memory allocation error has occurred */ + const char *zPfx; /* Error message prefix */ + int v1, v2; /* Values for up to two %d fields in zPfx */ + StrAccum errMsg; /* Accumulate the error message text here */ + u32 *heap; /* Min-heap used for analyzing cell coverage */ +}; + +/* +** Routines to read or write a two- and four-byte big-endian integer values. +*/ +#define get2byte(x) ((x)[0]<<8 | (x)[1]) +#define put2byte(p,v) ((p)[0] = (u8)((v)>>8), (p)[1] = (u8)(v)) +#define get4byte sqlite3Get4byte +#define put4byte sqlite3Put4byte + +/* +** get2byteAligned(), unlike get2byte(), requires that its argument point to a +** two-byte aligned address. get2bytea() is only used for accessing the +** cell addresses in a btree header. +*/ +#if SQLITE_BYTEORDER==4321 +# define get2byteAligned(x) (*(u16*)(x)) +#elif SQLITE_BYTEORDER==1234 && !defined(SQLITE_DISABLE_INTRINSIC) \ + && GCC_VERSION>=4008000 +# define get2byteAligned(x) __builtin_bswap16(*(u16*)(x)) +#elif SQLITE_BYTEORDER==1234 && !defined(SQLITE_DISABLE_INTRINSIC) \ + && defined(_MSC_VER) && _MSC_VER>=1300 +# define get2byteAligned(x) _byteswap_ushort(*(u16*)(x)) +#else +# define get2byteAligned(x) ((x)[0]<<8 | (x)[1]) +#endif + +#ifndef SQLITE_OMIT_INCRBLOB +SQLITE_PRIVATE int sqlite3VdbeMemExpandBlob(Mem *); + #define ExpandBlob(P) (((P)->flags&MEM_Zero)?sqlite3VdbeMemExpandBlob(P):0) +#else + #define sqlite3VdbeMemExpandBlob(x) SQLITE_OK + #define ExpandBlob(P) SQLITE_OK +#endif + +SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void); + +SQLITE_PRIVATE void sqlite3VdbeRecordUnpack(KeyInfo*,int,const void*,UnpackedRecord*); +SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(KeyInfo *, char *, int, char **); + +typedef int (*RecordCompare)(int,const void*,UnpackedRecord*); +SQLITE_PRIVATE RecordCompare sqlite3VdbeFindCompare(UnpackedRecord*); + +/************** End of btreeInt.h ********************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ + +/* +** Structure containing global configuration data for the SQLite library. +** +** This structure also contains some state information. +*/ +struct Sqlite3Config { + int bMemstat; /* True to enable memory status */ + int bCoreMutex; /* True to enable core mutexing */ + int bFullMutex; /* True to enable full mutexing */ + int bOpenUri; /* True to interpret filenames as URIs */ + int bUseCis; /* Use covering indices for full-scans */ + int mxStrlen; /* Maximum string length */ + int neverCorrupt; /* Database is always well-formed */ + int szLookaside; /* Default lookaside buffer size */ + int nLookaside; /* Default lookaside buffer count */ + sqlite3_mem_methods m; /* Low-level memory allocation interface */ + sqlite3_mutex_methods mutex; /* Low-level mutex interface */ + sqlite3_pcache_methods2 pcache2; /* Low-level page-cache interface */ + void *pHeap; /* Heap storage space */ + int nHeap; /* Size of pHeap[] */ + int mnReq, mxReq; /* Min and max heap requests sizes */ + sqlite3_int64 szMmap; /* mmap() space per open file */ + sqlite3_int64 mxMmap; /* Maximum value for szMmap */ + void *pScratch; /* Scratch memory */ + int szScratch; /* Size of each scratch buffer */ + int nScratch; /* Number of scratch buffers */ + void *pPage; /* Page cache memory */ + int szPage; /* Size of each page in pPage[] */ + int nPage; /* Number of pages in pPage[] */ + int mxParserStack; /* maximum depth of the parser stack */ + int sharedCacheEnabled; /* true if shared-cache mode enabled */ + u32 szPma; /* Maximum Sorter PMA size */ + /* The above might be initialized to non-zero. The following need to always + ** initially be zero, however. */ + int isInit; /* True after initialization has finished */ + int inProgress; /* True while initialization in progress */ + int isMutexInit; /* True after mutexes are initialized */ + int isMallocInit; /* True after malloc is initialized */ + int isPCacheInit; /* True after malloc is initialized */ + int nRefInitMutex; /* Number of users of pInitMutex */ + sqlite3_mutex *pInitMutex; /* Mutex used by sqlite3BtreeInitialize() */ + void (*xLog)(void*,int,const char*); /* Function for logging */ + void *pLogArg; /* First argument to xLog() */ +#ifdef SQLITE_VDBE_COVERAGE + /* The following callback (if not NULL) is invoked on every VDBE branch + ** operation. Set the callback using SQLITE_TESTCTRL_VDBE_COVERAGE. + */ + void (*xVdbeBranch)(void*,int iSrcLine,u8 eThis,u8 eMx); /* Callback */ + void *pVdbeBranchArg; /* 1st argument */ +#endif + int bLocaltimeFault; /* True to fail localtime() calls */ +}; + +#ifndef SQLITE_AMALGAMATION +SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config; +#ifndef SQLITE_OMIT_WSD +SQLITE_PRIVATE int sqlite3PendingByte; +#endif +#endif + +#if (defined(i386) || defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || \ + defined(_M_AMD64) || defined(_M_ARM) || defined(__x86) || \ + defined(__arm__)) && !defined(SQLITE_RUNTIME_BYTEORDER) +# define SQLITE_BYTEORDER 1234 +# define SQLITE_BIGENDIAN 0 +# define SQLITE_LITTLEENDIAN 1 +# define SQLITE_UTF16NATIVE SQLITE_UTF16LE +#endif +#if (defined(sparc) || defined(__ppc__)) \ + && !defined(SQLITE_RUNTIME_BYTEORDER) +# define SQLITE_BYTEORDER 4321 +# define SQLITE_BIGENDIAN 1 +# define SQLITE_LITTLEENDIAN 0 +# define SQLITE_UTF16NATIVE SQLITE_UTF16BE +#endif +#if !defined(SQLITE_BYTEORDER) +# define SQLITE_BYTEORDER 0 /* 0 means "unknown at compile-time" */ +# define SQLITE_BIGENDIAN (*(char *)(&sqlite3one)==0) +# define SQLITE_LITTLEENDIAN (*(char *)(&sqlite3one)==1) +# define SQLITE_UTF16NATIVE (SQLITE_BIGENDIAN?SQLITE_UTF16BE:SQLITE_UTF16LE) +#endif + +#ifdef SQLITE_ENABLE_ATOMIC_WRITE +SQLITE_PRIVATE int sqlite3JournalOpen(sqlite3_vfs *, const char *, sqlite3_file *, int, int); +SQLITE_PRIVATE int sqlite3JournalSize(sqlite3_vfs *); +SQLITE_PRIVATE int sqlite3JournalCreate(sqlite3_file *); +SQLITE_PRIVATE int sqlite3JournalExists(sqlite3_file *p); +#else + #define sqlite3JournalSize(pVfs) ((pVfs)->szOsFile) + #define sqlite3JournalExists(p) 1 +#endif + +SQLITE_PRIVATE void sqlite3MemJournalOpen(sqlite3_file *); +SQLITE_PRIVATE int sqlite3MemJournalSize(void); +SQLITE_PRIVATE int sqlite3IsMemJournal(sqlite3_file *); + +#ifdef SQLITE_ENABLE_UNLOCK_NOTIFY +SQLITE_PRIVATE void sqlite3ConnectionBlocked(Btree*, Btree*); +SQLITE_PRIVATE void sqlite3ConnectionUnlocked(Btree *db); +SQLITE_PRIVATE void sqlite3ConnectionClosed(Btree *db); +#else + #define sqlite3ConnectionBlocked(x,y) + #define sqlite3ConnectionUnlocked(x) + #define sqlite3ConnectionClosed(x) +#endif + +/* +** If the SQLITE_ENABLE IOTRACE exists then the global variable +** sqlite3IoTrace is a pointer to a printf-like routine used to +** print I/O tracing messages. +*/ +#ifdef SQLITE_ENABLE_IOTRACE +# define IOTRACE(A) if( sqlite3IoTrace ){ sqlite3IoTrace A; } +SQLITE_PRIVATE void sqlite3VdbeIOTraceSql(Vdbe*); +SQLITE_API SQLITE_EXTERN void (SQLITE_CDECL *sqlite3IoTrace)(const char*,...); +#else +# define IOTRACE(A) +# define sqlite3VdbeIOTraceSql(X) +#endif + +SQLITE_PRIVATE Bitvec *sqlite3BitvecCreate(u32); +SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec*, u32); +SQLITE_PRIVATE int sqlite3BitvecTestNotNull(Bitvec*, u32); +SQLITE_PRIVATE int sqlite3BitvecSet(Bitvec*, u32); +SQLITE_PRIVATE void sqlite3BitvecClear(Bitvec*, u32, void*); +SQLITE_PRIVATE void sqlite3BitvecDestroy(Bitvec*); +SQLITE_PRIVATE u32 sqlite3BitvecSize(Bitvec*); + +SQLITE_PRIVATE const void *sqlite3ValueText(Mem*, u8); + +/* +** These routines are available for the mem2.c debugging memory allocator +** only. They are used to verify that different "types" of memory +** allocations are properly tracked by the system. +** +** sqlite3MemdebugSetType() sets the "type" of an allocation to one of +** the MEMTYPE_* macros defined below. The type must be a bitmask with +** a single bit set. +** +** sqlite3MemdebugHasType() returns true if any of the bits in its second +** argument match the type set by the previous sqlite3MemdebugSetType(). +** sqlite3MemdebugHasType() is intended for use inside assert() statements. +** +** sqlite3MemdebugNoType() returns true if none of the bits in its second +** argument match the type set by the previous sqlite3MemdebugSetType(). +** +** Perhaps the most important point is the difference between MEMTYPE_HEAP +** and MEMTYPE_LOOKASIDE. If an allocation is MEMTYPE_LOOKASIDE, that means +** it might have been allocated by lookaside, except the allocation was +** too large or lookaside was already full. It is important to verify +** that allocations that might have been satisfied by lookaside are not +** passed back to non-lookaside free() routines. Asserts such as the +** example above are placed on the non-lookaside free() routines to verify +** this constraint. +** +** All of this is no-op for a production build. It only comes into +** play when the SQLITE_MEMDEBUG compile-time option is used. +*/ +#ifdef SQLITE_MEMDEBUG +SQLITE_PRIVATE void sqlite3MemdebugSetType(void*,u8); +SQLITE_PRIVATE int sqlite3MemdebugHasType(void*,u8); +SQLITE_PRIVATE int sqlite3MemdebugNoType(void*,u8); +#else +# define sqlite3MemdebugSetType(X,Y) /* no-op */ +# define sqlite3MemdebugHasType(X,Y) 1 +# define sqlite3MemdebugNoType(X,Y) 1 +#endif +#define MEMTYPE_HEAP 0x01 /* General heap allocations */ +#define MEMTYPE_LOOKASIDE 0x02 /* Heap that might have been lookaside */ +#define MEMTYPE_SCRATCH 0x04 /* Scratch allocations */ +#define MEMTYPE_PCACHE 0x08 /* Page cache allocations */ + + +SQLITE_PRIVATE i64 sqlite3VdbeIntValue(Mem*); +SQLITE_PRIVATE double sqlite3VdbeRealValue(Mem*); + +SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip(int, const void *, UnpackedRecord *, int); + +#endif /* _SQLITEINT_H_ */ + +/************** End of sqliteInt.h *******************************************/ +/************** Begin file global.c ******************************************/ +/* +** 2008 June 13 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains definitions of global variables and constants. +*/ +/* #include "sqliteInt.h" */ + +/* EVIDENCE-OF: R-02982-34736 In order to maintain full backwards +** compatibility for legacy applications, the URI filename capability is +** disabled by default. +** +** EVIDENCE-OF: R-38799-08373 URI filenames can be enabled or disabled +** using the SQLITE_USE_URI=1 or SQLITE_USE_URI=0 compile-time options. +** +** EVIDENCE-OF: R-43642-56306 By default, URI handling is globally +** disabled. The default value may be changed by compiling with the +** SQLITE_USE_URI symbol defined. +*/ +#ifndef SQLITE_USE_URI +# define SQLITE_USE_URI 0 +#endif + +/* EVIDENCE-OF: R-38720-18127 The default setting is determined by the +** SQLITE_ALLOW_COVERING_INDEX_SCAN compile-time option, or is "on" if +** that compile-time option is omitted. +*/ +#ifndef SQLITE_ALLOW_COVERING_INDEX_SCAN +# define SQLITE_ALLOW_COVERING_INDEX_SCAN 1 +#endif + +/* The minimum PMA size is set to this value multiplied by the database +** page size in bytes. +*/ +#ifndef SQLITE_SORTER_PMASZ +# define SQLITE_SORTER_PMASZ 250 +#endif + +/* +** The following singleton contains the global configuration for +** the SQLite library. +*/ +SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config = { + SQLITE_DEFAULT_MEMSTATUS, /* bMemstat */ + 1, /* bCoreMutex */ + SQLITE_THREADSAFE==1, /* bFullMutex */ + SQLITE_USE_URI, /* bOpenUri */ + SQLITE_ALLOW_COVERING_INDEX_SCAN, /* bUseCis */ + 0x7ffffffe, /* mxStrlen */ + 0, /* neverCorrupt */ + 128, /* szLookaside */ + 500, /* nLookaside */ + {0,0,0,0,0,0,0,0}, /* m */ + {0,0,0,0,0,0,0,0,0}, /* mutex */ + {0,0,0,0,0,0,0,0,0,0,0,0,0},/* pcache2 */ + (void*)0, /* pHeap */ + 0, /* nHeap */ + 0, 0, /* mnHeap, mxHeap */ + SQLITE_DEFAULT_MMAP_SIZE, /* szMmap */ + SQLITE_MAX_MMAP_SIZE, /* mxMmap */ + (void*)0, /* pScratch */ + 0, /* szScratch */ + 0, /* nScratch */ + (void*)0, /* pPage */ + 0, /* szPage */ + SQLITE_DEFAULT_PCACHE_INITSZ, /* nPage */ + 0, /* mxParserStack */ + 0, /* sharedCacheEnabled */ + SQLITE_SORTER_PMASZ, /* szPma */ + /* All the rest should always be initialized to zero */ + 0, /* isInit */ + 0, /* inProgress */ + 0, /* isMutexInit */ + 0, /* isMallocInit */ + 0, /* isPCacheInit */ + 0, /* nRefInitMutex */ + 0, /* pInitMutex */ + 0, /* xLog */ + 0, /* pLogArg */ +#ifdef SQLITE_VDBE_COVERAGE + 0, /* xVdbeBranch */ + 0, /* pVbeBranchArg */ +#endif + 0 /* bLocaltimeFault */ +}; + +/* +** The value of the "pending" byte must be 0x40000000 (1 byte past the +** 1-gibabyte boundary) in a compatible database. SQLite never uses +** the database page that contains the pending byte. It never attempts +** to read or write that page. The pending byte page is set assign +** for use by the VFS layers as space for managing file locks. +** +** During testing, it is often desirable to move the pending byte to +** a different position in the file. This allows code that has to +** deal with the pending byte to run on files that are much smaller +** than 1 GiB. The sqlite3_test_control() interface can be used to +** move the pending byte. +** +** IMPORTANT: Changing the pending byte to any value other than +** 0x40000000 results in an incompatible database file format! +** Changing the pending byte during operation will result in undefined +** and incorrect behavior. +*/ +#ifndef SQLITE_OMIT_WSD +SQLITE_PRIVATE int sqlite3PendingByte = 0x40000000; +#endif + +/************** End of global.c **********************************************/ +/************** Begin file status.c ******************************************/ +/* +** 2008 June 18 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This module implements the sqlite3_status() interface and related +** functionality. +*/ +/* #include "sqliteInt.h" */ + +/* +** Variables in which to record status information. +*/ +typedef struct sqlite3StatType sqlite3StatType; +static SQLITE_WSD struct sqlite3StatType { +#if SQLITE_PTRSIZE>4 + sqlite3_int64 nowValue[10]; /* Current value */ + sqlite3_int64 mxValue[10]; /* Maximum value */ +#else + u32 nowValue[10]; /* Current value */ + u32 mxValue[10]; /* Maximum value */ +#endif +} sqlite3Stat = { {0,}, {0,} }; + +/* +** Elements of sqlite3Stat[] are protected by either the memory allocator +** mutex, or by the pcache1 mutex. The following array determines which. +*/ +static const char statMutex[] = { + 0, /* SQLITE_STATUS_MEMORY_USED */ + 1, /* SQLITE_STATUS_PAGECACHE_USED */ + 1, /* SQLITE_STATUS_PAGECACHE_OVERFLOW */ + 0, /* SQLITE_STATUS_SCRATCH_USED */ + 0, /* SQLITE_STATUS_SCRATCH_OVERFLOW */ + 0, /* SQLITE_STATUS_MALLOC_SIZE */ + 0, /* SQLITE_STATUS_PARSER_STACK */ + 1, /* SQLITE_STATUS_PAGECACHE_SIZE */ + 0, /* SQLITE_STATUS_SCRATCH_SIZE */ + 0, /* SQLITE_STATUS_MALLOC_COUNT */ +}; + + +/* The "wsdStat" macro will resolve to the status information +** state vector. If writable static data is unsupported on the target, +** we have to locate the state vector at run-time. In the more common +** case where writable static data is supported, wsdStat can refer directly +** to the "sqlite3Stat" state vector declared above. +*/ +#ifdef SQLITE_OMIT_WSD +# define wsdStatInit sqlite3StatType *x = &GLOBAL(sqlite3StatType,sqlite3Stat) +# define wsdStat x[0] +#else +# define wsdStatInit +# define wsdStat sqlite3Stat +#endif + +/* +** Return the current value of a status parameter. The caller must +** be holding the appropriate mutex. +*/ +SQLITE_PRIVATE sqlite3_int64 sqlite3StatusValue(int op){ + wsdStatInit; + assert( op>=0 && op<ArraySize(wsdStat.nowValue) ); + assert( op>=0 && op<ArraySize(statMutex) ); + assert( sqlite3_mutex_held(statMutex[op] ? sqlite3Pcache1Mutex() + : sqlite3MallocMutex()) ); + return wsdStat.nowValue[op]; +} + + +/* +** Add N to the value of a status record. The caller must hold the +** appropriate mutex. (Locking is checked by assert()). +** +** The StatusUp() routine can accept positive or negative values for N. +** The value of N is added to the current status value and the high-water +** mark is adjusted if necessary. +** +** The StatusDown() routine lowers the current value by N. The highwater +** mark is unchanged. N must be non-negative for StatusDown(). +*/ +SQLITE_PRIVATE void sqlite3StatusUp(int op, int N){ + wsdStatInit; + assert( op>=0 && op<ArraySize(wsdStat.nowValue) ); + assert( op>=0 && op<ArraySize(statMutex) ); + assert( sqlite3_mutex_held(statMutex[op] ? sqlite3Pcache1Mutex() + : sqlite3MallocMutex()) ); + wsdStat.nowValue[op] += N; + if( wsdStat.nowValue[op]>wsdStat.mxValue[op] ){ + wsdStat.mxValue[op] = wsdStat.nowValue[op]; + } +} +SQLITE_PRIVATE void sqlite3StatusDown(int op, int N){ + wsdStatInit; + assert( N>=0 ); + assert( op>=0 && op<ArraySize(statMutex) ); + assert( sqlite3_mutex_held(statMutex[op] ? sqlite3Pcache1Mutex() + : sqlite3MallocMutex()) ); + assert( op>=0 && op<ArraySize(wsdStat.nowValue) ); + wsdStat.nowValue[op] -= N; +} + +/* +** Set the value of a status to X. The highwater mark is adjusted if +** necessary. The caller must hold the appropriate mutex. +*/ +SQLITE_PRIVATE void sqlite3StatusSet(int op, int X){ + wsdStatInit; + assert( op>=0 && op<ArraySize(wsdStat.nowValue) ); + assert( op>=0 && op<ArraySize(statMutex) ); + assert( sqlite3_mutex_held(statMutex[op] ? sqlite3Pcache1Mutex() + : sqlite3MallocMutex()) ); + wsdStat.nowValue[op] = X; + if( wsdStat.nowValue[op]>wsdStat.mxValue[op] ){ + wsdStat.mxValue[op] = wsdStat.nowValue[op]; + } +} + +/************** End of status.c **********************************************/ +/************** Begin file os.c **********************************************/ +/* +** 2005 November 29 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains OS interface code that is common to all +** architectures. +*/ +#define _SQLITE_OS_C_ 1 +/* #include "sqliteInt.h" */ +#undef _SQLITE_OS_C_ + +/* +** The default SQLite sqlite3_vfs implementations do not allocate +** memory (actually, os_unix.c allocates a small amount of memory +** from within OsOpen()), but some third-party implementations may. +** So we test the effects of a malloc() failing and the sqlite3OsXXX() +** function returning SQLITE_IOERR_NOMEM using the DO_OS_MALLOC_TEST macro. +** +** The following functions are instrumented for malloc() failure +** testing: +** +** sqlite3OsRead() +** sqlite3OsWrite() +** sqlite3OsSync() +** sqlite3OsFileSize() +** sqlite3OsLock() +** sqlite3OsCheckReservedLock() +** sqlite3OsFileControl() +** sqlite3OsShmMap() +** sqlite3OsOpen() +** sqlite3OsDelete() +** sqlite3OsAccess() +** sqlite3OsFullPathname() +** +*/ +#if defined(SQLITE_TEST) +SQLITE_PRIVATE int sqlite3_memdebug_vfs_oom_test = 1; + #define DO_OS_MALLOC_TEST(x) \ + if (sqlite3_memdebug_vfs_oom_test && (!x || !sqlite3IsMemJournal(x))) { \ + void *pTstAlloc = sqlite3Malloc(10); \ + if (!pTstAlloc) return SQLITE_IOERR_NOMEM; \ + sqlite3_free(pTstAlloc); \ + } +#else + #define DO_OS_MALLOC_TEST(x) +#endif + +/* +** The following routines are convenience wrappers around methods +** of the sqlite3_file object. This is mostly just syntactic sugar. All +** of this would be completely automatic if SQLite were coded using +** C++ instead of plain old C. +*/ +SQLITE_PRIVATE int sqlite3OsClose(sqlite3_file *pId){ + int rc = SQLITE_OK; + if( pId->pMethods ){ + rc = pId->pMethods->xClose(pId); + pId->pMethods = 0; + } + return rc; +} +SQLITE_PRIVATE int sqlite3OsRead(sqlite3_file *id, void *pBuf, int amt, i64 offset){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xRead(id, pBuf, amt, offset); +} +SQLITE_PRIVATE int sqlite3OsWrite(sqlite3_file *id, const void *pBuf, int amt, i64 offset){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xWrite(id, pBuf, amt, offset); +} +SQLITE_PRIVATE int sqlite3OsTruncate(sqlite3_file *id, i64 size){ + return id->pMethods->xTruncate(id, size); +} +SQLITE_PRIVATE int sqlite3OsSync(sqlite3_file *id, int flags){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xSync(id, flags); +} +SQLITE_PRIVATE int sqlite3OsFileSize(sqlite3_file *id, i64 *pSize){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xFileSize(id, pSize); +} +SQLITE_PRIVATE int sqlite3OsLock(sqlite3_file *id, int lockType){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xLock(id, lockType); +} +SQLITE_PRIVATE int sqlite3OsUnlock(sqlite3_file *id, int lockType){ + return id->pMethods->xUnlock(id, lockType); +} +SQLITE_PRIVATE int sqlite3OsCheckReservedLock(sqlite3_file *id, int *pResOut){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xCheckReservedLock(id, pResOut); +} + +/* +** Use sqlite3OsFileControl() when we are doing something that might fail +** and we need to know about the failures. Use sqlite3OsFileControlHint() +** when simply tossing information over the wall to the VFS and we do not +** really care if the VFS receives and understands the information since it +** is only a hint and can be safely ignored. The sqlite3OsFileControlHint() +** routine has no return value since the return value would be meaningless. +*/ +SQLITE_PRIVATE int sqlite3OsFileControl(sqlite3_file *id, int op, void *pArg){ +#ifdef SQLITE_TEST + if( op!=SQLITE_FCNTL_COMMIT_PHASETWO ){ + /* Faults are not injected into COMMIT_PHASETWO because, assuming SQLite + ** is using a regular VFS, it is called after the corresponding + ** transaction has been committed. Injecting a fault at this point + ** confuses the test scripts - the COMMIT comand returns SQLITE_NOMEM + ** but the transaction is committed anyway. + ** + ** The core must call OsFileControl() though, not OsFileControlHint(), + ** as if a custom VFS (e.g. zipvfs) returns an error here, it probably + ** means the commit really has failed and an error should be returned + ** to the user. */ + DO_OS_MALLOC_TEST(id); + } +#endif + return id->pMethods->xFileControl(id, op, pArg); +} +SQLITE_PRIVATE void sqlite3OsFileControlHint(sqlite3_file *id, int op, void *pArg){ + (void)id->pMethods->xFileControl(id, op, pArg); +} + +SQLITE_PRIVATE int sqlite3OsSectorSize(sqlite3_file *id){ + int (*xSectorSize)(sqlite3_file*) = id->pMethods->xSectorSize; + return (xSectorSize ? xSectorSize(id) : SQLITE_DEFAULT_SECTOR_SIZE); +} +SQLITE_PRIVATE int sqlite3OsDeviceCharacteristics(sqlite3_file *id){ + return id->pMethods->xDeviceCharacteristics(id); +} +SQLITE_PRIVATE int sqlite3OsShmLock(sqlite3_file *id, int offset, int n, int flags){ + return id->pMethods->xShmLock(id, offset, n, flags); +} +SQLITE_PRIVATE void sqlite3OsShmBarrier(sqlite3_file *id){ + id->pMethods->xShmBarrier(id); +} +SQLITE_PRIVATE int sqlite3OsShmUnmap(sqlite3_file *id, int deleteFlag){ + return id->pMethods->xShmUnmap(id, deleteFlag); +} +SQLITE_PRIVATE int sqlite3OsShmMap( + sqlite3_file *id, /* Database file handle */ + int iPage, + int pgsz, + int bExtend, /* True to extend file if necessary */ + void volatile **pp /* OUT: Pointer to mapping */ +){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xShmMap(id, iPage, pgsz, bExtend, pp); +} + +#if SQLITE_MAX_MMAP_SIZE>0 +/* The real implementation of xFetch and xUnfetch */ +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xFetch(id, iOff, iAmt, pp); +} +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){ + return id->pMethods->xUnfetch(id, iOff, p); +} +#else +/* No-op stubs to use when memory-mapped I/O is disabled */ +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){ + *pp = 0; + return SQLITE_OK; +} +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){ + return SQLITE_OK; +} +#endif + +/* +** The next group of routines are convenience wrappers around the +** VFS methods. +*/ +SQLITE_PRIVATE int sqlite3OsOpen( + sqlite3_vfs *pVfs, + const char *zPath, + sqlite3_file *pFile, + int flags, + int *pFlagsOut +){ + int rc; + DO_OS_MALLOC_TEST(0); + /* 0x87f7f is a mask of SQLITE_OPEN_ flags that are valid to be passed + ** down into the VFS layer. Some SQLITE_OPEN_ flags (for example, + ** SQLITE_OPEN_FULLMUTEX or SQLITE_OPEN_SHAREDCACHE) are blocked before + ** reaching the VFS. */ + rc = pVfs->xOpen(pVfs, zPath, pFile, flags & 0x87f7f, pFlagsOut); + assert( rc==SQLITE_OK || pFile->pMethods==0 ); + return rc; +} +SQLITE_PRIVATE int sqlite3OsDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){ + DO_OS_MALLOC_TEST(0); + assert( dirSync==0 || dirSync==1 ); + return pVfs->xDelete(pVfs, zPath, dirSync); +} +SQLITE_PRIVATE int sqlite3OsAccess( + sqlite3_vfs *pVfs, + const char *zPath, + int flags, + int *pResOut +){ + DO_OS_MALLOC_TEST(0); + return pVfs->xAccess(pVfs, zPath, flags, pResOut); +} +SQLITE_PRIVATE int sqlite3OsFullPathname( + sqlite3_vfs *pVfs, + const char *zPath, + int nPathOut, + char *zPathOut +){ + DO_OS_MALLOC_TEST(0); + zPathOut[0] = 0; + return pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut); +} +SQLITE_PRIVATE int sqlite3OsRandomness(sqlite3_vfs *pVfs, int nByte, char *zBufOut){ + return pVfs->xRandomness(pVfs, nByte, zBufOut); +} +SQLITE_PRIVATE int sqlite3OsSleep(sqlite3_vfs *pVfs, int nMicro){ + return pVfs->xSleep(pVfs, nMicro); +} + +/* +** This function is a wrapper around the OS specific implementation of +** sqlite3_os_init(). The purpose of the wrapper is to provide the +** ability to simulate a malloc failure, so that the handling of an +** error in sqlite3_os_init() by the upper layers can be tested. +*/ +SQLITE_PRIVATE int sqlite3OsInit(void){ + void *p = sqlite3_malloc(10); + if( p==0 ) return SQLITE_NOMEM; + sqlite3_free(p); + return sqlite3_os_init(); +} + +/* +** The list of all registered VFS implementations. +*/ +static sqlite3_vfs * SQLITE_WSD vfsList = 0; +#define vfsList GLOBAL(sqlite3_vfs *, vfsList) + +/* +** Locate a VFS by name. If no name is given, simply return the +** first VFS on the list. +*/ +SQLITE_PRIVATE sqlite3_vfs *sqlite3_vfs_find(const char *zVfs){ + sqlite3_vfs *pVfs = 0; +#if SQLITE_THREADSAFE + sqlite3_mutex *mutex; +#endif +#ifndef SQLITE_OMIT_AUTOINIT + int rc = sqlite3BtreeInitialize(); + if( rc ) return 0; +#endif +#if SQLITE_THREADSAFE + mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); +#endif + sqlite3_mutex_enter(mutex); + for(pVfs = vfsList; pVfs; pVfs=pVfs->pNext){ + if( zVfs==0 ) break; + if( strcmp(zVfs, pVfs->zName)==0 ) break; + } + sqlite3_mutex_leave(mutex); + return pVfs; +} + +/* +** Unlink a VFS from the linked list +*/ +static void vfsUnlink(sqlite3_vfs *pVfs){ + assert( sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)) ); + if( pVfs==0 ){ + /* No-op */ + }else if( vfsList==pVfs ){ + vfsList = pVfs->pNext; + }else if( vfsList ){ + sqlite3_vfs *p = vfsList; + while( p->pNext && p->pNext!=pVfs ){ + p = p->pNext; + } + if( p->pNext==pVfs ){ + p->pNext = pVfs->pNext; + } + } +} + +/* +** Register a VFS with the system. It is harmless to register the same +** VFS multiple times. The new VFS becomes the default if makeDflt is +** true. +*/ +SQLITE_PRIVATE int sqlite3_vfs_register(sqlite3_vfs *pVfs, int makeDflt){ + MUTEX_LOGIC(sqlite3_mutex *mutex;) +#ifndef SQLITE_OMIT_AUTOINIT + int rc = sqlite3BtreeInitialize(); + if( rc ) return rc; +#endif +#ifdef SQLITE_ENABLE_API_ARMOR + if( pVfs==0 ) return SQLITE_MISUSE_BKPT; +#endif + + MUTEX_LOGIC( mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); ) + sqlite3_mutex_enter(mutex); + vfsUnlink(pVfs); + if( makeDflt || vfsList==0 ){ + pVfs->pNext = vfsList; + vfsList = pVfs; + }else{ + pVfs->pNext = vfsList->pNext; + vfsList->pNext = pVfs; + } + assert(vfsList); + sqlite3_mutex_leave(mutex); + return SQLITE_OK; +} + +/************** End of os.c **************************************************/ +/************** Begin file mem1.c ********************************************/ +/* +** 2007 August 14 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains low-level memory allocation drivers for when +** SQLite will use the standard C-library malloc/realloc/free interface +** to obtain the memory it needs. +** +** This file contains implementations of the low-level memory allocation +** routines specified in the sqlite3_mem_methods object. The content of +** this file is only used if SQLITE_SYSTEM_MALLOC is defined. The +** SQLITE_SYSTEM_MALLOC macro is defined automatically if neither the +** SQLITE_MEMDEBUG nor the SQLITE_WIN32_MALLOC macros are defined. The +** default configuration is to use memory allocation routines in this +** file. +** +** C-preprocessor macro summary: +** +** HAVE_MALLOC_USABLE_SIZE The configure script sets this symbol if +** the malloc_usable_size() interface exists +** on the target platform. Or, this symbol +** can be set manually, if desired. +** If an equivalent interface exists by +** a different name, using a separate -D +** option to rename it. +** +** SQLITE_WITHOUT_ZONEMALLOC Some older macs lack support for the zone +** memory allocator. Set this symbol to enable +** building on older macs. +** +** SQLITE_WITHOUT_MSIZE Set this symbol to disable the use of +** _msize() on windows systems. This might +** be necessary when compiling for Delphi, +** for example. +*/ +/* #include "sqliteInt.h" */ + +/* +** This version of the memory allocator is the default. It is +** used when no other memory allocator is specified using compile-time +** macros. +*/ +#ifdef SQLITE_SYSTEM_MALLOC +#if defined(__APPLE__) && !defined(SQLITE_WITHOUT_ZONEMALLOC) + +/* +** Use the zone allocator available on apple products unless the +** SQLITE_WITHOUT_ZONEMALLOC symbol is defined. +*/ +#include <sys/sysctl.h> +#include <malloc/malloc.h> +#include <libkern/OSAtomic.h> +static malloc_zone_t* _sqliteZone_; +#define SQLITE_MALLOC(x) malloc_zone_malloc(_sqliteZone_, (x)) +#define SQLITE_FREE(x) malloc_zone_free(_sqliteZone_, (x)); +#define SQLITE_REALLOC(x,y) malloc_zone_realloc(_sqliteZone_, (x), (y)) +#define SQLITE_MALLOCSIZE(x) \ + (_sqliteZone_ ? _sqliteZone_->size(_sqliteZone_,x) : malloc_size(x)) + +#else /* if not __APPLE__ */ + +/* +** Use standard C library malloc and free on non-Apple systems. +** Also used by Apple systems if SQLITE_WITHOUT_ZONEMALLOC is defined. +*/ +#define SQLITE_MALLOC(x) malloc(x) +#define SQLITE_FREE(x) free(x) +#define SQLITE_REALLOC(x,y) realloc((x),(y)) + +/* +** The malloc.h header file is needed for malloc_usable_size() function +** on some systems (e.g. Linux). +*/ +#if HAVE_MALLOC_H && HAVE_MALLOC_USABLE_SIZE +# define SQLITE_USE_MALLOC_H 1 +# define SQLITE_USE_MALLOC_USABLE_SIZE 1 +/* +** The MSVCRT has malloc_usable_size(), but it is called _msize(). The +** use of _msize() is automatic, but can be disabled by compiling with +** -DSQLITE_WITHOUT_MSIZE. Using the _msize() function also requires +** the malloc.h header file. +*/ +#elif defined(_MSC_VER) && !defined(SQLITE_WITHOUT_MSIZE) +# define SQLITE_USE_MALLOC_H +# define SQLITE_USE_MSIZE +#endif + +/* +** Include the malloc.h header file, if necessary. Also set define macro +** SQLITE_MALLOCSIZE to the appropriate function name, which is _msize() +** for MSVC and malloc_usable_size() for most other systems (e.g. Linux). +** The memory size function can always be overridden manually by defining +** the macro SQLITE_MALLOCSIZE to the desired function name. +*/ +#if defined(SQLITE_USE_MALLOC_H) +# include <malloc.h> +# if defined(SQLITE_USE_MALLOC_USABLE_SIZE) +# if !defined(SQLITE_MALLOCSIZE) +# define SQLITE_MALLOCSIZE(x) malloc_usable_size(x) +# endif +# elif defined(SQLITE_USE_MSIZE) +# if !defined(SQLITE_MALLOCSIZE) +# define SQLITE_MALLOCSIZE _msize +# endif +# endif +#endif /* defined(SQLITE_USE_MALLOC_H) */ + +#endif /* __APPLE__ or not __APPLE__ */ + +/* +** Like malloc(), but remember the size of the allocation +** so that we can find it later using sqlite3MemSize(). +** +** For this low-level routine, we are guaranteed that nByte>0 because +** cases of nByte<=0 will be intercepted and dealt with by higher level +** routines. +*/ +static void *sqlite3MemMalloc(int nByte){ +#ifdef SQLITE_MALLOCSIZE + void *p = SQLITE_MALLOC( nByte ); + if( p==0 ){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(SQLITE_NOMEM, "failed to allocate %u bytes of memory", nByte); + } + return p; +#else + sqlite3_int64 *p; + assert( nByte>0 ); + nByte = ROUND8(nByte); + p = SQLITE_MALLOC( nByte+8 ); + if( p ){ + p[0] = nByte; + p++; + }else{ + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(SQLITE_NOMEM, "failed to allocate %u bytes of memory", nByte); + } + return (void *)p; +#endif +} + +/* +** Like free() but works for allocations obtained from sqlite3MemMalloc() +** or sqlite3MemRealloc(). +** +** For this low-level routine, we already know that pPrior!=0 since +** cases where pPrior==0 will have been intecepted and dealt with +** by higher-level routines. +*/ +static void sqlite3MemFree(void *pPrior){ +#ifdef SQLITE_MALLOCSIZE + SQLITE_FREE(pPrior); +#else + sqlite3_int64 *p = (sqlite3_int64*)pPrior; + assert( pPrior!=0 ); + p--; + SQLITE_FREE(p); +#endif +} + +/* +** Report the allocated size of a prior return from xMalloc() +** or xRealloc(). +*/ +static int sqlite3MemSize(void *pPrior){ +#ifdef SQLITE_MALLOCSIZE + return pPrior ? (int)SQLITE_MALLOCSIZE(pPrior) : 0; +#else + sqlite3_int64 *p; + if( pPrior==0 ) return 0; + p = (sqlite3_int64*)pPrior; + p--; + return (int)p[0]; +#endif +} + +/* +** Like realloc(). Resize an allocation previously obtained from +** sqlite3MemMalloc(). +** +** For this low-level interface, we know that pPrior!=0. Cases where +** pPrior==0 while have been intercepted by higher-level routine and +** redirected to xMalloc. Similarly, we know that nByte>0 because +** cases where nByte<=0 will have been intercepted by higher-level +** routines and redirected to xFree. +*/ +static void *sqlite3MemRealloc(void *pPrior, int nByte){ +#ifdef SQLITE_MALLOCSIZE + void *p = SQLITE_REALLOC(pPrior, nByte); + if( p==0 ){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(SQLITE_NOMEM, + "failed memory resize %u to %u bytes", + SQLITE_MALLOCSIZE(pPrior), nByte); + } + return p; +#else + sqlite3_int64 *p = (sqlite3_int64*)pPrior; + assert( pPrior!=0 && nByte>0 ); + assert( nByte==ROUND8(nByte) ); /* EV: R-46199-30249 */ + p--; + p = SQLITE_REALLOC(p, nByte+8 ); + if( p ){ + p[0] = nByte; + p++; + }else{ + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(SQLITE_NOMEM, + "failed memory resize %u to %u bytes", + sqlite3MemSize(pPrior), nByte); + } + return (void*)p; +#endif +} + +/* +** Round up a request size to the next valid allocation size. +*/ +static int sqlite3MemRoundup(int n){ + return ROUND8(n); +} + +/* +** Initialize this module. +*/ +static int sqlite3MemInit(void *NotUsed){ +#if defined(__APPLE__) && !defined(SQLITE_WITHOUT_ZONEMALLOC) + int cpuCount; + size_t len; + if( _sqliteZone_ ){ + return SQLITE_OK; + } + len = sizeof(cpuCount); + /* One usually wants to use hw.acctivecpu for MT decisions, but not here */ + sysctlbyname("hw.ncpu", &cpuCount, &len, NULL, 0); + if( cpuCount>1 ){ + /* defer MT decisions to system malloc */ + _sqliteZone_ = malloc_default_zone(); + }else{ + /* only 1 core, use our own zone to contention over global locks, + ** e.g. we have our own dedicated locks */ + bool success; + malloc_zone_t* newzone = malloc_create_zone(4096, 0); + malloc_set_zone_name(newzone, "Sqlite_Heap"); + do{ + success = OSAtomicCompareAndSwapPtrBarrier(NULL, newzone, + (void * volatile *)&_sqliteZone_); + }while(!_sqliteZone_); + if( !success ){ + /* somebody registered a zone first */ + malloc_destroy_zone(newzone); + } + } +#endif + UNUSED_PARAMETER(NotUsed); + return SQLITE_OK; +} + +/* +** Deinitialize this module. +*/ +static void sqlite3MemShutdown(void *NotUsed){ + UNUSED_PARAMETER(NotUsed); + return; +} + +/* +** This routine is the only routine in this file with external linkage. +** +** Populate the low-level memory allocation function pointers in +** sqlite3GlobalConfig.m with pointers to the routines in this file. +*/ +SQLITE_PRIVATE void sqlite3MemSetDefault(void){ + static const sqlite3_mem_methods defaultMethods = { + sqlite3MemMalloc, + sqlite3MemFree, + sqlite3MemRealloc, + sqlite3MemSize, + sqlite3MemRoundup, + sqlite3MemInit, + sqlite3MemShutdown, + 0 + }; + sqlite3_config(SQLITE_CONFIG_MALLOC, &defaultMethods); +} + +#endif /* SQLITE_SYSTEM_MALLOC */ + +/************** End of mem1.c ************************************************/ +/************** Begin file mutex.c *******************************************/ +/* +** 2007 August 14 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the C functions that implement mutexes. +** +** This file contains code that is common across all mutex implementations. +*/ +/* #include "sqliteInt.h" */ + +#if defined(SQLITE_DEBUG) && !defined(SQLITE_MUTEX_OMIT) +/* +** For debugging purposes, record when the mutex subsystem is initialized +** and uninitialized so that we can assert() if there is an attempt to +** allocate a mutex while the system is uninitialized. +*/ +static SQLITE_WSD int mutexIsInit = 0; +#endif /* SQLITE_DEBUG && !defined(SQLITE_MUTEX_OMIT) */ + + +#ifndef SQLITE_MUTEX_OMIT +/* +** Initialize the mutex system. +*/ +SQLITE_PRIVATE int sqlite3MutexInit(void){ + int rc = SQLITE_OK; + if( !sqlite3GlobalConfig.mutex.xMutexAlloc ){ + /* If the xMutexAlloc method has not been set, then the user did not + ** install a mutex implementation via sqlite3_config() prior to + ** sqlite3BtreeInitialize() being called. This block copies pointers to + ** the default implementation into the sqlite3GlobalConfig structure. + */ + sqlite3_mutex_methods const *pFrom; + sqlite3_mutex_methods *pTo = &sqlite3GlobalConfig.mutex; + + if( sqlite3GlobalConfig.bCoreMutex ){ + pFrom = sqlite3DefaultMutex(); + }else{ + pFrom = sqlite3NoopMutex(); + } + pTo->xMutexInit = pFrom->xMutexInit; + pTo->xMutexEnd = pFrom->xMutexEnd; + pTo->xMutexFree = pFrom->xMutexFree; + pTo->xMutexEnter = pFrom->xMutexEnter; + pTo->xMutexTry = pFrom->xMutexTry; + pTo->xMutexLeave = pFrom->xMutexLeave; + pTo->xMutexHeld = pFrom->xMutexHeld; + pTo->xMutexNotheld = pFrom->xMutexNotheld; + sqlite3MemoryBarrier(); + pTo->xMutexAlloc = pFrom->xMutexAlloc; + } + assert( sqlite3GlobalConfig.mutex.xMutexInit ); + rc = sqlite3GlobalConfig.mutex.xMutexInit(); + +#ifdef SQLITE_DEBUG + GLOBAL(int, mutexIsInit) = 1; +#endif + + return rc; +} + +/* +** Shutdown the mutex system. This call frees resources allocated by +** sqlite3MutexInit(). +*/ +SQLITE_PRIVATE int sqlite3MutexEnd(void){ + int rc = SQLITE_OK; + if( sqlite3GlobalConfig.mutex.xMutexEnd ){ + rc = sqlite3GlobalConfig.mutex.xMutexEnd(); + } + +#ifdef SQLITE_DEBUG + GLOBAL(int, mutexIsInit) = 0; +#endif + + return rc; +} + +/* +** Retrieve a pointer to a static mutex or allocate a new dynamic one. +*/ +SQLITE_PRIVATE sqlite3_mutex *sqlite3_mutex_alloc(int id){ +#ifndef SQLITE_OMIT_AUTOINIT + if( id<=SQLITE_MUTEX_RECURSIVE && sqlite3BtreeInitialize() ) return 0; + if( id>SQLITE_MUTEX_RECURSIVE && sqlite3MutexInit() ) return 0; +#endif + assert( sqlite3GlobalConfig.mutex.xMutexAlloc ); + return sqlite3GlobalConfig.mutex.xMutexAlloc(id); +} + +SQLITE_PRIVATE sqlite3_mutex *sqlite3MutexAlloc(int id){ + if( !sqlite3GlobalConfig.bCoreMutex ){ + return 0; + } + assert( GLOBAL(int, mutexIsInit) ); + assert( sqlite3GlobalConfig.mutex.xMutexAlloc ); + return sqlite3GlobalConfig.mutex.xMutexAlloc(id); +} + +/* +** Free a dynamic mutex. +*/ +SQLITE_PRIVATE void sqlite3_mutex_free(sqlite3_mutex *p){ + if( p ){ + assert( sqlite3GlobalConfig.mutex.xMutexFree ); + sqlite3GlobalConfig.mutex.xMutexFree(p); + } +} + +/* +** Obtain the mutex p. If some other thread already has the mutex, block +** until it can be obtained. +*/ +SQLITE_PRIVATE void sqlite3_mutex_enter(sqlite3_mutex *p){ + if( p ){ + assert( sqlite3GlobalConfig.mutex.xMutexEnter ); + sqlite3GlobalConfig.mutex.xMutexEnter(p); + } +} + +/* +** Obtain the mutex p. If successful, return SQLITE_OK. Otherwise, if another +** thread holds the mutex and it cannot be obtained, return SQLITE_BUSY. +*/ +SQLITE_PRIVATE int sqlite3_mutex_try(sqlite3_mutex *p){ + int rc = SQLITE_OK; + if( p ){ + assert( sqlite3GlobalConfig.mutex.xMutexTry ); + return sqlite3GlobalConfig.mutex.xMutexTry(p); + } + return rc; +} + +/* +** The sqlite3_mutex_leave() routine exits a mutex that was previously +** entered by the same thread. The behavior is undefined if the mutex +** is not currently entered. If a NULL pointer is passed as an argument +** this function is a no-op. +*/ +SQLITE_PRIVATE void sqlite3_mutex_leave(sqlite3_mutex *p){ + if( p ){ + assert( sqlite3GlobalConfig.mutex.xMutexLeave ); + sqlite3GlobalConfig.mutex.xMutexLeave(p); + } +} + +#ifndef NDEBUG +/* +** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are +** intended for use inside assert() statements. +*/ +SQLITE_PRIVATE int sqlite3_mutex_held(sqlite3_mutex *p){ + assert( p==0 || sqlite3GlobalConfig.mutex.xMutexHeld ); + return p==0 || sqlite3GlobalConfig.mutex.xMutexHeld(p); +} +SQLITE_PRIVATE int sqlite3_mutex_notheld(sqlite3_mutex *p){ + assert( p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld ); + return p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld(p); +} +#endif + +#endif /* !defined(SQLITE_MUTEX_OMIT) */ + +/************** End of mutex.c ***********************************************/ +/************** Begin file mutex_noop.c **************************************/ +/* +** 2008 October 07 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the C functions that implement mutexes. +** +** This implementation in this file does not provide any mutual +** exclusion and is thus suitable for use only in applications +** that use SQLite in a single thread. The routines defined +** here are place-holders. Applications can substitute working +** mutex routines at start-time using the +** +** sqlite3_config(SQLITE_CONFIG_MUTEX,...) +** +** interface. +** +** If compiled with SQLITE_DEBUG, then additional logic is inserted +** that does error checking on mutexes to make sure they are being +** called correctly. +*/ +/* #include "sqliteInt.h" */ + +#ifndef SQLITE_MUTEX_OMIT + +#ifndef SQLITE_DEBUG +/* +** Stub routines for all mutex methods. +** +** This routines provide no mutual exclusion or error checking. +*/ +static int noopMutexInit(void){ return SQLITE_OK; } +static int noopMutexEnd(void){ return SQLITE_OK; } +static sqlite3_mutex *noopMutexAlloc(int id){ + UNUSED_PARAMETER(id); + return (sqlite3_mutex*)8; +} +static void noopMutexFree(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return; } +static void noopMutexEnter(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return; } +static int noopMutexTry(sqlite3_mutex *p){ + UNUSED_PARAMETER(p); + return SQLITE_OK; +} +static void noopMutexLeave(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return; } + +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3NoopMutex(void){ + static const sqlite3_mutex_methods sMutex = { + noopMutexInit, + noopMutexEnd, + noopMutexAlloc, + noopMutexFree, + noopMutexEnter, + noopMutexTry, + noopMutexLeave, + + 0, + 0, + }; + + return &sMutex; +} +#endif /* !SQLITE_DEBUG */ + +#ifdef SQLITE_DEBUG +/* +** In this implementation, error checking is provided for testing +** and debugging purposes. The mutexes still do not provide any +** mutual exclusion. +*/ + +/* +** The mutex object +*/ +typedef struct sqlite3_debug_mutex { + int id; /* The mutex type */ + int cnt; /* Number of entries without a matching leave */ +} sqlite3_debug_mutex; + +/* +** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are +** intended for use inside assert() statements. +*/ +static int debugMutexHeld(sqlite3_mutex *pX){ + sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; + return p==0 || p->cnt>0; +} +static int debugMutexNotheld(sqlite3_mutex *pX){ + sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; + return p==0 || p->cnt==0; +} + +/* +** Initialize and deinitialize the mutex subsystem. +*/ +static int debugMutexInit(void){ return SQLITE_OK; } +static int debugMutexEnd(void){ return SQLITE_OK; } + +/* +** The sqlite3_mutex_alloc() routine allocates a new +** mutex and returns a pointer to it. If it returns NULL +** that means that a mutex could not be allocated. +*/ +static sqlite3_mutex *debugMutexAlloc(int id){ + static sqlite3_debug_mutex aStatic[SQLITE_MUTEX_STATIC_VFS3 - 1]; + sqlite3_debug_mutex *pNew = 0; + switch( id ){ + case SQLITE_MUTEX_FAST: + case SQLITE_MUTEX_RECURSIVE: { + pNew = sqlite3Malloc(sizeof(*pNew)); + if( pNew ){ + pNew->id = id; + pNew->cnt = 0; + } + break; + } + default: { +#ifdef SQLITE_ENABLE_API_ARMOR + if( id-2<0 || id-2>=ArraySize(aStatic) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + pNew = &aStatic[id-2]; + pNew->id = id; + break; + } + } + return (sqlite3_mutex*)pNew; +} + +/* +** This routine deallocates a previously allocated mutex. +*/ +static void debugMutexFree(sqlite3_mutex *pX){ + sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; + assert( p->cnt==0 ); + if( p->id==SQLITE_MUTEX_RECURSIVE || p->id==SQLITE_MUTEX_FAST ){ + sqlite3_free(p); + }else{ +#ifdef SQLITE_ENABLE_API_ARMOR + (void)SQLITE_MISUSE_BKPT; +#endif + } +} + +/* +** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt +** to enter a mutex. If another thread is already within the mutex, +** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return +** SQLITE_BUSY. The sqlite3_mutex_try() interface returns SQLITE_OK +** upon successful entry. Mutexes created using SQLITE_MUTEX_RECURSIVE can +** be entered multiple times by the same thread. In such cases the, +** mutex must be exited an equal number of times before another thread +** can enter. If the same thread tries to enter any other kind of mutex +** more than once, the behavior is undefined. +*/ +static void debugMutexEnter(sqlite3_mutex *pX){ + sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; + assert( p->id==SQLITE_MUTEX_RECURSIVE || debugMutexNotheld(pX) ); + p->cnt++; +} +static int debugMutexTry(sqlite3_mutex *pX){ + sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; + assert( p->id==SQLITE_MUTEX_RECURSIVE || debugMutexNotheld(pX) ); + p->cnt++; + return SQLITE_OK; +} + +/* +** The sqlite3_mutex_leave() routine exits a mutex that was +** previously entered by the same thread. The behavior +** is undefined if the mutex is not currently entered or +** is not currently allocated. SQLite will never do either. +*/ +static void debugMutexLeave(sqlite3_mutex *pX){ + sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; + assert( debugMutexHeld(pX) ); + p->cnt--; + assert( p->id==SQLITE_MUTEX_RECURSIVE || debugMutexNotheld(pX) ); +} + +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3NoopMutex(void){ + static const sqlite3_mutex_methods sMutex = { + debugMutexInit, + debugMutexEnd, + debugMutexAlloc, + debugMutexFree, + debugMutexEnter, + debugMutexTry, + debugMutexLeave, + + debugMutexHeld, + debugMutexNotheld + }; + + return &sMutex; +} +#endif /* SQLITE_DEBUG */ + +/* +** If compiled with SQLITE_MUTEX_NOOP, then the no-op mutex implementation +** is used regardless of the run-time threadsafety setting. +*/ +#ifdef SQLITE_MUTEX_NOOP +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ + return sqlite3NoopMutex(); +} +#endif /* defined(SQLITE_MUTEX_NOOP) */ +#endif /* !defined(SQLITE_MUTEX_OMIT) */ + +/************** End of mutex_noop.c ******************************************/ +/************** Begin file mutex_unix.c **************************************/ +/* +** 2007 August 28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the C functions that implement mutexes for pthreads +*/ +/* #include "sqliteInt.h" */ + +/* +** The code in this file is only used if we are compiling threadsafe +** under unix with pthreads. +** +** Note that this implementation requires a version of pthreads that +** supports recursive mutexes. +*/ +#ifdef SQLITE_MUTEX_PTHREADS + +#include <pthread.h> + +/* +** The sqlite3_mutex.id, sqlite3_mutex.nRef, and sqlite3_mutex.owner fields +** are necessary under two condidtions: (1) Debug builds and (2) using +** home-grown mutexes. Encapsulate these conditions into a single #define. +*/ +#if defined(SQLITE_DEBUG) || defined(SQLITE_HOMEGROWN_RECURSIVE_MUTEX) +# define SQLITE_MUTEX_NREF 1 +#else +# define SQLITE_MUTEX_NREF 0 +#endif + +/* +** Each recursive mutex is an instance of the following structure. +*/ +struct sqlite3_mutex { + pthread_mutex_t mutex; /* Mutex controlling the lock */ +#if SQLITE_MUTEX_NREF || defined(SQLITE_ENABLE_API_ARMOR) + int id; /* Mutex type */ +#endif +#if SQLITE_MUTEX_NREF + volatile int nRef; /* Number of entrances */ + volatile pthread_t owner; /* Thread that is within this mutex */ + int trace; /* True to trace changes */ +#endif +}; +#if SQLITE_MUTEX_NREF +#define SQLITE3_MUTEX_INITIALIZER { PTHREAD_MUTEX_INITIALIZER, 0, 0, (pthread_t)0, 0 } +#else +#define SQLITE3_MUTEX_INITIALIZER { PTHREAD_MUTEX_INITIALIZER } +#endif + +/* +** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are +** intended for use only inside assert() statements. On some platforms, +** there might be race conditions that can cause these routines to +** deliver incorrect results. In particular, if pthread_equal() is +** not an atomic operation, then these routines might delivery +** incorrect results. On most platforms, pthread_equal() is a +** comparison of two integers and is therefore atomic. But we are +** told that HPUX is not such a platform. If so, then these routines +** will not always work correctly on HPUX. +** +** On those platforms where pthread_equal() is not atomic, SQLite +** should be compiled without -DSQLITE_DEBUG and with -DNDEBUG to +** make sure no assert() statements are evaluated and hence these +** routines are never called. +*/ +#if !defined(NDEBUG) || defined(SQLITE_DEBUG) +static int pthreadMutexHeld(sqlite3_mutex *p){ + return (p->nRef!=0 && pthread_equal(p->owner, pthread_self())); +} +static int pthreadMutexNotheld(sqlite3_mutex *p){ + return p->nRef==0 || pthread_equal(p->owner, pthread_self())==0; +} +#endif + +/* +** Try to provide a memory barrier operation, needed for initialization +** and also for the implementation of xShmBarrier in the VFS in cases +** where SQLite is compiled without mutexes. +*/ +SQLITE_PRIVATE void sqlite3MemoryBarrier(void){ +#if defined(SQLITE_MEMORY_BARRIER) + SQLITE_MEMORY_BARRIER; +#elif defined(__GNUC__) && GCC_VERSION>=4001000 + __sync_synchronize(); +#endif +} + +/* +** Initialize and deinitialize the mutex subsystem. +*/ +static int pthreadMutexInit(void){ return SQLITE_OK; } +static int pthreadMutexEnd(void){ return SQLITE_OK; } + +/* +** The sqlite3_mutex_alloc() routine allocates a new +** mutex and returns a pointer to it. If it returns NULL +** that means that a mutex could not be allocated. SQLite +** will unwind its stack and return an error. The argument +** to sqlite3_mutex_alloc() is one of these integer constants: +** +** <ul> +** <li> SQLITE_MUTEX_FAST +** <li> SQLITE_MUTEX_RECURSIVE +** <li> SQLITE_MUTEX_STATIC_MASTER +** <li> SQLITE_MUTEX_STATIC_MEM +** <li> SQLITE_MUTEX_STATIC_OPEN +** <li> SQLITE_MUTEX_STATIC_PRNG +** <li> SQLITE_MUTEX_STATIC_LRU +** <li> SQLITE_MUTEX_STATIC_PMEM +** <li> SQLITE_MUTEX_STATIC_APP1 +** <li> SQLITE_MUTEX_STATIC_APP2 +** <li> SQLITE_MUTEX_STATIC_APP3 +** <li> SQLITE_MUTEX_STATIC_VFS1 +** <li> SQLITE_MUTEX_STATIC_VFS2 +** <li> SQLITE_MUTEX_STATIC_VFS3 +** </ul> +** +** The first two constants cause sqlite3_mutex_alloc() to create +** a new mutex. The new mutex is recursive when SQLITE_MUTEX_RECURSIVE +** is used but not necessarily so when SQLITE_MUTEX_FAST is used. +** The mutex implementation does not need to make a distinction +** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does +** not want to. But SQLite will only request a recursive mutex in +** cases where it really needs one. If a faster non-recursive mutex +** implementation is available on the host platform, the mutex subsystem +** might return such a mutex in response to SQLITE_MUTEX_FAST. +** +** The other allowed parameters to sqlite3_mutex_alloc() each return +** a pointer to a static preexisting mutex. Six static mutexes are +** used by the current version of SQLite. Future versions of SQLite +** may add additional static mutexes. Static mutexes are for internal +** use by SQLite only. Applications that use SQLite mutexes should +** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or +** SQLITE_MUTEX_RECURSIVE. +** +** Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST +** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc() +** returns a different mutex on every call. But for the static +** mutex types, the same mutex is returned on every call that has +** the same type number. +*/ +static sqlite3_mutex *pthreadMutexAlloc(int iType){ + static sqlite3_mutex staticMutexes[] = { + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER + }; + sqlite3_mutex *p; + switch( iType ){ + case SQLITE_MUTEX_RECURSIVE: { + p = sqlite3MallocZero( sizeof(*p) ); + if( p ){ +#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX + /* If recursive mutexes are not available, we will have to + ** build our own. See below. */ + pthread_mutex_init(&p->mutex, 0); +#else + /* Use a recursive mutex if it is available */ + pthread_mutexattr_t recursiveAttr; + pthread_mutexattr_init(&recursiveAttr); + pthread_mutexattr_settype(&recursiveAttr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&p->mutex, &recursiveAttr); + pthread_mutexattr_destroy(&recursiveAttr); +#endif + } + break; + } + case SQLITE_MUTEX_FAST: { + p = sqlite3MallocZero( sizeof(*p) ); + if( p ){ + pthread_mutex_init(&p->mutex, 0); + } + break; + } + default: { +#ifdef SQLITE_ENABLE_API_ARMOR + if( iType-2<0 || iType-2>=ArraySize(staticMutexes) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + p = &staticMutexes[iType-2]; + break; + } + } +#if SQLITE_MUTEX_NREF || defined(SQLITE_ENABLE_API_ARMOR) + if( p ) p->id = iType; +#endif + return p; +} + + +/* +** This routine deallocates a previously +** allocated mutex. SQLite is careful to deallocate every +** mutex that it allocates. +*/ +static void pthreadMutexFree(sqlite3_mutex *p){ + assert( p->nRef==0 ); +#if SQLITE_ENABLE_API_ARMOR + if( p->id==SQLITE_MUTEX_FAST || p->id==SQLITE_MUTEX_RECURSIVE ) +#endif + { + pthread_mutex_destroy(&p->mutex); + sqlite3_free(p); + } +#ifdef SQLITE_ENABLE_API_ARMOR + else{ + (void)SQLITE_MISUSE_BKPT; + } +#endif +} + +/* +** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt +** to enter a mutex. If another thread is already within the mutex, +** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return +** SQLITE_BUSY. The sqlite3_mutex_try() interface returns SQLITE_OK +** upon successful entry. Mutexes created using SQLITE_MUTEX_RECURSIVE can +** be entered multiple times by the same thread. In such cases the, +** mutex must be exited an equal number of times before another thread +** can enter. If the same thread tries to enter any other kind of mutex +** more than once, the behavior is undefined. +*/ +static void pthreadMutexEnter(sqlite3_mutex *p){ + assert( p->id==SQLITE_MUTEX_RECURSIVE || pthreadMutexNotheld(p) ); + +#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX + /* If recursive mutexes are not available, then we have to grow + ** our own. This implementation assumes that pthread_equal() + ** is atomic - that it cannot be deceived into thinking self + ** and p->owner are equal if p->owner changes between two values + ** that are not equal to self while the comparison is taking place. + ** This implementation also assumes a coherent cache - that + ** separate processes cannot read different values from the same + ** address at the same time. If either of these two conditions + ** are not met, then the mutexes will fail and problems will result. + */ + { + pthread_t self = pthread_self(); + if( p->nRef>0 && pthread_equal(p->owner, self) ){ + p->nRef++; + }else{ + pthread_mutex_lock(&p->mutex); + assert( p->nRef==0 ); + p->owner = self; + p->nRef = 1; + } + } +#else + /* Use the built-in recursive mutexes if they are available. + */ + pthread_mutex_lock(&p->mutex); +#if SQLITE_MUTEX_NREF + assert( p->nRef>0 || p->owner==0 ); + p->owner = pthread_self(); + p->nRef++; +#endif +#endif + +#ifdef SQLITE_DEBUG + if( p->trace ){ + printf("enter mutex %p (%d) with nRef=%d\n", p, p->trace, p->nRef); + } +#endif +} +static int pthreadMutexTry(sqlite3_mutex *p){ + int rc; + assert( p->id==SQLITE_MUTEX_RECURSIVE || pthreadMutexNotheld(p) ); + +#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX + /* If recursive mutexes are not available, then we have to grow + ** our own. This implementation assumes that pthread_equal() + ** is atomic - that it cannot be deceived into thinking self + ** and p->owner are equal if p->owner changes between two values + ** that are not equal to self while the comparison is taking place. + ** This implementation also assumes a coherent cache - that + ** separate processes cannot read different values from the same + ** address at the same time. If either of these two conditions + ** are not met, then the mutexes will fail and problems will result. + */ + { + pthread_t self = pthread_self(); + if( p->nRef>0 && pthread_equal(p->owner, self) ){ + p->nRef++; + rc = SQLITE_OK; + }else if( pthread_mutex_trylock(&p->mutex)==0 ){ + assert( p->nRef==0 ); + p->owner = self; + p->nRef = 1; + rc = SQLITE_OK; + }else{ + rc = SQLITE_BUSY; + } + } +#else + /* Use the built-in recursive mutexes if they are available. + */ + if( pthread_mutex_trylock(&p->mutex)==0 ){ +#if SQLITE_MUTEX_NREF + p->owner = pthread_self(); + p->nRef++; +#endif + rc = SQLITE_OK; + }else{ + rc = SQLITE_BUSY; + } +#endif + +#ifdef SQLITE_DEBUG + if( rc==SQLITE_OK && p->trace ){ + printf("enter mutex %p (%d) with nRef=%d\n", p, p->trace, p->nRef); + } +#endif + return rc; +} + +/* +** The sqlite3_mutex_leave() routine exits a mutex that was +** previously entered by the same thread. The behavior +** is undefined if the mutex is not currently entered or +** is not currently allocated. SQLite will never do either. +*/ +static void pthreadMutexLeave(sqlite3_mutex *p){ + assert( pthreadMutexHeld(p) ); +#if SQLITE_MUTEX_NREF + p->nRef--; + if( p->nRef==0 ) p->owner = 0; +#endif + assert( p->nRef==0 || p->id==SQLITE_MUTEX_RECURSIVE ); + +#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX + if( p->nRef==0 ){ + pthread_mutex_unlock(&p->mutex); + } +#else + pthread_mutex_unlock(&p->mutex); +#endif + +#ifdef SQLITE_DEBUG + if( p->trace ){ + printf("leave mutex %p (%d) with nRef=%d\n", p, p->trace, p->nRef); + } +#endif +} + +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ + static const sqlite3_mutex_methods sMutex = { + pthreadMutexInit, + pthreadMutexEnd, + pthreadMutexAlloc, + pthreadMutexFree, + pthreadMutexEnter, + pthreadMutexTry, + pthreadMutexLeave, +#ifdef SQLITE_DEBUG + pthreadMutexHeld, + pthreadMutexNotheld +#else + 0, + 0 +#endif + }; + + return &sMutex; +} + +#endif /* SQLITE_MUTEX_PTHREADS */ + +/************** End of mutex_unix.c ******************************************/ +/************** Begin file mutex_w32.c ***************************************/ +/* +** 2007 August 14 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the C functions that implement mutexes for Win32. +*/ +/* #include "sqliteInt.h" */ + +#if SQLITE_OS_WIN +/* +** Include code that is common to all os_*.c files +*/ +/************** Include os_common.h in the middle of mutex_w32.c *************/ +/************** Begin file os_common.h ***************************************/ +/* +** 2004 May 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains macros and a little bit of code that is common to +** all of the platform-specific files (os_*.c) and is #included into those +** files. +** +** This file should be #included by the os_*.c files only. It is not a +** general purpose header file. +*/ +#ifndef _OS_COMMON_H_ +#define _OS_COMMON_H_ + +/* +** At least two bugs have slipped in because we changed the MEMORY_DEBUG +** macro to SQLITE_DEBUG and some older makefiles have not yet made the +** switch. The following code should catch this problem at compile-time. +*/ +#ifdef MEMORY_DEBUG +# error "The MEMORY_DEBUG macro is obsolete. Use SQLITE_DEBUG instead." +#endif + +/* +** Macros for performance tracing. Normally turned off. Only works +** on i486 hardware. +*/ +#ifdef SQLITE_PERFORMANCE_TRACE + +/* +** hwtime.h contains inline assembler code for implementing +** high-performance timing routines. +*/ +#include "hwtime.h" + +static sqlite_uint64 g_start; +static sqlite_uint64 g_elapsed; +#define TIMER_START g_start=sqlite3Hwtime() +#define TIMER_END g_elapsed=sqlite3Hwtime()-g_start +#define TIMER_ELAPSED g_elapsed +#else +#define TIMER_START +#define TIMER_END +#define TIMER_ELAPSED ((sqlite_uint64)0) +#endif + +/* +** If we compile with the SQLITE_TEST macro set, then the following block +** of code will give us the ability to simulate a disk I/O error. This +** is used for testing the I/O recovery logic. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3_io_error_hit = 0; /* Total number of I/O Errors */ +SQLITE_PRIVATE int sqlite3_io_error_hardhit = 0; /* Number of non-benign errors */ +SQLITE_PRIVATE int sqlite3_io_error_pending = 0; /* Count down to first I/O error */ +SQLITE_PRIVATE int sqlite3_io_error_persist = 0; /* True if I/O errors persist */ +SQLITE_PRIVATE int sqlite3_io_error_benign = 0; /* True if errors are benign */ +SQLITE_PRIVATE int sqlite3_diskfull_pending = 0; +SQLITE_PRIVATE int sqlite3_diskfull = 0; +#define SimulateIOErrorBenign(X) sqlite3_io_error_benign=(X) +#define SimulateIOError(CODE) \ + if( (sqlite3_io_error_persist && sqlite3_io_error_hit) \ + || sqlite3_io_error_pending-- == 1 ) \ + { local_ioerr(); CODE; } +static void local_ioerr(){ + IOTRACE(("IOERR\n")); + sqlite3_io_error_hit++; + if( !sqlite3_io_error_benign ) sqlite3_io_error_hardhit++; +} +#define SimulateDiskfullError(CODE) \ + if( sqlite3_diskfull_pending ){ \ + if( sqlite3_diskfull_pending == 1 ){ \ + local_ioerr(); \ + sqlite3_diskfull = 1; \ + sqlite3_io_error_hit = 1; \ + CODE; \ + }else{ \ + sqlite3_diskfull_pending--; \ + } \ + } +#else +#define SimulateIOErrorBenign(X) +#define SimulateIOError(A) +#define SimulateDiskfullError(A) +#endif + +/* +** When testing, keep a count of the number of open files. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3_open_file_count = 0; +#define OpenCounter(X) sqlite3_open_file_count+=(X) +#else +#define OpenCounter(X) +#endif + +#endif /* !defined(_OS_COMMON_H_) */ + +/************** End of os_common.h *******************************************/ +/************** Continuing where we left off in mutex_w32.c ******************/ + +/* +** Include the header file for the Windows VFS. +*/ +/************** Include os_win.h in the middle of mutex_w32.c ****************/ +/************** Begin file os_win.h ******************************************/ +/* +** 2013 November 25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code that is specific to Windows. +*/ +#ifndef _OS_WIN_H_ +#define _OS_WIN_H_ + +/* +** Include the primary Windows SDK header file. +*/ +#include "windows.h" + +#ifdef __CYGWIN__ +# include <sys/cygwin.h> +# include <errno.h> /* amalgamator: dontcache */ +#endif + +/* +** Determine if we are dealing with Windows NT. +** +** We ought to be able to determine if we are compiling for Windows 9x or +** Windows NT using the _WIN32_WINNT macro as follows: +** +** #if defined(_WIN32_WINNT) +** # define SQLITE_OS_WINNT 1 +** #else +** # define SQLITE_OS_WINNT 0 +** #endif +** +** However, Visual Studio 2005 does not set _WIN32_WINNT by default, as +** it ought to, so the above test does not work. We'll just assume that +** everything is Windows NT unless the programmer explicitly says otherwise +** by setting SQLITE_OS_WINNT to 0. +*/ +#if SQLITE_OS_WIN && !defined(SQLITE_OS_WINNT) +# define SQLITE_OS_WINNT 1 +#endif + +/* +** Determine if we are dealing with Windows CE - which has a much reduced +** API. +*/ +#if defined(_WIN32_WCE) +# define SQLITE_OS_WINCE 1 +#else +# define SQLITE_OS_WINCE 0 +#endif + +/* +** Determine if we are dealing with WinRT, which provides only a subset of +** the full Win32 API. +*/ +#if !defined(SQLITE_OS_WINRT) +# define SQLITE_OS_WINRT 0 +#endif + +/* +** For WinCE, some API function parameters do not appear to be declared as +** volatile. +*/ +#if SQLITE_OS_WINCE +# define SQLITE_WIN32_VOLATILE +#else +# define SQLITE_WIN32_VOLATILE volatile +#endif + +/* +** For some Windows sub-platforms, the _beginthreadex() / _endthreadex() +** functions are not available (e.g. those not using MSVC, Cygwin, etc). +*/ +#if SQLITE_OS_WIN && !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && \ + SQLITE_THREADSAFE>0 && !defined(__CYGWIN__) +# define SQLITE_OS_WIN_THREADS 1 +#else +# define SQLITE_OS_WIN_THREADS 0 +#endif + +#endif /* _OS_WIN_H_ */ + +/************** End of os_win.h **********************************************/ +/************** Continuing where we left off in mutex_w32.c ******************/ +#endif + +/* +** The code in this file is only used if we are compiling multithreaded +** on a Win32 system. +*/ +#ifdef SQLITE_MUTEX_W32 + +/* +** Each recursive mutex is an instance of the following structure. +*/ +struct sqlite3_mutex { + CRITICAL_SECTION mutex; /* Mutex controlling the lock */ + int id; /* Mutex type */ +#ifdef SQLITE_DEBUG + volatile int nRef; /* Number of enterances */ + volatile DWORD owner; /* Thread holding this mutex */ + volatile int trace; /* True to trace changes */ +#endif +}; + +/* +** These are the initializer values used when declaring a "static" mutex +** on Win32. It should be noted that all mutexes require initialization +** on the Win32 platform. +*/ +#define SQLITE_W32_MUTEX_INITIALIZER { 0 } + +#ifdef SQLITE_DEBUG +#define SQLITE3_MUTEX_INITIALIZER { SQLITE_W32_MUTEX_INITIALIZER, 0, \ + 0L, (DWORD)0, 0 } +#else +#define SQLITE3_MUTEX_INITIALIZER { SQLITE_W32_MUTEX_INITIALIZER, 0 } +#endif + +#ifdef SQLITE_DEBUG +/* +** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are +** intended for use only inside assert() statements. +*/ +static int winMutexHeld(sqlite3_mutex *p){ + return p->nRef!=0 && p->owner==GetCurrentThreadId(); +} + +static int winMutexNotheld2(sqlite3_mutex *p, DWORD tid){ + return p->nRef==0 || p->owner!=tid; +} + +static int winMutexNotheld(sqlite3_mutex *p){ + DWORD tid = GetCurrentThreadId(); + return winMutexNotheld2(p, tid); +} +#endif + +/* +** Try to provide a memory barrier operation, needed for initialization +** and also for the xShmBarrier method of the VFS in cases when SQLite is +** compiled without mutexes (SQLITE_THREADSAFE=0). +*/ +SQLITE_PRIVATE void sqlite3MemoryBarrier(void){ +#if defined(SQLITE_MEMORY_BARRIER) + SQLITE_MEMORY_BARRIER; +#elif defined(__GNUC__) + __sync_synchronize(); +#elif !defined(SQLITE_DISABLE_INTRINSIC) && \ + defined(_MSC_VER) && _MSC_VER>=1300 + _ReadWriteBarrier(); +#elif defined(MemoryBarrier) + MemoryBarrier(); +#endif +} + +/* +** Initialize and deinitialize the mutex subsystem. +*/ +static sqlite3_mutex winMutex_staticMutexes[] = { + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER +}; + +static int winMutex_isInit = 0; +static int winMutex_isNt = -1; /* <0 means "need to query" */ + +/* As the winMutexInit() and winMutexEnd() functions are called as part +** of the sqlite3BtreeInitialize() and sqlite3BtreeShutdown() processing, the +** "interlocked" magic used here is probably not strictly necessary. +*/ +static LONG SQLITE_WIN32_VOLATILE winMutex_lock = 0; + +SQLITE_PRIVATE int sqlite3_win32_is_nt(void); /* os_win.c */ +SQLITE_PRIVATE void sqlite3_win32_sleep(DWORD milliseconds); /* os_win.c */ + +static int winMutexInit(void){ + /* The first to increment to 1 does actual initialization */ + if( InterlockedCompareExchange(&winMutex_lock, 1, 0)==0 ){ + int i; + for(i=0; i<ArraySize(winMutex_staticMutexes); i++){ +#if SQLITE_OS_WINRT + InitializeCriticalSectionEx(&winMutex_staticMutexes[i].mutex, 0, 0); +#else + InitializeCriticalSection(&winMutex_staticMutexes[i].mutex); +#endif + } + winMutex_isInit = 1; + }else{ + /* Another thread is (in the process of) initializing the static + ** mutexes */ + while( !winMutex_isInit ){ + sqlite3_win32_sleep(1); + } + } + return SQLITE_OK; +} + +static int winMutexEnd(void){ + /* The first to decrement to 0 does actual shutdown + ** (which should be the last to shutdown.) */ + if( InterlockedCompareExchange(&winMutex_lock, 0, 1)==1 ){ + if( winMutex_isInit==1 ){ + int i; + for(i=0; i<ArraySize(winMutex_staticMutexes); i++){ + DeleteCriticalSection(&winMutex_staticMutexes[i].mutex); + } + winMutex_isInit = 0; + } + } + return SQLITE_OK; +} + +/* +** The sqlite3_mutex_alloc() routine allocates a new +** mutex and returns a pointer to it. If it returns NULL +** that means that a mutex could not be allocated. SQLite +** will unwind its stack and return an error. The argument +** to sqlite3_mutex_alloc() is one of these integer constants: +** +** <ul> +** <li> SQLITE_MUTEX_FAST +** <li> SQLITE_MUTEX_RECURSIVE +** <li> SQLITE_MUTEX_STATIC_MASTER +** <li> SQLITE_MUTEX_STATIC_MEM +** <li> SQLITE_MUTEX_STATIC_OPEN +** <li> SQLITE_MUTEX_STATIC_PRNG +** <li> SQLITE_MUTEX_STATIC_LRU +** <li> SQLITE_MUTEX_STATIC_PMEM +** <li> SQLITE_MUTEX_STATIC_APP1 +** <li> SQLITE_MUTEX_STATIC_APP2 +** <li> SQLITE_MUTEX_STATIC_APP3 +** <li> SQLITE_MUTEX_STATIC_VFS1 +** <li> SQLITE_MUTEX_STATIC_VFS2 +** <li> SQLITE_MUTEX_STATIC_VFS3 +** </ul> +** +** The first two constants cause sqlite3_mutex_alloc() to create +** a new mutex. The new mutex is recursive when SQLITE_MUTEX_RECURSIVE +** is used but not necessarily so when SQLITE_MUTEX_FAST is used. +** The mutex implementation does not need to make a distinction +** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does +** not want to. But SQLite will only request a recursive mutex in +** cases where it really needs one. If a faster non-recursive mutex +** implementation is available on the host platform, the mutex subsystem +** might return such a mutex in response to SQLITE_MUTEX_FAST. +** +** The other allowed parameters to sqlite3_mutex_alloc() each return +** a pointer to a static preexisting mutex. Six static mutexes are +** used by the current version of SQLite. Future versions of SQLite +** may add additional static mutexes. Static mutexes are for internal +** use by SQLite only. Applications that use SQLite mutexes should +** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or +** SQLITE_MUTEX_RECURSIVE. +** +** Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST +** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc() +** returns a different mutex on every call. But for the static +** mutex types, the same mutex is returned on every call that has +** the same type number. +*/ +static sqlite3_mutex *winMutexAlloc(int iType){ + sqlite3_mutex *p; + + switch( iType ){ + case SQLITE_MUTEX_FAST: + case SQLITE_MUTEX_RECURSIVE: { + p = sqlite3MallocZero( sizeof(*p) ); + if( p ){ + p->id = iType; +#ifdef SQLITE_DEBUG +#ifdef SQLITE_WIN32_MUTEX_TRACE_DYNAMIC + p->trace = 1; +#endif +#endif +#if SQLITE_OS_WINRT + InitializeCriticalSectionEx(&p->mutex, 0, 0); +#else + InitializeCriticalSection(&p->mutex); +#endif + } + break; + } + default: { +#ifdef SQLITE_ENABLE_API_ARMOR + if( iType-2<0 || iType-2>=ArraySize(winMutex_staticMutexes) ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + p = &winMutex_staticMutexes[iType-2]; + p->id = iType; +#ifdef SQLITE_DEBUG +#ifdef SQLITE_WIN32_MUTEX_TRACE_STATIC + p->trace = 1; +#endif +#endif + break; + } + } + return p; +} + + +/* +** This routine deallocates a previously +** allocated mutex. SQLite is careful to deallocate every +** mutex that it allocates. +*/ +static void winMutexFree(sqlite3_mutex *p){ + assert( p ); + assert( p->nRef==0 && p->owner==0 ); + if( p->id==SQLITE_MUTEX_FAST || p->id==SQLITE_MUTEX_RECURSIVE ){ + DeleteCriticalSection(&p->mutex); + sqlite3_free(p); + }else{ +#ifdef SQLITE_ENABLE_API_ARMOR + (void)SQLITE_MISUSE_BKPT; +#endif + } +} + +/* +** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt +** to enter a mutex. If another thread is already within the mutex, +** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return +** SQLITE_BUSY. The sqlite3_mutex_try() interface returns SQLITE_OK +** upon successful entry. Mutexes created using SQLITE_MUTEX_RECURSIVE can +** be entered multiple times by the same thread. In such cases the, +** mutex must be exited an equal number of times before another thread +** can enter. If the same thread tries to enter any other kind of mutex +** more than once, the behavior is undefined. +*/ +static void winMutexEnter(sqlite3_mutex *p){ +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + DWORD tid = GetCurrentThreadId(); +#endif +#ifdef SQLITE_DEBUG + assert( p ); + assert( p->id==SQLITE_MUTEX_RECURSIVE || winMutexNotheld2(p, tid) ); +#else + assert( p ); +#endif + assert( winMutex_isInit==1 ); + EnterCriticalSection(&p->mutex); +#ifdef SQLITE_DEBUG + assert( p->nRef>0 || p->owner==0 ); + p->owner = tid; + p->nRef++; + if( p->trace ){ + OSTRACE(("ENTER-MUTEX tid=%lu, mutex=%p (%d), nRef=%d\n", + tid, p, p->trace, p->nRef)); + } +#endif +} + +static int winMutexTry(sqlite3_mutex *p){ +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + DWORD tid = GetCurrentThreadId(); +#endif + int rc = SQLITE_BUSY; + assert( p ); + assert( p->id==SQLITE_MUTEX_RECURSIVE || winMutexNotheld2(p, tid) ); + /* + ** The sqlite3_mutex_try() routine is very rarely used, and when it + ** is used it is merely an optimization. So it is OK for it to always + ** fail. + ** + ** The TryEnterCriticalSection() interface is only available on WinNT. + ** And some windows compilers complain if you try to use it without + ** first doing some #defines that prevent SQLite from building on Win98. + ** For that reason, we will omit this optimization for now. See + ** ticket #2685. + */ +#if defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0400 + assert( winMutex_isInit==1 ); + assert( winMutex_isNt>=-1 && winMutex_isNt<=1 ); + if( winMutex_isNt<0 ){ + winMutex_isNt = sqlite3_win32_is_nt(); + } + assert( winMutex_isNt==0 || winMutex_isNt==1 ); + if( winMutex_isNt && TryEnterCriticalSection(&p->mutex) ){ +#ifdef SQLITE_DEBUG + p->owner = tid; + p->nRef++; +#endif + rc = SQLITE_OK; + } +#else + UNUSED_PARAMETER(p); +#endif +#ifdef SQLITE_DEBUG + if( p->trace ){ + OSTRACE(("TRY-MUTEX tid=%lu, mutex=%p (%d), owner=%lu, nRef=%d, rc=%s\n", + tid, p, p->trace, p->owner, p->nRef, sqlite3ErrName(rc))); + } +#endif + return rc; +} + +/* +** The sqlite3_mutex_leave() routine exits a mutex that was +** previously entered by the same thread. The behavior +** is undefined if the mutex is not currently entered or +** is not currently allocated. SQLite will never do either. +*/ +static void winMutexLeave(sqlite3_mutex *p){ +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) + DWORD tid = GetCurrentThreadId(); +#endif + assert( p ); +#ifdef SQLITE_DEBUG + assert( p->nRef>0 ); + assert( p->owner==tid ); + p->nRef--; + if( p->nRef==0 ) p->owner = 0; + assert( p->nRef==0 || p->id==SQLITE_MUTEX_RECURSIVE ); +#endif + assert( winMutex_isInit==1 ); + LeaveCriticalSection(&p->mutex); +#ifdef SQLITE_DEBUG + if( p->trace ){ + OSTRACE(("LEAVE-MUTEX tid=%lu, mutex=%p (%d), nRef=%d\n", + tid, p, p->trace, p->nRef)); + } +#endif +} + +SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ + static const sqlite3_mutex_methods sMutex = { + winMutexInit, + winMutexEnd, + winMutexAlloc, + winMutexFree, + winMutexEnter, + winMutexTry, + winMutexLeave, +#ifdef SQLITE_DEBUG + winMutexHeld, + winMutexNotheld +#else + 0, + 0 +#endif + }; + return &sMutex; +} + +#endif /* SQLITE_MUTEX_W32 */ + +/************** End of mutex_w32.c *******************************************/ +/************** Begin file malloc.c ******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** Memory allocation functions used throughout sqlite. +*/ +/* #include "sqliteInt.h" */ +/* #include <stdarg.h> */ + +/* +** Attempt to release up to n bytes of non-essential memory currently +** held by SQLite. An example of non-essential memory is memory used to +** cache database pages that are not currently in use. +*/ +SQLITE_PRIVATE int sqlite3_release_memory(int n){ +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + return sqlite3PcacheReleaseMemory(n); +#else + /* IMPLEMENTATION-OF: R-34391-24921 The sqlite3_release_memory() routine + ** is a no-op returning zero if SQLite is not compiled with + ** SQLITE_ENABLE_MEMORY_MANAGEMENT. */ + UNUSED_PARAMETER(n); + return 0; +#endif +} + +/* +** An instance of the following object records the location of +** each unused scratch buffer. +*/ +typedef struct ScratchFreeslot { + struct ScratchFreeslot *pNext; /* Next unused scratch buffer */ +} ScratchFreeslot; + +/* +** State information local to the memory allocation subsystem. +*/ +static SQLITE_WSD struct Mem0Global { + sqlite3_mutex *mutex; /* Mutex to serialize access */ + sqlite3_int64 alarmThreshold; /* The soft heap limit */ + + /* + ** Pointers to the end of sqlite3GlobalConfig.pScratch memory + ** (so that a range test can be used to determine if an allocation + ** being freed came from pScratch) and a pointer to the list of + ** unused scratch allocations. + */ + void *pScratchEnd; + ScratchFreeslot *pScratchFree; + u32 nScratchFree; + + /* + ** True if heap is nearly "full" where "full" is defined by the + ** sqlite3_soft_heap_limit() setting. + */ + int nearlyFull; +} mem0 = { 0, 0, 0, 0, 0, 0 }; + +#define mem0 GLOBAL(struct Mem0Global, mem0) + +#ifndef NDEBUG +/* +** Return the memory allocator mutex. sqlite3_status() needs it. +*/ +SQLITE_PRIVATE sqlite3_mutex *sqlite3MallocMutex(void){ + return mem0.mutex; +} +#endif + +/* +** Initialize the memory allocation subsystem. +*/ +SQLITE_PRIVATE int sqlite3MallocInit(void){ + int rc; + if( sqlite3GlobalConfig.m.xMalloc==0 ){ + sqlite3MemSetDefault(); + } + memset(&mem0, 0, sizeof(mem0)); + if( sqlite3GlobalConfig.bCoreMutex ){ + mem0.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM); + } + if( sqlite3GlobalConfig.pScratch && sqlite3GlobalConfig.szScratch>=100 + && sqlite3GlobalConfig.nScratch>0 ){ + int i, n, sz; + ScratchFreeslot *pSlot; + sz = ROUNDDOWN8(sqlite3GlobalConfig.szScratch); + sqlite3GlobalConfig.szScratch = sz; + pSlot = (ScratchFreeslot*)sqlite3GlobalConfig.pScratch; + n = sqlite3GlobalConfig.nScratch; + mem0.pScratchFree = pSlot; + mem0.nScratchFree = n; + for(i=0; i<n-1; i++){ + pSlot->pNext = (ScratchFreeslot*)(sz+(char*)pSlot); + pSlot = pSlot->pNext; + } + pSlot->pNext = 0; + mem0.pScratchEnd = (void*)&pSlot[1]; + }else{ + mem0.pScratchEnd = 0; + sqlite3GlobalConfig.pScratch = 0; + sqlite3GlobalConfig.szScratch = 0; + sqlite3GlobalConfig.nScratch = 0; + } + if( sqlite3GlobalConfig.pPage==0 || sqlite3GlobalConfig.szPage<512 + || sqlite3GlobalConfig.nPage<=0 ){ + sqlite3GlobalConfig.pPage = 0; + sqlite3GlobalConfig.szPage = 0; + } + rc = sqlite3GlobalConfig.m.xInit(sqlite3GlobalConfig.m.pAppData); + if( rc!=SQLITE_OK ) memset(&mem0, 0, sizeof(mem0)); + return rc; +} + +/* +** Return true if the heap is currently under memory pressure - in other +** words if the amount of heap used is close to the limit set by +** sqlite3_soft_heap_limit(). +*/ +SQLITE_PRIVATE int sqlite3HeapNearlyFull(void){ + return mem0.nearlyFull; +} + +/* +** Deinitialize the memory allocation subsystem. +*/ +SQLITE_PRIVATE void sqlite3MallocEnd(void){ + if( sqlite3GlobalConfig.m.xShutdown ){ + sqlite3GlobalConfig.m.xShutdown(sqlite3GlobalConfig.m.pAppData); + } + memset(&mem0, 0, sizeof(mem0)); +} + +/* +** Trigger the alarm +*/ +static void sqlite3MallocAlarm(int nByte){ + if( mem0.alarmThreshold<=0 ) return; + sqlite3_mutex_leave(mem0.mutex); + sqlite3_release_memory(nByte); + sqlite3_mutex_enter(mem0.mutex); +} + +/* +** Do a memory allocation with statistics and alarms. Assume the +** lock is already held. +*/ +static int mallocWithAlarm(int n, void **pp){ + int nFull; + void *p; + assert( sqlite3_mutex_held(mem0.mutex) ); + nFull = sqlite3GlobalConfig.m.xRoundup(n); + sqlite3StatusSet(SQLITE_STATUS_MALLOC_SIZE, n); + if( mem0.alarmThreshold>0 ){ + sqlite3_int64 nUsed = sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED); + if( nUsed >= mem0.alarmThreshold - nFull ){ + mem0.nearlyFull = 1; + sqlite3MallocAlarm(nFull); + }else{ + mem0.nearlyFull = 0; + } + } + p = sqlite3GlobalConfig.m.xMalloc(nFull); +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + if( p==0 && mem0.alarmThreshold>0 ){ + sqlite3MallocAlarm(nFull); + p = sqlite3GlobalConfig.m.xMalloc(nFull); + } +#endif + if( p ){ + nFull = sqlite3MallocSize(p); + sqlite3StatusUp(SQLITE_STATUS_MEMORY_USED, nFull); + sqlite3StatusUp(SQLITE_STATUS_MALLOC_COUNT, 1); + } + *pp = p; + return nFull; +} + +/* +** Allocate memory. This routine is like sqlite3_malloc() except that it +** assumes the memory subsystem has already been initialized. +*/ +SQLITE_PRIVATE void *sqlite3Malloc(u64 n){ + void *p; + if( n==0 || n>=0x7fffff00 ){ + /* A memory allocation of a number of bytes which is near the maximum + ** signed integer value might cause an integer overflow inside of the + ** xMalloc(). Hence we limit the maximum size to 0x7fffff00, giving + ** 255 bytes of overhead. SQLite itself will never use anything near + ** this amount. The only way to reach the limit is with sqlite3_malloc() */ + p = 0; + }else if( sqlite3GlobalConfig.bMemstat ){ + sqlite3_mutex_enter(mem0.mutex); + mallocWithAlarm((int)n, &p); + sqlite3_mutex_leave(mem0.mutex); + }else{ + p = sqlite3GlobalConfig.m.xMalloc((int)n); + } + assert( EIGHT_BYTE_ALIGNMENT(p) ); /* IMP: R-11148-40995 */ + return p; +} + +/* +** This version of the memory allocation is for use by the application. +** First make sure the memory subsystem is initialized, then do the +** allocation. +*/ +SQLITE_PRIVATE void *sqlite3_malloc(int n){ +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3BtreeInitialize() ) return 0; +#endif + return n<=0 ? 0 : sqlite3Malloc(n); +} +SQLITE_PRIVATE void *sqlite3_malloc64(sqlite3_uint64 n){ +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3BtreeInitialize() ) return 0; +#endif + return sqlite3Malloc(n); +} + +/* +** Each thread may only have a single outstanding allocation from +** xScratchMalloc(). We verify this constraint in the single-threaded +** case by setting scratchAllocOut to 1 when an allocation +** is outstanding clearing it when the allocation is freed. +*/ +#if SQLITE_THREADSAFE==0 && !defined(NDEBUG) +static int scratchAllocOut = 0; +#endif + + +/* +** Allocate memory that is to be used and released right away. +** This routine is similar to alloca() in that it is not intended +** for situations where the memory might be held long-term. This +** routine is intended to get memory to old large transient data +** structures that would not normally fit on the stack of an +** embedded processor. +*/ +SQLITE_PRIVATE void *sqlite3ScratchMalloc(int n){ + void *p; + assert( n>0 ); + + sqlite3_mutex_enter(mem0.mutex); + sqlite3StatusSet(SQLITE_STATUS_SCRATCH_SIZE, n); + if( mem0.nScratchFree && sqlite3GlobalConfig.szScratch>=n ){ + p = mem0.pScratchFree; + mem0.pScratchFree = mem0.pScratchFree->pNext; + mem0.nScratchFree--; + sqlite3StatusUp(SQLITE_STATUS_SCRATCH_USED, 1); + sqlite3_mutex_leave(mem0.mutex); + }else{ + sqlite3_mutex_leave(mem0.mutex); + p = sqlite3Malloc(n); + if( sqlite3GlobalConfig.bMemstat && p ){ + sqlite3_mutex_enter(mem0.mutex); + sqlite3StatusUp(SQLITE_STATUS_SCRATCH_OVERFLOW, sqlite3MallocSize(p)); + sqlite3_mutex_leave(mem0.mutex); + } + sqlite3MemdebugSetType(p, MEMTYPE_SCRATCH); + } + assert( sqlite3_mutex_notheld(mem0.mutex) ); + + +#if SQLITE_THREADSAFE==0 && !defined(NDEBUG) + /* EVIDENCE-OF: R-12970-05880 SQLite will not use more than one scratch + ** buffers per thread. + ** + ** This can only be checked in single-threaded mode. + */ + assert( scratchAllocOut==0 ); + if( p ) scratchAllocOut++; +#endif + + return p; +} +SQLITE_PRIVATE void sqlite3ScratchFree(void *p){ + if( p ){ + +#if SQLITE_THREADSAFE==0 && !defined(NDEBUG) + /* Verify that no more than two scratch allocation per thread + ** is outstanding at one time. (This is only checked in the + ** single-threaded case since checking in the multi-threaded case + ** would be much more complicated.) */ + assert( scratchAllocOut>=1 && scratchAllocOut<=2 ); + scratchAllocOut--; +#endif + + if( p>=sqlite3GlobalConfig.pScratch && p<mem0.pScratchEnd ){ + /* Release memory from the SQLITE_CONFIG_SCRATCH allocation */ + ScratchFreeslot *pSlot; + pSlot = (ScratchFreeslot*)p; + sqlite3_mutex_enter(mem0.mutex); + pSlot->pNext = mem0.pScratchFree; + mem0.pScratchFree = pSlot; + mem0.nScratchFree++; + assert( mem0.nScratchFree <= (u32)sqlite3GlobalConfig.nScratch ); + sqlite3StatusDown(SQLITE_STATUS_SCRATCH_USED, 1); + sqlite3_mutex_leave(mem0.mutex); + }else{ + /* Release memory back to the heap */ + assert( sqlite3MemdebugHasType(p, MEMTYPE_SCRATCH) ); + assert( sqlite3MemdebugNoType(p, (u8)~MEMTYPE_SCRATCH) ); + sqlite3MemdebugSetType(p, MEMTYPE_HEAP); + if( sqlite3GlobalConfig.bMemstat ){ + int iSize = sqlite3MallocSize(p); + sqlite3_mutex_enter(mem0.mutex); + sqlite3StatusDown(SQLITE_STATUS_SCRATCH_OVERFLOW, iSize); + sqlite3StatusDown(SQLITE_STATUS_MEMORY_USED, iSize); + sqlite3StatusDown(SQLITE_STATUS_MALLOC_COUNT, 1); + sqlite3GlobalConfig.m.xFree(p); + sqlite3_mutex_leave(mem0.mutex); + }else{ + sqlite3GlobalConfig.m.xFree(p); + } + } + } +} + +/* +** Return the size of a memory allocation previously obtained from +** sqlite3Malloc() or sqlite3_malloc(). +*/ +SQLITE_PRIVATE int sqlite3MallocSize(void *p){ + assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); + return sqlite3GlobalConfig.m.xSize(p); +} +SQLITE_PRIVATE int sqlite3DbMallocSize(Btree *pBtree, void *p){ + if( pBtree==0 ){ +#if SQLITE_DEBUG + assert( sqlite3MemdebugNoType(p, (u8)~MEMTYPE_HEAP) ); + assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); +#endif + return sqlite3GlobalConfig.m.xSize(p); + }else{ + assert( sqlite3_mutex_held(pBtree->mutex) ); + return 0; + } +} + +/* +** Free memory previously obtained from sqlite3Malloc(). +*/ +SQLITE_PRIVATE void sqlite3_free(void *p){ + if( p==0 ) return; /* IMP: R-49053-54554 */ + assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); + assert( sqlite3MemdebugNoType(p, (u8)~MEMTYPE_HEAP) ); + if( sqlite3GlobalConfig.bMemstat ){ + sqlite3_mutex_enter(mem0.mutex); + sqlite3StatusDown(SQLITE_STATUS_MEMORY_USED, sqlite3MallocSize(p)); + sqlite3StatusDown(SQLITE_STATUS_MALLOC_COUNT, 1); + sqlite3GlobalConfig.m.xFree(p); + sqlite3_mutex_leave(mem0.mutex); + }else{ + sqlite3GlobalConfig.m.xFree(p); + } +} + +/* +** Add the size of memory allocation "p" to the count in +** *db->pnBytesFreed. +*/ +static SQLITE_NOINLINE void measureAllocationSize(Btree *pBtree, void *p){ + *pBtree->pnBytesFreed += sqlite3DbMallocSize(pBtree,p); +} + +/* +** Free memory that might be associated with a particular database +** connection. +*/ +SQLITE_PRIVATE void sqlite3DbFree(Btree *pBtree, void *p){ + assert( pBTree==0 || sqlite3_mutex_held(pTree->mutex) ); + if( p==0 ) return; + if( pBtree ){ + if( pBtree->pnBytesFreed ){ + measureAllocationSize(pBtree, p); + return; + } + } + assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + assert( sqlite3MemdebugNoType(p, (u8)~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + assert( pBtree!=0 || sqlite3MemdebugNoType(p, MEMTYPE_LOOKASIDE) ); + sqlite3MemdebugSetType(p, MEMTYPE_HEAP); + sqlite3_free(p); +} + +/* +** Change the size of an existing memory allocation +*/ +SQLITE_PRIVATE void *sqlite3Realloc(void *pOld, u64 nBytes){ + int nOld, nNew, nDiff; + void *pNew; + assert( sqlite3MemdebugHasType(pOld, MEMTYPE_HEAP) ); + assert( sqlite3MemdebugNoType(pOld, (u8)~MEMTYPE_HEAP) ); + if( pOld==0 ){ + return sqlite3Malloc(nBytes); /* IMP: R-04300-56712 */ + } + if( nBytes==0 ){ + sqlite3_free(pOld); /* IMP: R-26507-47431 */ + return 0; + } + if( nBytes>=0x7fffff00 ){ + /* The 0x7ffff00 limit term is explained in comments on sqlite3Malloc() */ + return 0; + } + nOld = sqlite3MallocSize(pOld); + /* IMPLEMENTATION-OF: R-46199-30249 SQLite guarantees that the second + ** argument to xRealloc is always a value returned by a prior call to + ** xRoundup. */ + nNew = sqlite3GlobalConfig.m.xRoundup((int)nBytes); + if( nOld==nNew ){ + pNew = pOld; + }else if( sqlite3GlobalConfig.bMemstat ){ + sqlite3_mutex_enter(mem0.mutex); + sqlite3StatusSet(SQLITE_STATUS_MALLOC_SIZE, (int)nBytes); + nDiff = nNew - nOld; + if( sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED) >= + mem0.alarmThreshold-nDiff ){ + sqlite3MallocAlarm(nDiff); + } + pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew); + if( pNew==0 && mem0.alarmThreshold>0 ){ + sqlite3MallocAlarm((int)nBytes); + pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew); + } + if( pNew ){ + nNew = sqlite3MallocSize(pNew); + sqlite3StatusUp(SQLITE_STATUS_MEMORY_USED, nNew-nOld); + } + sqlite3_mutex_leave(mem0.mutex); + }else{ + pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew); + } + assert( EIGHT_BYTE_ALIGNMENT(pNew) ); /* IMP: R-11148-40995 */ + return pNew; +} + +/* +** The public interface to sqlite3Realloc. Make sure that the memory +** subsystem is initialized prior to invoking sqliteRealloc. +*/ +SQLITE_PRIVATE void *sqlite3_realloc(void *pOld, int n){ +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3BtreeInitialize() ) return 0; +#endif + if( n<0 ) n = 0; /* IMP: R-26507-47431 */ + return sqlite3Realloc(pOld, n); +} +SQLITE_PRIVATE void *sqlite3_realloc64(void *pOld, sqlite3_uint64 n){ +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3BtreeInitialize() ) return 0; +#endif + return sqlite3Realloc(pOld, n); +} + + +/* +** Allocate and zero memory. +*/ +SQLITE_PRIVATE void *sqlite3MallocZero(u64 n){ + void *p = sqlite3Malloc(n); + if( p ){ + memset(p, 0, (size_t)n); + } + return p; +} + +/* +** Allocate and zero memory. If the allocation fails, make +** the mallocFailed flag in the connection pointer. +*/ +SQLITE_PRIVATE void *sqlite3DbMallocZero(Btree *pBtree, u64 n){ + void *p = sqlite3DbMallocRaw(pBtree, n); + if( p ){ + memset(p, 0, (size_t)n); + } + return p; +} + +/* +** Allocate and zero memory. If the allocation fails, make +** the mallocFailed flag in the connection pointer. +** +** If pBtree!=0 and pBtree->mallocFailed is true (indicating a prior malloc +** failure on the same database connection) then always return 0. +** Hence for a particular database connection, once malloc starts +** failing, it fails consistently until mallocFailed is reset. +** This is an important assumption. There are many places in the +** code that do things like this: +** +** int *a = (int*)sqlite3DbMallocRaw(pBtree, 100); +** int *b = (int*)sqlite3DbMallocRaw(pBtree, 200); +** if( b ) a[10] = 9; +** +** In other words, if a subsequent malloc (ex: "b") worked, it is assumed +** that all prior mallocs (ex: "a") worked too. +*/ +SQLITE_PRIVATE void *sqlite3DbMallocRaw(Btree *pBtree, u64 n){ + void *p; + assert( pBtree==0 || sqlite3_mutex_held(pBtree->mutex) ); + assert( pBtree==0 || pBtree->pnBytesFreed==0 ); + if( pBtree && pBtree->mallocFailed ){ + return 0; + } + p = sqlite3Malloc(n); + if( !p && pBtree ){ + pBtree->mallocFailed = 1; + } + sqlite3MemdebugSetType(p, MEMTYPE_HEAP); + return p; +} + +/* +** Resize the block of memory pointed to by p to n bytes. If the +** resize fails, set the mallocFailed flag in the connection object. +*/ +SQLITE_PRIVATE void *sqlite3DbRealloc(Btree *pBtree, void *p, u64 n){ + void *pNew = 0; + assert( pBtree!=0 ); + assert( sqlite3_mutex_held(pBtree->mutex) ); + if( pBtree->mallocFailed==0 ){ + if( p==0 ){ + return sqlite3DbMallocRaw(pBtree, n); + } + assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + assert( sqlite3MemdebugNoType(p, (u8)~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + sqlite3MemdebugSetType(p, MEMTYPE_HEAP); + pNew = sqlite3_realloc64(p, n); + if( !pNew ){ + pBtree->mallocFailed = 1; + } + sqlite3MemdebugSetType(pNew, MEMTYPE_HEAP); + } + return pNew; +} + +/* +** Attempt to reallocate p. If the reallocation fails, then free p +** and set the mallocFailed flag in the database connection. +*/ +SQLITE_PRIVATE void *sqlite3DbReallocOrFree(Btree *pBtree, void *p, u64 n){ + void *pNew; + pNew = sqlite3DbRealloc(pBtree, p, n); + if( !pNew ){ + sqlite3DbFree(pBtree, p); + } + return pNew; +} + +/************** End of malloc.c **********************************************/ +/************** Begin file printf.c ******************************************/ +/* +** The "printf" code that follows dates from the 1980's. It is in +** the public domain. +** +************************************************************************** +** +** This file contains code for a set of "printf"-like routines. These +** routines format strings much like the printf() from the standard C +** library, though the implementation here has enhancements to support +** SQLite. +*/ +/* #include "sqliteInt.h" */ + +/* +** Conversion types fall into various categories as defined by the +** following enumeration. +*/ +#define etRADIX 1 /* Integer types. %d, %x, %o, and so forth */ +#define etFLOAT 2 /* Floating point. %f */ +#define etEXP 3 /* Exponentional notation. %e and %E */ +#define etGENERIC 4 /* Floating or exponential, depending on exponent. %g */ +#define etSIZE 5 /* Return number of characters processed so far. %n */ +#define etSTRING 6 /* Strings. %s */ +#define etDYNSTRING 7 /* Dynamically allocated strings. %z */ +#define etPERCENT 8 /* Percent symbol. %% */ +#define etCHARX 9 /* Characters. %c */ +/* The rest are extensions, not normally found in printf() */ +#define etSQLESCAPE 10 /* Strings with '\'' doubled. %q */ +#define etSQLESCAPE2 11 /* Strings with '\'' doubled and enclosed in '', + NULL pointers replaced by SQL NULL. %Q */ +#define etPOINTER 14 /* The %p conversion */ +#define etSQLESCAPE3 15 /* %w -> Strings with '\"' doubled */ +#define etORDINAL 16 /* %r -> 1st, 2nd, 3rd, 4th, etc. English only */ + +#define etINVALID 0 /* Any unrecognized conversion type */ + + +/* +** An "etByte" is an 8-bit unsigned value. +*/ +typedef unsigned char etByte; + +/* +** Each builtin conversion character (ex: the 'd' in "%d") is described +** by an instance of the following structure +*/ +typedef struct et_info { /* Information about each format field */ + char fmttype; /* The format field code letter */ + etByte base; /* The base for radix conversion */ + etByte flags; /* One or more of FLAG_ constants below */ + etByte type; /* Conversion paradigm */ + etByte charset; /* Offset into aDigits[] of the digits string */ + etByte prefix; /* Offset into aPrefix[] of the prefix string */ +} et_info; + +/* +** Allowed values for et_info.flags +*/ +#define FLAG_SIGNED 1 /* True if the value to convert is signed */ +#define FLAG_INTERN 2 /* True if for internal use only */ +#define FLAG_STRING 4 /* Allow infinity precision */ + + +/* +** The following table is searched linearly, so it is good to put the +** most frequently used conversion types first. +*/ +static const char aDigits[] = "0123456789ABCDEF0123456789abcdef"; +static const char aPrefix[] = "-x0\000X0"; +static const et_info fmtinfo[] = { + { 'd', 10, 1, etRADIX, 0, 0 }, + { 's', 0, 4, etSTRING, 0, 0 }, + { 'g', 0, 1, etGENERIC, 30, 0 }, + { 'z', 0, 4, etDYNSTRING, 0, 0 }, + { 'q', 0, 4, etSQLESCAPE, 0, 0 }, + { 'Q', 0, 4, etSQLESCAPE2, 0, 0 }, + { 'w', 0, 4, etSQLESCAPE3, 0, 0 }, + { 'c', 0, 0, etCHARX, 0, 0 }, + { 'o', 8, 0, etRADIX, 0, 2 }, + { 'u', 10, 0, etRADIX, 0, 0 }, + { 'x', 16, 0, etRADIX, 16, 1 }, + { 'X', 16, 0, etRADIX, 0, 4 }, +#ifndef SQLITE_OMIT_FLOATING_POINT + { 'f', 0, 1, etFLOAT, 0, 0 }, + { 'e', 0, 1, etEXP, 30, 0 }, + { 'E', 0, 1, etEXP, 14, 0 }, + { 'G', 0, 1, etGENERIC, 14, 0 }, +#endif + { 'i', 10, 1, etRADIX, 0, 0 }, + { 'n', 0, 0, etSIZE, 0, 0 }, + { '%', 0, 0, etPERCENT, 0, 0 }, + { 'p', 16, 0, etPOINTER, 0, 1 }, + +/* All the rest have the FLAG_INTERN bit set and are thus for internal +** use only */ + { 'r', 10, 3, etORDINAL, 0, 0 }, +}; + +/* +** If SQLITE_OMIT_FLOATING_POINT is defined, then none of the floating point +** conversions will work. +*/ +#ifndef SQLITE_OMIT_FLOATING_POINT +/* +** "*val" is a double such that 0.1 <= *val < 10.0 +** Return the ascii code for the leading digit of *val, then +** multiply "*val" by 10.0 to renormalize. +** +** Example: +** input: *val = 3.14159 +** output: *val = 1.4159 function return = '3' +** +** The counter *cnt is incremented each time. After counter exceeds +** 16 (the number of significant digits in a 64-bit float) '0' is +** always returned. +*/ +static char et_getdigit(LONGDOUBLE_TYPE *val, int *cnt){ + int digit; + LONGDOUBLE_TYPE d; + if( (*cnt)<=0 ) return '0'; + (*cnt)--; + digit = (int)*val; + d = digit; + digit += '0'; + *val = (*val - d)*10.0; + return (char)digit; +} +#endif /* SQLITE_OMIT_FLOATING_POINT */ + +/* +** Set the StrAccum object to an error mode. +*/ +static void setStrAccumError(StrAccum *p, u8 eError){ + assert( eError==STRACCUM_NOMEM || eError==STRACCUM_TOOBIG ); + p->accError = eError; + p->nAlloc = 0; +} + +/* +** Extra argument values from a PrintfArguments object +*/ +static sqlite3_int64 getIntArg(PrintfArguments *p){ + if( p->nArg<=p->nUsed ) return 0; + return sqlite3VdbeIntValue((Mem*)p->apArg[p->nUsed++]); +} +static double getDoubleArg(PrintfArguments *p){ + if( p->nArg<=p->nUsed ) return 0.0; + return sqlite3VdbeRealValue((Mem*)p->apArg[p->nUsed++]); +} +static char *getTextArg(PrintfArguments *p){ + if( p->nArg<=p->nUsed ) return 0; + return (char*)sqlite3ValueText(p->apArg[p->nUsed++], SQLITE_UTF8); +} + + +/* +** On machines with a small stack size, you can redefine the +** SQLITE_PRINT_BUF_SIZE to be something smaller, if desired. +*/ +#ifndef SQLITE_PRINT_BUF_SIZE +# define SQLITE_PRINT_BUF_SIZE 70 +#endif +#define etBUFSIZE SQLITE_PRINT_BUF_SIZE /* Size of the output buffer */ + +/* +** Render a string given by "fmt" into the StrAccum object. +*/ +SQLITE_PRIVATE void sqlite3VXPrintf( + StrAccum *pAccum, /* Accumulate results here */ + u32 bFlags, /* SQLITE_PRINTF_* flags */ + const char *fmt, /* Format string */ + va_list ap /* arguments */ +){ + int c; /* Next character in the format string */ + char *bufpt; /* Pointer to the conversion buffer */ + int precision; /* Precision of the current field */ + int length; /* Length of the field */ + int idx; /* A general purpose loop counter */ + int width; /* Width of the current field */ + etByte flag_leftjustify; /* True if "-" flag is present */ + etByte flag_plussign; /* True if "+" flag is present */ + etByte flag_blanksign; /* True if " " flag is present */ + etByte flag_alternateform; /* True if "#" flag is present */ + etByte flag_altform2; /* True if "!" flag is present */ + etByte flag_zeropad; /* True if field width constant starts with zero */ + etByte flag_long; /* True if "l" flag is present */ + etByte flag_longlong; /* True if the "ll" flag is present */ + etByte done; /* Loop termination flag */ + etByte xtype = 0; /* Conversion paradigm */ + u8 bArgList; /* True for SQLITE_PRINTF_SQLFUNC */ + u8 useIntern; /* Ok to use internal conversions (ex: %T) */ + char prefix; /* Prefix character. "+" or "-" or " " or '\0'. */ + sqlite_uint64 longvalue; /* Value for integer types */ + LONGDOUBLE_TYPE realvalue; /* Value for real types */ + const et_info *infop; /* Pointer to the appropriate info structure */ + char *zOut; /* Rendering buffer */ + int nOut; /* Size of the rendering buffer */ + char *zExtra = 0; /* Malloced memory used by some conversion */ +#ifndef SQLITE_OMIT_FLOATING_POINT + int exp, e2; /* exponent of real numbers */ + int nsd; /* Number of significant digits returned */ + double rounder; /* Used for rounding floating point values */ + etByte flag_dp; /* True if decimal point should be shown */ + etByte flag_rtz; /* True if trailing zeros should be removed */ +#endif + PrintfArguments *pArgList = 0; /* Arguments for SQLITE_PRINTF_SQLFUNC */ + char buf[etBUFSIZE]; /* Conversion buffer */ + + bufpt = 0; + if( bFlags ){ + if( (bArgList = (bFlags & SQLITE_PRINTF_SQLFUNC))!=0 ){ + pArgList = va_arg(ap, PrintfArguments*); + } + useIntern = bFlags & SQLITE_PRINTF_INTERNAL; + }else{ + bArgList = useIntern = 0; + } + for(; (c=(*fmt))!=0; ++fmt){ + if( c!='%' ){ + bufpt = (char *)fmt; +#if HAVE_STRCHRNUL + fmt = strchrnul(fmt, '%'); +#else + do{ fmt++; }while( *fmt && *fmt != '%' ); +#endif + sqlite3StrAccumAppend(pAccum, bufpt, (int)(fmt - bufpt)); + if( *fmt==0 ) break; + } + if( (c=(*++fmt))==0 ){ + sqlite3StrAccumAppend(pAccum, "%", 1); + break; + } + /* Find out what flags are present */ + flag_leftjustify = flag_plussign = flag_blanksign = + flag_alternateform = flag_altform2 = flag_zeropad = 0; + done = 0; + do{ + switch( c ){ + case '-': flag_leftjustify = 1; break; + case '+': flag_plussign = 1; break; + case ' ': flag_blanksign = 1; break; + case '#': flag_alternateform = 1; break; + case '!': flag_altform2 = 1; break; + case '0': flag_zeropad = 1; break; + default: done = 1; break; + } + }while( !done && (c=(*++fmt))!=0 ); + /* Get the field width */ + if( c=='*' ){ + if( bArgList ){ + width = (int)getIntArg(pArgList); + }else{ + width = va_arg(ap,int); + } + if( width<0 ){ + flag_leftjustify = 1; + width = width >= -2147483647 ? -width : 0; + } + c = *++fmt; + }else{ + unsigned wx = 0; + while( c>='0' && c<='9' ){ + wx = wx*10 + c - '0'; + c = *++fmt; + } + testcase( wx>0x7fffffff ); + width = wx & 0x7fffffff; + } + + /* Get the precision */ + if( c=='.' ){ + c = *++fmt; + if( c=='*' ){ + if( bArgList ){ + precision = (int)getIntArg(pArgList); + }else{ + precision = va_arg(ap,int); + } + c = *++fmt; + if( precision<0 ){ + precision = precision >= -2147483647 ? -precision : -1; + } + }else{ + unsigned px = 0; + while( c>='0' && c<='9' ){ + px = px*10 + c - '0'; + c = *++fmt; + } + testcase( px>0x7fffffff ); + precision = px & 0x7fffffff; + } + }else{ + precision = -1; + } + /* Get the conversion type modifier */ + if( c=='l' ){ + flag_long = 1; + c = *++fmt; + if( c=='l' ){ + flag_longlong = 1; + c = *++fmt; + }else{ + flag_longlong = 0; + } + }else{ + flag_long = flag_longlong = 0; + } + /* Fetch the info entry for the field */ + infop = &fmtinfo[0]; + xtype = etINVALID; + for(idx=0; idx<ArraySize(fmtinfo); idx++){ + if( c==fmtinfo[idx].fmttype ){ + infop = &fmtinfo[idx]; + if( useIntern || (infop->flags & FLAG_INTERN)==0 ){ + xtype = infop->type; + }else{ + return; + } + break; + } + } + + /* + ** At this point, variables are initialized as follows: + ** + ** flag_alternateform TRUE if a '#' is present. + ** flag_altform2 TRUE if a '!' is present. + ** flag_plussign TRUE if a '+' is present. + ** flag_leftjustify TRUE if a '-' is present or if the + ** field width was negative. + ** flag_zeropad TRUE if the width began with 0. + ** flag_long TRUE if the letter 'l' (ell) prefixed + ** the conversion character. + ** flag_longlong TRUE if the letter 'll' (ell ell) prefixed + ** the conversion character. + ** flag_blanksign TRUE if a ' ' is present. + ** width The specified field width. This is + ** always non-negative. Zero is the default. + ** precision The specified precision. The default + ** is -1. + ** xtype The class of the conversion. + ** infop Pointer to the appropriate info struct. + */ + switch( xtype ){ + case etPOINTER: + flag_longlong = sizeof(char*)==sizeof(i64); + flag_long = sizeof(char*)==sizeof(long int); + /* Fall through into the next case */ + case etORDINAL: + case etRADIX: + if( infop->flags & FLAG_SIGNED ){ + i64 v; + if( bArgList ){ + v = getIntArg(pArgList); + }else if( flag_longlong ){ + v = va_arg(ap,i64); + }else if( flag_long ){ + v = va_arg(ap,long int); + }else{ + v = va_arg(ap,int); + } + if( v<0 ){ + if( v==SMALLEST_INT64 ){ + longvalue = ((u64)1)<<63; + }else{ + longvalue = -v; + } + prefix = '-'; + }else{ + longvalue = v; + if( flag_plussign ) prefix = '+'; + else if( flag_blanksign ) prefix = ' '; + else prefix = 0; + } + }else{ + if( bArgList ){ + longvalue = (u64)getIntArg(pArgList); + }else if( flag_longlong ){ + longvalue = va_arg(ap,u64); + }else if( flag_long ){ + longvalue = va_arg(ap,unsigned long int); + }else{ + longvalue = va_arg(ap,unsigned int); + } + prefix = 0; + } + if( longvalue==0 ) flag_alternateform = 0; + if( flag_zeropad && precision<width-(prefix!=0) ){ + precision = width-(prefix!=0); + } + if( precision<etBUFSIZE-10 ){ + nOut = etBUFSIZE; + zOut = buf; + }else{ + nOut = precision + 10; + zOut = zExtra = sqlite3Malloc( nOut ); + if( zOut==0 ){ + setStrAccumError(pAccum, STRACCUM_NOMEM); + return; + } + } + bufpt = &zOut[nOut-1]; + if( xtype==etORDINAL ){ + static const char zOrd[] = "thstndrd"; + int x = (int)(longvalue % 10); + if( x>=4 || (longvalue/10)%10==1 ){ + x = 0; + } + *(--bufpt) = zOrd[x*2+1]; + *(--bufpt) = zOrd[x*2]; + } + { + const char *cset = &aDigits[infop->charset]; + u8 base = infop->base; + do{ /* Convert to ascii */ + *(--bufpt) = cset[longvalue%base]; + longvalue = longvalue/base; + }while( longvalue>0 ); + } + length = (int)(&zOut[nOut-1]-bufpt); + for(idx=precision-length; idx>0; idx--){ + *(--bufpt) = '0'; /* Zero pad */ + } + if( prefix ) *(--bufpt) = prefix; /* Add sign */ + if( flag_alternateform && infop->prefix ){ /* Add "0" or "0x" */ + const char *pre; + char x; + pre = &aPrefix[infop->prefix]; + for(; (x=(*pre))!=0; pre++) *(--bufpt) = x; + } + length = (int)(&zOut[nOut-1]-bufpt); + break; + case etFLOAT: + case etEXP: + case etGENERIC: + if( bArgList ){ + realvalue = getDoubleArg(pArgList); + }else{ + realvalue = va_arg(ap,double); + } +#ifdef SQLITE_OMIT_FLOATING_POINT + length = 0; +#else + if( precision<0 ) precision = 6; /* Set default precision */ + if( realvalue<0.0 ){ + realvalue = -realvalue; + prefix = '-'; + }else{ + if( flag_plussign ) prefix = '+'; + else if( flag_blanksign ) prefix = ' '; + else prefix = 0; + } + if( xtype==etGENERIC && precision>0 ) precision--; + testcase( precision>0xfff ); + for(idx=precision&0xfff, rounder=0.5; idx>0; idx--, rounder*=0.1){} + if( xtype==etFLOAT ) realvalue += rounder; + /* Normalize realvalue to within 10.0 > realvalue >= 1.0 */ + exp = 0; + if( sqlite3IsNaN((double)realvalue) ){ + bufpt = "NaN"; + length = 3; + break; + } + if( realvalue>0.0 ){ + LONGDOUBLE_TYPE scale = 1.0; + while( realvalue>=1e100*scale && exp<=350 ){ scale *= 1e100;exp+=100;} + while( realvalue>=1e10*scale && exp<=350 ){ scale *= 1e10; exp+=10; } + while( realvalue>=10.0*scale && exp<=350 ){ scale *= 10.0; exp++; } + realvalue /= scale; + while( realvalue<1e-8 ){ realvalue *= 1e8; exp-=8; } + while( realvalue<1.0 ){ realvalue *= 10.0; exp--; } + if( exp>350 ){ + bufpt = buf; + buf[0] = prefix; + memcpy(buf+(prefix!=0),"Inf",4); + length = 3+(prefix!=0); + break; + } + } + bufpt = buf; + /* + ** If the field type is etGENERIC, then convert to either etEXP + ** or etFLOAT, as appropriate. + */ + if( xtype!=etFLOAT ){ + realvalue += rounder; + if( realvalue>=10.0 ){ realvalue *= 0.1; exp++; } + } + if( xtype==etGENERIC ){ + flag_rtz = !flag_alternateform; + if( exp<-4 || exp>precision ){ + xtype = etEXP; + }else{ + precision = precision - exp; + xtype = etFLOAT; + } + }else{ + flag_rtz = flag_altform2; + } + if( xtype==etEXP ){ + e2 = 0; + }else{ + e2 = exp; + } + if( MAX(e2,0)+(i64)precision+(i64)width > etBUFSIZE - 15 ){ + bufpt = zExtra + = sqlite3Malloc( MAX(e2,0)+(i64)precision+(i64)width+15 ); + if( bufpt==0 ){ + setStrAccumError(pAccum, STRACCUM_NOMEM); + return; + } + } + zOut = bufpt; + nsd = 16 + flag_altform2*10; + flag_dp = (precision>0 ?1:0) | flag_alternateform | flag_altform2; + /* The sign in front of the number */ + if( prefix ){ + *(bufpt++) = prefix; + } + /* Digits prior to the decimal point */ + if( e2<0 ){ + *(bufpt++) = '0'; + }else{ + for(; e2>=0; e2--){ + *(bufpt++) = et_getdigit(&realvalue,&nsd); + } + } + /* The decimal point */ + if( flag_dp ){ + *(bufpt++) = '.'; + } + /* "0" digits after the decimal point but before the first + ** significant digit of the number */ + for(e2++; e2<0; precision--, e2++){ + assert( precision>0 ); + *(bufpt++) = '0'; + } + /* Significant digits after the decimal point */ + while( (precision--)>0 ){ + *(bufpt++) = et_getdigit(&realvalue,&nsd); + } + /* Remove trailing zeros and the "." if no digits follow the "." */ + if( flag_rtz && flag_dp ){ + while( bufpt[-1]=='0' ) *(--bufpt) = 0; + assert( bufpt>zOut ); + if( bufpt[-1]=='.' ){ + if( flag_altform2 ){ + *(bufpt++) = '0'; + }else{ + *(--bufpt) = 0; + } + } + } + /* Add the "eNNN" suffix */ + if( xtype==etEXP ){ + *(bufpt++) = aDigits[infop->charset]; + if( exp<0 ){ + *(bufpt++) = '-'; exp = -exp; + }else{ + *(bufpt++) = '+'; + } + if( exp>=100 ){ + *(bufpt++) = (char)((exp/100)+'0'); /* 100's digit */ + exp %= 100; + } + *(bufpt++) = (char)(exp/10+'0'); /* 10's digit */ + *(bufpt++) = (char)(exp%10+'0'); /* 1's digit */ + } + *bufpt = 0; + + /* The converted number is in buf[] and zero terminated. Output it. + ** Note that the number is in the usual order, not reversed as with + ** integer conversions. */ + length = (int)(bufpt-zOut); + bufpt = zOut; + + /* Special case: Add leading zeros if the flag_zeropad flag is + ** set and we are not left justified */ + if( flag_zeropad && !flag_leftjustify && length < width){ + int i; + int nPad = width - length; + for(i=width; i>=nPad; i--){ + bufpt[i] = bufpt[i-nPad]; + } + i = prefix!=0; + while( nPad-- ) bufpt[i++] = '0'; + length = width; + } +#endif /* !defined(SQLITE_OMIT_FLOATING_POINT) */ + break; + case etSIZE: + if( !bArgList ){ + *(va_arg(ap,int*)) = pAccum->nChar; + } + length = width = 0; + break; + case etPERCENT: + buf[0] = '%'; + bufpt = buf; + length = 1; + break; + case etCHARX: + if( bArgList ){ + bufpt = getTextArg(pArgList); + c = bufpt ? bufpt[0] : 0; + }else{ + c = va_arg(ap,int); + } + if( precision>1 ){ + width -= precision-1; + if( width>1 && !flag_leftjustify ){ + sqlite3AppendChar(pAccum, width-1, ' '); + width = 0; + } + sqlite3AppendChar(pAccum, precision-1, c); + } + length = 1; + buf[0] = c; + bufpt = buf; + break; + case etSTRING: + case etDYNSTRING: + if( bArgList ){ + bufpt = getTextArg(pArgList); + xtype = etSTRING; + }else{ + bufpt = va_arg(ap,char*); + } + if( bufpt==0 ){ + bufpt = ""; + }else if( xtype==etDYNSTRING ){ + zExtra = bufpt; + } + if( precision>=0 ){ + for(length=0; length<precision && bufpt[length]; length++){} + }else{ + length = sqlite3Strlen30(bufpt); + } + break; + case etSQLESCAPE: /* Escape ' characters */ + case etSQLESCAPE2: /* Escape ' and enclose in '...' */ + case etSQLESCAPE3: { /* Escape " characters */ + int i, j, k, n, isnull; + int needQuote; + char ch; + char q = ((xtype==etSQLESCAPE3)?'"':'\''); /* Quote character */ + char *escarg; + + if( bArgList ){ + escarg = getTextArg(pArgList); + }else{ + escarg = va_arg(ap,char*); + } + isnull = escarg==0; + if( isnull ) escarg = (xtype==etSQLESCAPE2 ? "NULL" : "(NULL)"); + k = precision; + for(i=n=0; k!=0 && (ch=escarg[i])!=0; i++, k--){ + if( ch==q ) n++; + } + needQuote = !isnull && xtype==etSQLESCAPE2; + n += i + 3; + if( n>etBUFSIZE ){ + bufpt = zExtra = sqlite3Malloc( n ); + if( bufpt==0 ){ + setStrAccumError(pAccum, STRACCUM_NOMEM); + return; + } + }else{ + bufpt = buf; + } + j = 0; + if( needQuote ) bufpt[j++] = q; + k = i; + for(i=0; i<k; i++){ + bufpt[j++] = ch = escarg[i]; + if( ch==q ) bufpt[j++] = ch; + } + if( needQuote ) bufpt[j++] = q; + bufpt[j] = 0; + length = j; + /* The precision in %q and %Q means how many input characters to + ** consume, not the length of the output... + ** if( precision>=0 && precision<length ) length = precision; */ + break; + } + default: { + assert( xtype==etINVALID ); + return; + } + }/* End switch over the format type */ + /* + ** The text of the conversion is pointed to by "bufpt" and is + ** "length" characters long. The field width is "width". Do + ** the output. + */ + width -= length; + if( width>0 && !flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' '); + sqlite3StrAccumAppend(pAccum, bufpt, length); + if( width>0 && flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' '); + + if( zExtra ){ + sqlite3_free(zExtra); + zExtra = 0; + } + }/* End for loop over the format string */ +} /* End of function */ + +/* +** Enlarge the memory allocation on a StrAccum object so that it is +** able to accept at least N more bytes of text. +** +** Return the number of bytes of text that StrAccum is able to accept +** after the attempted enlargement. The value returned might be zero. +*/ +static int sqlite3StrAccumEnlarge(StrAccum *p, int N){ + char *zNew; + assert( p->nChar+(i64)N >= p->nAlloc ); /* Only called if really needed */ + if( p->accError ){ + testcase(p->accError==STRACCUM_TOOBIG); + testcase(p->accError==STRACCUM_NOMEM); + return 0; + } + if( p->mxAlloc==0 ){ + N = p->nAlloc - p->nChar - 1; + setStrAccumError(p, STRACCUM_TOOBIG); + return N; + }else{ + char *zOld = (p->zText==p->zBase ? 0 : p->zText); + i64 szNew = p->nChar; + szNew += N + 1; + if( szNew+p->nChar<=p->mxAlloc ){ + /* Force exponential buffer size growth as long as it does not overflow, + ** to avoid having to call this routine too often */ + szNew += p->nChar; + } + if( szNew > p->mxAlloc ){ + sqlite3StrAccumReset(p); + setStrAccumError(p, STRACCUM_TOOBIG); + return 0; + }else{ + p->nAlloc = (int)szNew; + } + if( p->pBtree ){ + zNew = sqlite3DbRealloc(p->pBtree, zOld, p->nAlloc); + }else{ + zNew = sqlite3_realloc64(zOld, p->nAlloc); + } + if( zNew ){ + assert( p->zText!=0 || p->nChar==0 ); + if( zOld==0 && p->nChar>0 ) memcpy(zNew, p->zText, p->nChar); + p->zText = zNew; + p->nAlloc = sqlite3DbMallocSize(p->pBtree, zNew); + }else{ + sqlite3StrAccumReset(p); + setStrAccumError(p, STRACCUM_NOMEM); + return 0; + } + } + return N; +} + +/* +** Append N copies of character c to the given string buffer. +*/ +SQLITE_PRIVATE void sqlite3AppendChar(StrAccum *p, int N, char c){ + testcase( p->nChar + (i64)N > 0x7fffffff ); + if( p->nChar+(i64)N >= p->nAlloc && (N = sqlite3StrAccumEnlarge(p, N))<=0 ){ + return; + } + while( (N--)>0 ) p->zText[p->nChar++] = c; +} + +/* +** The StrAccum "p" is not large enough to accept N new bytes of z[]. +** So enlarge if first, then do the append. +** +** This is a helper routine to sqlite3StrAccumAppend() that does special-case +** work (enlarging the buffer) using tail recursion, so that the +** sqlite3StrAccumAppend() routine can use fast calling semantics. +*/ +static void SQLITE_NOINLINE enlargeAndAppend(StrAccum *p, const char *z, int N){ + N = sqlite3StrAccumEnlarge(p, N); + if( N>0 ){ + memcpy(&p->zText[p->nChar], z, N); + p->nChar += N; + } +} + +/* +** Append N bytes of text from z to the StrAccum object. Increase the +** size of the memory allocation for StrAccum if necessary. +*/ +SQLITE_PRIVATE void sqlite3StrAccumAppend(StrAccum *p, const char *z, int N){ + assert( z!=0 || N==0 ); + assert( p->zText!=0 || p->nChar==0 || p->accError ); + assert( N>=0 ); + assert( p->accError==0 || p->nAlloc==0 ); + if( p->nChar+N >= p->nAlloc ){ + enlargeAndAppend(p,z,N); + }else{ + assert( p->zText ); + p->nChar += N; + memcpy(&p->zText[p->nChar-N], z, N); + } +} + +/* +** Finish off a string by making sure it is zero-terminated. +** Return a pointer to the resulting string. Return a NULL +** pointer if any kind of error was encountered. +*/ +SQLITE_PRIVATE char *sqlite3StrAccumFinish(StrAccum *p){ + if( p->zText ){ + p->zText[p->nChar] = 0; + if( p->mxAlloc>0 && p->zText==p->zBase ){ + p->zText = sqlite3DbMallocRaw(p->pBtree, p->nChar+1 ); + if( p->zText ){ + memcpy(p->zText, p->zBase, p->nChar+1); + }else{ + setStrAccumError(p, STRACCUM_NOMEM); + } + } + } + return p->zText; +} + +/* +** Reset an StrAccum string. Reclaim all malloced memory. +*/ +SQLITE_PRIVATE void sqlite3StrAccumReset(StrAccum *p){ + if( p->zText!=p->zBase ){ + sqlite3DbFree(p->pBtree, p->zText); + } + p->zText = 0; +} + +/* +** Initialize a string accumulator. +** +** p: The accumulator to be initialized. +** db: Pointer to a database connection. May be NULL. Lookaside +** memory is used if not NULL. db->mallocFailed is set appropriately +** when not NULL. +** zBase: An initial buffer. May be NULL in which case the initial buffer +** is malloced. +** n: Size of zBase in bytes. If total space requirements never exceed +** n then no memory allocations ever occur. +** mx: Maximum number of bytes to accumulate. If mx==0 then no memory +** allocations will ever occur. +*/ +SQLITE_PRIVATE void sqlite3StrAccumInit(StrAccum *p, Btree* pBtree, char *zBase, int n, int mx){ + p->zText = p->zBase = zBase; + p->pBtree = pBtree; + p->nChar = 0; + p->nAlloc = n; + p->mxAlloc = mx; + p->accError = 0; +} + +/* +** Print into memory obtained from sqlite3_malloc(). Omit the internal +** %-conversion extensions. +*/ +SQLITE_PRIVATE char *sqlite3_vmprintf(const char *zFormat, va_list ap){ + char *z; + char zBase[SQLITE_PRINT_BUF_SIZE]; + StrAccum acc; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( zFormat==0 ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3BtreeInitialize() ) return 0; +#endif + sqlite3StrAccumInit(&acc, 0, zBase, sizeof(zBase), SQLITE_MAX_LENGTH); + sqlite3VXPrintf(&acc, 0, zFormat, ap); + z = sqlite3StrAccumFinish(&acc); + return z; +} + +/* +** Print into memory obtained from sqlite3_malloc()(). Omit the internal +** %-conversion extensions. +*/ +SQLITE_PRIVATE char *sqlite3_mprintf(const char *zFormat, ...){ + va_list ap; + char *z; +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3BtreeInitialize() ) return 0; +#endif + va_start(ap, zFormat); + z = sqlite3_vmprintf(zFormat, ap); + va_end(ap); + return z; +} + +/* +** sqlite3_snprintf() works like snprintf() except that it ignores the +** current locale settings. This is important for SQLite because we +** are not able to use a "," as the decimal point in place of "." as +** specified by some locales. +** +** Oops: The first two arguments of sqlite3_snprintf() are backwards +** from the snprintf() standard. Unfortunately, it is too late to change +** this without breaking compatibility, so we just have to live with the +** mistake. +** +** sqlite3_vsnprintf() is the varargs version. +*/ +SQLITE_PRIVATE char *sqlite3_vsnprintf(int n, char *zBuf, const char *zFormat, va_list ap){ + StrAccum acc; + if( n<=0 ) return zBuf; +#ifdef SQLITE_ENABLE_API_ARMOR + if( zBuf==0 || zFormat==0 ) { + (void)SQLITE_MISUSE_BKPT; + if( zBuf ) zBuf[0] = 0; + return zBuf; + } +#endif + sqlite3StrAccumInit(&acc, 0, zBuf, n, 0); + sqlite3VXPrintf(&acc, 0, zFormat, ap); + return sqlite3StrAccumFinish(&acc); +} +SQLITE_PRIVATE char *sqlite3_snprintf(int n, char *zBuf, const char *zFormat, ...){ + char *z; + va_list ap; + va_start(ap,zFormat); + z = sqlite3_vsnprintf(n, zBuf, zFormat, ap); + va_end(ap); + return z; +} + +/* +** This is the routine that actually formats the sqlite3_log() message. +** We house it in a separate routine from sqlite3_log() to avoid using +** stack space on small-stack systems when logging is disabled. +** +** sqlite3_log() must render into a static buffer. It cannot dynamically +** allocate memory because it might be called while the memory allocator +** mutex is held. +** +** sqlite3VXPrintf() might ask for *temporary* memory allocations for +** certain format characters (%q) or for very large precisions or widths. +** Care must be taken that any sqlite3_log() calls that occur while the +** memory mutex is held do not use these mechanisms. +*/ +static void renderLogMsg(int iErrCode, const char *zFormat, va_list ap){ + StrAccum acc; /* String accumulator */ + char zMsg[SQLITE_PRINT_BUF_SIZE*3]; /* Complete log message */ + + sqlite3StrAccumInit(&acc, 0, zMsg, sizeof(zMsg), 0); + sqlite3VXPrintf(&acc, 0, zFormat, ap); + sqlite3GlobalConfig.xLog(sqlite3GlobalConfig.pLogArg, iErrCode, + sqlite3StrAccumFinish(&acc)); +} + +/* +** Format and write a message to the log if logging is enabled. +*/ +SQLITE_PRIVATE void sqlite3_log(int iErrCode, const char *zFormat, ...){ + va_list ap; /* Vararg list */ + if( sqlite3GlobalConfig.xLog ){ + va_start(ap, zFormat); + renderLogMsg(iErrCode, zFormat, ap); + va_end(ap); + } +} + +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) +/* +** A version of printf() that understands %lld. Used for debugging. +** The printf() built into some versions of windows does not understand %lld +** and segfaults if you give it a long long int. +*/ +SQLITE_PRIVATE void sqlite3DebugPrintf(const char *zFormat, ...){ + va_list ap; + StrAccum acc; + char zBuf[500]; + sqlite3StrAccumInit(&acc, 0, zBuf, sizeof(zBuf), 0); + va_start(ap,zFormat); + sqlite3VXPrintf(&acc, 0, zFormat, ap); + va_end(ap); + sqlite3StrAccumFinish(&acc); + fprintf(stdout,"%s", zBuf); + fflush(stdout); +} +#endif + + +/* +** variable-argument wrapper around sqlite3VXPrintf(). The bFlags argument +** can contain the bit SQLITE_PRINTF_INTERNAL enable internal formats. +*/ +SQLITE_PRIVATE void sqlite3XPrintf(StrAccum *p, u32 bFlags, const char *zFormat, ...){ + va_list ap; + va_start(ap,zFormat); + sqlite3VXPrintf(p, bFlags, zFormat, ap); + va_end(ap); +} + +/************** End of printf.c **********************************************/ +/************** Begin file random.c ******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code to implement a pseudo-random number +** generator (PRNG) for SQLite. +** +** Random numbers are used by some of the database backends in order +** to generate random integer keys for tables or random filenames. +*/ +/* #include "sqliteInt.h" */ + + +/* All threads share a single random number generator. +** This structure is the current state of the generator. +*/ +static SQLITE_WSD struct sqlite3PrngType { + unsigned char isInit; /* True if initialized */ + unsigned char i, j; /* State variables */ + unsigned char s[256]; /* State variables */ +} sqlite3Prng; + +/* +** Return N random bytes. +*/ +SQLITE_PRIVATE void sqlite3_randomness(int N, void *pBuf){ + unsigned char t; + unsigned char *zBuf = pBuf; + + /* The "wsdPrng" macro will resolve to the pseudo-random number generator + ** state vector. If writable static data is unsupported on the target, + ** we have to locate the state vector at run-time. In the more common + ** case where writable static data is supported, wsdPrng can refer directly + ** to the "sqlite3Prng" state vector declared above. + */ +#ifdef SQLITE_OMIT_WSD + struct sqlite3PrngType *p = &GLOBAL(struct sqlite3PrngType, sqlite3Prng); +# define wsdPrng p[0] +#else +# define wsdPrng sqlite3Prng +#endif + +#if SQLITE_THREADSAFE + sqlite3_mutex *mutex; +#endif + +#ifndef SQLITE_OMIT_AUTOINIT + if( sqlite3BtreeInitialize() ) return; +#endif + +#if SQLITE_THREADSAFE + mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_PRNG); +#endif + + sqlite3_mutex_enter(mutex); + if( N<=0 || pBuf==0 ){ + wsdPrng.isInit = 0; + sqlite3_mutex_leave(mutex); + return; + } + + /* Initialize the state of the random number generator once, + ** the first time this routine is called. The seed value does + ** not need to contain a lot of randomness since we are not + ** trying to do secure encryption or anything like that... + ** + ** Nothing in this file or anywhere else in SQLite does any kind of + ** encryption. The RC4 algorithm is being used as a PRNG (pseudo-random + ** number generator) not as an encryption device. + */ + if( !wsdPrng.isInit ){ + int i; + char k[256]; + wsdPrng.j = 0; + wsdPrng.i = 0; + sqlite3OsRandomness(sqlite3_vfs_find(0), 256, k); + for(i=0; i<256; i++){ + wsdPrng.s[i] = (u8)i; + } + for(i=0; i<256; i++){ + wsdPrng.j += wsdPrng.s[i] + k[i]; + t = wsdPrng.s[wsdPrng.j]; + wsdPrng.s[wsdPrng.j] = wsdPrng.s[i]; + wsdPrng.s[i] = t; + } + wsdPrng.isInit = 1; + } + + assert( N>0 ); + do{ + wsdPrng.i++; + t = wsdPrng.s[wsdPrng.i]; + wsdPrng.j += t; + wsdPrng.s[wsdPrng.i] = wsdPrng.s[wsdPrng.j]; + wsdPrng.s[wsdPrng.j] = t; + t += wsdPrng.s[wsdPrng.i]; + *(zBuf++) = wsdPrng.s[t]; + }while( --N ); + sqlite3_mutex_leave(mutex); +} + +/************** End of random.c **********************************************/ +/************** Begin file util.c ********************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Utility functions used throughout sqlite. +** +** This file contains functions for allocating memory, comparing +** strings, and stuff like that. +** +*/ +/* #include "sqliteInt.h" */ +/* #include <stdarg.h> */ +#if HAVE_ISNAN || SQLITE_HAVE_ISNAN +# include <math.h> +#endif + +/* +** Routine needed to support the testcase() macro. +*/ +#ifdef SQLITE_COVERAGE_TEST +SQLITE_PRIVATE void sqlite3Coverage(int x){ + static unsigned dummy = 0; + dummy += (unsigned)x; +} +#endif + +#ifndef SQLITE_OMIT_FLOATING_POINT +/* +** Return true if the floating point value is Not a Number (NaN). +** +** Use the math library isnan() function if compiled with SQLITE_HAVE_ISNAN. +** Otherwise, we have our own implementation that works on most systems. +*/ +SQLITE_PRIVATE int sqlite3IsNaN(double x){ + int rc; /* The value return */ +#if !SQLITE_HAVE_ISNAN && !HAVE_ISNAN + /* + ** Systems that support the isnan() library function should probably + ** make use of it by compiling with -DSQLITE_HAVE_ISNAN. But we have + ** found that many systems do not have a working isnan() function so + ** this implementation is provided as an alternative. + ** + ** This NaN test sometimes fails if compiled on GCC with -ffast-math. + ** On the other hand, the use of -ffast-math comes with the following + ** warning: + ** + ** This option [-ffast-math] should never be turned on by any + ** -O option since it can result in incorrect output for programs + ** which depend on an exact implementation of IEEE or ISO + ** rules/specifications for math functions. + ** + ** Under MSVC, this NaN test may fail if compiled with a floating- + ** point precision mode other than /fp:precise. From the MSDN + ** documentation: + ** + ** The compiler [with /fp:precise] will properly handle comparisons + ** involving NaN. For example, x != x evaluates to true if x is NaN + ** ... + */ +#ifdef __FAST_MATH__ +# error SQLite will not work correctly with the -ffast-math option of GCC. +#endif + volatile double y = x; + volatile double z = y; + rc = (y!=z); +#else /* if HAVE_ISNAN */ + rc = isnan(x); +#endif /* HAVE_ISNAN */ + testcase( rc ); + return rc; +} +#endif /* SQLITE_OMIT_FLOATING_POINT */ + +/* +** Compute a string length that is limited to what can be stored in +** lower 30 bits of a 32-bit signed integer. +** +** The value returned will never be negative. Nor will it ever be greater +** than the actual length of the string. For very long strings (greater +** than 1GiB) the value returned might be less than the true string length. +*/ +SQLITE_PRIVATE int sqlite3Strlen30(const char *z){ + if( z==0 ) return 0; + return 0x3fffffff & (int)strlen(z); +} + +/* +** The string z[] is an text representation of a real number. +** Convert this string to a double and write it into *pResult. +** +** The string z[] is length bytes in length (bytes, not characters) and +** uses the encoding enc. The string is not necessarily zero-terminated. +** +** Return TRUE if the result is a valid real number (or integer) and FALSE +** if the string is empty or contains extraneous text. Valid numbers +** are in one of these formats: +** +** [+-]digits[E[+-]digits] +** [+-]digits.[digits][E[+-]digits] +** [+-].digits[E[+-]digits] +** +** Leading and trailing whitespace is ignored for the purpose of determining +** validity. +** +** If some prefix of the input string is a valid number, this routine +** returns FALSE but it still converts the prefix and writes the result +** into *pResult. +*/ +SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 enc){ +#ifndef SQLITE_OMIT_FLOATING_POINT + int incr; + const char *zEnd = z + length; + /* sign * significand * (10 ^ (esign * exponent)) */ + int sign = 1; /* sign of significand */ + i64 s = 0; /* significand */ + int d = 0; /* adjust exponent for shifting decimal point */ + int esign = 1; /* sign of exponent */ + int e = 0; /* exponent */ + int eValid = 1; /* True exponent is either not used or is well-formed */ + double result; + int nDigits = 0; + int nonNum = 0; + + assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE ); + *pResult = 0.0; /* Default return value, in case of an error */ + + if( enc==SQLITE_UTF8 ){ + incr = 1; + }else{ + int i; + incr = 2; + assert( SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 ); + for(i=3-enc; i<length && z[i]==0; i+=2){} + nonNum = i<length; + zEnd = z+i+enc-3; + z += (enc&1); + } + + /* skip leading spaces */ + while( z<zEnd && sqlite3Isspace(*z) ) z+=incr; + if( z>=zEnd ) return 0; + + /* get sign of significand */ + if( *z=='-' ){ + sign = -1; + z+=incr; + }else if( *z=='+' ){ + z+=incr; + } + + /* skip leading zeroes */ + while( z<zEnd && z[0]=='0' ) z+=incr, nDigits++; + + /* copy max significant digits to significand */ + while( z<zEnd && sqlite3Isdigit(*z) && s<((LARGEST_INT64-9)/10) ){ + s = s*10 + (*z - '0'); + z+=incr, nDigits++; + } + + /* skip non-significant significand digits + ** (increase exponent by d to shift decimal left) */ + while( z<zEnd && sqlite3Isdigit(*z) ) z+=incr, nDigits++, d++; + if( z>=zEnd ) goto do_atof_calc; + + /* if decimal point is present */ + if( *z=='.' ){ + z+=incr; + /* copy digits from after decimal to significand + ** (decrease exponent by d to shift decimal right) */ + while( z<zEnd && sqlite3Isdigit(*z) && s<((LARGEST_INT64-9)/10) ){ + s = s*10 + (*z - '0'); + z+=incr, nDigits++, d--; + } + /* skip non-significant digits */ + while( z<zEnd && sqlite3Isdigit(*z) ) z+=incr, nDigits++; + } + if( z>=zEnd ) goto do_atof_calc; + + /* if exponent is present */ + if( *z=='e' || *z=='E' ){ + z+=incr; + eValid = 0; + if( z>=zEnd ) goto do_atof_calc; + /* get sign of exponent */ + if( *z=='-' ){ + esign = -1; + z+=incr; + }else if( *z=='+' ){ + z+=incr; + } + /* copy digits to exponent */ + while( z<zEnd && sqlite3Isdigit(*z) ){ + e = e<10000 ? (e*10 + (*z - '0')) : 10000; + z+=incr; + eValid = 1; + } + } + + /* skip trailing spaces */ + if( nDigits && eValid ){ + while( z<zEnd && sqlite3Isspace(*z) ) z+=incr; + } + +do_atof_calc: + /* adjust exponent by d, and update sign */ + e = (e*esign) + d; + if( e<0 ) { + esign = -1; + e *= -1; + } else { + esign = 1; + } + + /* if 0 significand */ + if( !s ) { + /* In the IEEE 754 standard, zero is signed. + ** Add the sign if we've seen at least one digit */ + result = (sign<0 && nDigits) ? -(double)0 : (double)0; + } else { + /* attempt to reduce exponent */ + if( esign>0 ){ + while( s<(LARGEST_INT64/10) && e>0 ) e--,s*=10; + }else{ + while( !(s%10) && e>0 ) e--,s/=10; + } + + /* adjust the sign of significand */ + s = sign<0 ? -s : s; + + /* if exponent, scale significand as appropriate + ** and store in result. */ + if( e ){ + LONGDOUBLE_TYPE scale = 1.0; + /* attempt to handle extremely small/large numbers better */ + if( e>307 && e<342 ){ + while( e%308 ) { scale *= 1.0e+1; e -= 1; } + if( esign<0 ){ + result = s / scale; + result /= 1.0e+308; + }else{ + result = s * scale; + result *= 1.0e+308; + } + }else if( e>=342 ){ + if( esign<0 ){ + result = 0.0*s; + }else{ + result = 1e308*1e308*s; /* Infinity */ + } + }else{ + /* 1.0e+22 is the largest power of 10 than can be + ** represented exactly. */ + while( e%22 ) { scale *= 1.0e+1; e -= 1; } + while( e>0 ) { scale *= 1.0e+22; e -= 22; } + if( esign<0 ){ + result = s / scale; + }else{ + result = s * scale; + } + } + } else { + result = (double)s; + } + } + + /* store the result */ + *pResult = result; + + /* return true if number and no extra non-whitespace chracters after */ + return z>=zEnd && nDigits>0 && eValid && nonNum==0; +#else + return !sqlite3Atoi64(z, pResult, length, enc); +#endif /* SQLITE_OMIT_FLOATING_POINT */ +} + +/* +** Compare the 19-character string zNum against the text representation +** value 2^63: 9223372036854775808. Return negative, zero, or positive +** if zNum is less than, equal to, or greater than the string. +** Note that zNum must contain exactly 19 characters. +** +** Unlike memcmp() this routine is guaranteed to return the difference +** in the values of the last digit if the only difference is in the +** last digit. So, for example, +** +** compare2pow63("9223372036854775800", 1) +** +** will return -8. +*/ +static int compare2pow63(const char *zNum, int incr){ + int c = 0; + int i; + /* 012345678901234567 */ + const char *pow63 = "922337203685477580"; + for(i=0; c==0 && i<18; i++){ + c = (zNum[i*incr]-pow63[i])*10; + } + if( c==0 ){ + c = zNum[18*incr] - '8'; + testcase( c==(-1) ); + testcase( c==0 ); + testcase( c==(+1) ); + } + return c; +} + +/* +** Convert zNum to a 64-bit signed integer. zNum must be decimal. This +** routine does *not* accept hexadecimal notation. +** +** If the zNum value is representable as a 64-bit twos-complement +** integer, then write that value into *pNum and return 0. +** +** If zNum is exactly 9223372036854775808, return 2. This special +** case is broken out because while 9223372036854775808 cannot be a +** signed 64-bit integer, its negative -9223372036854775808 can be. +** +** If zNum is too big for a 64-bit integer and is not +** 9223372036854775808 or if zNum contains any non-numeric text, +** then return 1. +** +** length is the number of bytes in the string (bytes, not characters). +** The string is not necessarily zero-terminated. The encoding is +** given by enc. +*/ +SQLITE_PRIVATE int sqlite3Atoi64(const char *zNum, i64 *pNum, int length, u8 enc){ + int incr; + u64 u = 0; + int neg = 0; /* assume positive */ + int i; + int c = 0; + int nonNum = 0; + const char *zStart; + const char *zEnd = zNum + length; + assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE ); + if( enc==SQLITE_UTF8 ){ + incr = 1; + }else{ + incr = 2; + assert( SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 ); + for(i=3-enc; i<length && zNum[i]==0; i+=2){} + nonNum = i<length; + zEnd = zNum+i+enc-3; + zNum += (enc&1); + } + while( zNum<zEnd && sqlite3Isspace(*zNum) ) zNum+=incr; + if( zNum<zEnd ){ + if( *zNum=='-' ){ + neg = 1; + zNum+=incr; + }else if( *zNum=='+' ){ + zNum+=incr; + } + } + zStart = zNum; + while( zNum<zEnd && zNum[0]=='0' ){ zNum+=incr; } /* Skip leading zeros. */ + for(i=0; &zNum[i]<zEnd && (c=zNum[i])>='0' && c<='9'; i+=incr){ + u = u*10 + c - '0'; + } + if( u>LARGEST_INT64 ){ + *pNum = neg ? SMALLEST_INT64 : LARGEST_INT64; + }else if( neg ){ + *pNum = -(i64)u; + }else{ + *pNum = (i64)u; + } + testcase( i==18 ); + testcase( i==19 ); + testcase( i==20 ); + if( (c!=0 && &zNum[i]<zEnd) || (i==0 && zStart==zNum) || i>19*incr || nonNum ){ + /* zNum is empty or contains non-numeric text or is longer + ** than 19 digits (thus guaranteeing that it is too large) */ + return 1; + }else if( i<19*incr ){ + /* Less than 19 digits, so we know that it fits in 64 bits */ + assert( u<=LARGEST_INT64 ); + return 0; + }else{ + /* zNum is a 19-digit numbers. Compare it against 9223372036854775808. */ + c = compare2pow63(zNum, incr); + if( c<0 ){ + /* zNum is less than 9223372036854775808 so it fits */ + assert( u<=LARGEST_INT64 ); + return 0; + }else if( c>0 ){ + /* zNum is greater than 9223372036854775808 so it overflows */ + return 1; + }else{ + /* zNum is exactly 9223372036854775808. Fits if negative. The + ** special case 2 overflow if positive */ + assert( u-1==LARGEST_INT64 ); + return neg ? 0 : 2; + } + } +} + +/* +** The variable-length integer encoding is as follows: +** +** KEY: +** A = 0xxxxxxx 7 bits of data and one flag bit +** B = 1xxxxxxx 7 bits of data and one flag bit +** C = xxxxxxxx 8 bits of data +** +** 7 bits - A +** 14 bits - BA +** 21 bits - BBA +** 28 bits - BBBA +** 35 bits - BBBBA +** 42 bits - BBBBBA +** 49 bits - BBBBBBA +** 56 bits - BBBBBBBA +** 64 bits - BBBBBBBBC +*/ + +/* +** Write a 64-bit variable-length integer to memory starting at p[0]. +** The length of data write will be between 1 and 9 bytes. The number +** of bytes written is returned. +** +** A variable-length integer consists of the lower 7 bits of each byte +** for all bytes that have the 8th bit set and one byte with the 8th +** bit clear. Except, if we get to the 9th byte, it stores the full +** 8 bits and is the last byte. +*/ +static int SQLITE_NOINLINE putVarint64(unsigned char *p, u64 v){ + int i, j, n; + u8 buf[10]; + if( v & (((u64)0xff000000)<<32) ){ + p[8] = (u8)v; + v >>= 8; + for(i=7; i>=0; i--){ + p[i] = (u8)((v & 0x7f) | 0x80); + v >>= 7; + } + return 9; + } + n = 0; + do{ + buf[n++] = (u8)((v & 0x7f) | 0x80); + v >>= 7; + }while( v!=0 ); + buf[0] &= 0x7f; + assert( n<=9 ); + for(i=0, j=n-1; j>=0; j--, i++){ + p[i] = buf[j]; + } + return n; +} +SQLITE_API int SQLITE_STDCALL sqlite3BtreePutVarint(unsigned char *p, u64 v){ + if( v<=0x7f ){ + p[0] = v&0x7f; + return 1; + } + if( v<=0x3fff ){ + p[0] = ((v>>7)&0x7f)|0x80; + p[1] = v&0x7f; + return 2; + } + return putVarint64(p,v); +} + +/* +** Bitmasks used by sqlite3GetVarint(). These precomputed constants +** are defined here rather than simply putting the constant expressions +** inline in order to work around bugs in the RVT compiler. +** +** SLOT_2_0 A mask for (0x7f<<14) | 0x7f +** +** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0 +*/ +#define SLOT_2_0 0x001fc07f +#define SLOT_4_2_0 0xf01fc07f + + +/* +** Read a 64-bit variable-length integer from memory starting at p[0]. +** Return the number of bytes read. The value is stored in *v. +*/ +SQLITE_API u8 SQLITE_STDCALL sqlite3BtreeGetVarint(const unsigned char *p, u64 *v){ + u32 a,b,s; + + a = *p; + /* a: p0 (unmasked) */ + if (!(a&0x80)) + { + *v = a; + return 1; + } + + p++; + b = *p; + /* b: p1 (unmasked) */ + if (!(b&0x80)) + { + a &= 0x7f; + a = a<<7; + a |= b; + *v = a; + return 2; + } + + /* Verify that constants are precomputed correctly */ + assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) ); + assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) ); + + p++; + a = a<<14; + a |= *p; + /* a: p0<<14 | p2 (unmasked) */ + if (!(a&0x80)) + { + a &= SLOT_2_0; + b &= 0x7f; + b = b<<7; + a |= b; + *v = a; + return 3; + } + + /* CSE1 from below */ + a &= SLOT_2_0; + p++; + b = b<<14; + b |= *p; + /* b: p1<<14 | p3 (unmasked) */ + if (!(b&0x80)) + { + b &= SLOT_2_0; + /* moved CSE1 up */ + /* a &= (0x7f<<14)|(0x7f); */ + a = a<<7; + a |= b; + *v = a; + return 4; + } + + /* a: p0<<14 | p2 (masked) */ + /* b: p1<<14 | p3 (unmasked) */ + /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ + /* moved CSE1 up */ + /* a &= (0x7f<<14)|(0x7f); */ + b &= SLOT_2_0; + s = a; + /* s: p0<<14 | p2 (masked) */ + + p++; + a = a<<14; + a |= *p; + /* a: p0<<28 | p2<<14 | p4 (unmasked) */ + if (!(a&0x80)) + { + /* we can skip these cause they were (effectively) done above in calc'ing s */ + /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ + /* b &= (0x7f<<14)|(0x7f); */ + b = b<<7; + a |= b; + s = s>>18; + *v = ((u64)s)<<32 | a; + return 5; + } + + /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ + s = s<<7; + s |= b; + /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ + + p++; + b = b<<14; + b |= *p; + /* b: p1<<28 | p3<<14 | p5 (unmasked) */ + if (!(b&0x80)) + { + /* we can skip this cause it was (effectively) done above in calc'ing s */ + /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ + a &= SLOT_2_0; + a = a<<7; + a |= b; + s = s>>18; + *v = ((u64)s)<<32 | a; + return 6; + } + + p++; + a = a<<14; + a |= *p; + /* a: p2<<28 | p4<<14 | p6 (unmasked) */ + if (!(a&0x80)) + { + a &= SLOT_4_2_0; + b &= SLOT_2_0; + b = b<<7; + a |= b; + s = s>>11; + *v = ((u64)s)<<32 | a; + return 7; + } + + /* CSE2 from below */ + a &= SLOT_2_0; + p++; + b = b<<14; + b |= *p; + /* b: p3<<28 | p5<<14 | p7 (unmasked) */ + if (!(b&0x80)) + { + b &= SLOT_4_2_0; + /* moved CSE2 up */ + /* a &= (0x7f<<14)|(0x7f); */ + a = a<<7; + a |= b; + s = s>>4; + *v = ((u64)s)<<32 | a; + return 8; + } + + p++; + a = a<<15; + a |= *p; + /* a: p4<<29 | p6<<15 | p8 (unmasked) */ + + /* moved CSE2 up */ + /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */ + b &= SLOT_2_0; + b = b<<8; + a |= b; + + s = s<<4; + b = p[-4]; + b &= 0x7f; + b = b>>3; + s |= b; + + *v = ((u64)s)<<32 | a; + + return 9; +} + +/* +** Read a 32-bit variable-length integer from memory starting at p[0]. +** Return the number of bytes read. The value is stored in *v. +** +** If the varint stored in p[0] is larger than can fit in a 32-bit unsigned +** integer, then set *v to 0xffffffff. +** +** A MACRO version, getVarint32, is provided which inlines the +** single-byte case. All code should use the MACRO version as +** this function assumes the single-byte case has already been handled. +*/ +SQLITE_API u8 SQLITE_STDCALL sqlite3BtreeGetVarint32(const unsigned char *p, u32 *v){ + u32 a,b; + + /* The 1-byte case. Overwhelmingly the most common. Handled inline + ** by the getVarin32() macro */ + a = *p; + /* a: p0 (unmasked) */ +#ifndef getVarint32 + if (!(a&0x80)) + { + /* Values between 0 and 127 */ + *v = a; + return 1; + } +#endif + + /* The 2-byte case */ + p++; + b = *p; + /* b: p1 (unmasked) */ + if (!(b&0x80)) + { + /* Values between 128 and 16383 */ + a &= 0x7f; + a = a<<7; + *v = a | b; + return 2; + } + + /* The 3-byte case */ + p++; + a = a<<14; + a |= *p; + /* a: p0<<14 | p2 (unmasked) */ + if (!(a&0x80)) + { + /* Values between 16384 and 2097151 */ + a &= (0x7f<<14)|(0x7f); + b &= 0x7f; + b = b<<7; + *v = a | b; + return 3; + } + + /* A 32-bit varint is used to store size information in btrees. + ** Objects are rarely larger than 2MiB limit of a 3-byte varint. + ** A 3-byte varint is sufficient, for example, to record the size + ** of a 1048569-byte BLOB or string. + ** + ** We only unroll the first 1-, 2-, and 3- byte cases. The very + ** rare larger cases can be handled by the slower 64-bit varint + ** routine. + */ +#if 1 + { + u64 v64; + u8 n; + + p -= 2; + n = sqlite3BtreeGetVarint(p, &v64); + assert( n>3 && n<=9 ); + if( (v64 & SQLITE_MAX_U32)!=v64 ){ + *v = 0xffffffff; + }else{ + *v = (u32)v64; + } + return n; + } + +#else + /* For following code (kept for historical record only) shows an + ** unrolling for the 3- and 4-byte varint cases. This code is + ** slightly faster, but it is also larger and much harder to test. + */ + p++; + b = b<<14; + b |= *p; + /* b: p1<<14 | p3 (unmasked) */ + if (!(b&0x80)) + { + /* Values between 2097152 and 268435455 */ + b &= (0x7f<<14)|(0x7f); + a &= (0x7f<<14)|(0x7f); + a = a<<7; + *v = a | b; + return 4; + } + + p++; + a = a<<14; + a |= *p; + /* a: p0<<28 | p2<<14 | p4 (unmasked) */ + if (!(a&0x80)) + { + /* Values between 268435456 and 34359738367 */ + a &= SLOT_4_2_0; + b &= SLOT_4_2_0; + b = b<<7; + *v = a | b; + return 5; + } + + /* We can only reach this point when reading a corrupt database + ** file. In that case we are not in any hurry. Use the (relatively + ** slow) general-purpose sqlite3BtreeGetVarint() routine to extract the + ** value. */ + { + u64 v64; + u8 n; + + p -= 4; + n = sqlite3BtreeGetVarint(p, &v64); + assert( n>5 && n<=9 ); + *v = (u32)v64; + return n; + } +#endif +} + +/* +** Return the number of bytes that will be needed to store the given +** 64-bit integer. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeVarintLen(u64 v){ + int i; + for(i=1; (v >>= 7)!=0; i++){ assert( i<9 ); } + return i; +} + + +/* +** Read or write a four-byte big-endian integer value. +*/ +SQLITE_PRIVATE u32 sqlite3Get4byte(const u8 *p){ +#if SQLITE_BYTEORDER==4321 + u32 x; + memcpy(&x,p,4); + return x; +#elif SQLITE_BYTEORDER==1234 && !defined(SQLITE_DISABLE_INTRINSIC) \ + && defined(__GNUC__) && GCC_VERSION>=4003000 + u32 x; + memcpy(&x,p,4); + return __builtin_bswap32(x); +#elif SQLITE_BYTEORDER==1234 && !defined(SQLITE_DISABLE_INTRINSIC) \ + && defined(_MSC_VER) && _MSC_VER>=1300 + u32 x; + memcpy(&x,p,4); + return _byteswap_ulong(x); +#else + testcase( p[0]&0x80 ); + return ((unsigned)p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3]; +#endif +} +SQLITE_PRIVATE void sqlite3Put4byte(unsigned char *p, u32 v){ +#if SQLITE_BYTEORDER==4321 + memcpy(p,&v,4); +#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__) && GCC_VERSION>=4003000 + u32 x = __builtin_bswap32(v); + memcpy(p,&x,4); +#elif SQLITE_BYTEORDER==1234 && defined(_MSC_VER) && _MSC_VER>=1300 + u32 x = _byteswap_ulong(v); + memcpy(p,&x,4); +#else + p[0] = (u8)(v>>24); + p[1] = (u8)(v>>16); + p[2] = (u8)(v>>8); + p[3] = (u8)v; +#endif +} + +/* +** Log an error that is an API call on a connection pointer that should +** not have been used. The "type" of connection pointer is given as the +** argument. The zType is a word like "NULL" or "closed" or "invalid". +*/ +static void logBadConnection(const char *zType){ + sqlite3_log(SQLITE_MISUSE, + "API call with %s database connection pointer", + zType + ); +} + +SQLITE_PRIVATE int sqlite3SafetyCheckSickOrOk(Btree *pBtree){ + u32 magic; + magic = pBtree->magic; + if( magic!=SQLITE_MAGIC_SICK && + magic!=SQLITE_MAGIC_OPEN && + magic!=SQLITE_MAGIC_BUSY ){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + logBadConnection("invalid"); + return 0; + }else{ + return 1; + } +} + +/* +** Compute the absolute value of a 32-bit signed integer, of possible. Or +** if the integer has a value of -2147483648, return +2147483647 +*/ +SQLITE_PRIVATE int sqlite3AbsInt32(int x){ + if( x>=0 ) return x; + if( x==(int)0x80000000 ) return 0x7fffffff; + return -x; +} + +/************** End of util.c ************************************************/ +/************** Begin file os_unix.c *****************************************/ +/* +** 2004 May 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains the VFS implementation for unix-like operating systems +** include Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others. +** +** There are actually several different VFS implementations in this file. +** The differences are in the way that file locking is done. The default +** implementation uses Posix Advisory Locks. Alternative implementations +** use flock(), dot-files, various proprietary locking schemas, or simply +** skip locking all together. +** +** This source file is organized into divisions where the logic for various +** subfunctions is contained within the appropriate division. PLEASE +** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed +** in the correct division and should be clearly labeled. +** +** The layout of divisions is as follows: +** +** * General-purpose declarations and utility functions. +** * Unique file ID logic used by VxWorks. +** * Various locking primitive implementations (all except proxy locking): +** + for Posix Advisory Locks +** + for no-op locks +** + for dot-file locks +** + for flock() locking +** + for named semaphore locks (VxWorks only) +** + for AFP filesystem locks (MacOSX only) +** * sqlite3_file methods not associated with locking. +** * Definitions of sqlite3_io_methods objects for all locking +** methods plus "finder" functions for each locking method. +** * sqlite3_vfs method implementations. +** * Locking primitives for the proxy uber-locking-method. (MacOSX only) +** * Definitions of sqlite3_vfs objects for all locking methods +** plus implementations of sqlite3_os_init() and sqlite3_os_end(). +*/ +/* #include "sqliteInt.h" */ +#if SQLITE_OS_UNIX /* This file is used on unix only */ + +/* +** There are various methods for file locking used for concurrency +** control: +** +** 1. POSIX locking (the default), +** 2. No locking, +** 3. Dot-file locking, +** 4. flock() locking, +** 5. AFP locking (OSX only), +** 6. Named POSIX semaphores (VXWorks only), +** 7. proxy locking. (OSX only) +** +** Styles 4, 5, and 7 are only available of SQLITE_ENABLE_LOCKING_STYLE +** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic +** selection of the appropriate locking style based on the filesystem +** where the database is located. +*/ +#if !defined(SQLITE_ENABLE_LOCKING_STYLE) +# if defined(__APPLE__) +# define SQLITE_ENABLE_LOCKING_STYLE 1 +# else +# define SQLITE_ENABLE_LOCKING_STYLE 0 +# endif +#endif + +/* +** standard include files. +*/ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <sys/time.h> +#include <errno.h> +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 +# include <sys/mman.h> +#endif + +#if SQLITE_ENABLE_LOCKING_STYLE +# include <sys/ioctl.h> +# include <sys/file.h> +# include <sys/param.h> +#endif /* SQLITE_ENABLE_LOCKING_STYLE */ + +#if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \ + (__IPHONE_OS_VERSION_MIN_REQUIRED > 2000)) +# if (!defined(TARGET_OS_EMBEDDED) || (TARGET_OS_EMBEDDED==0)) \ + && (!defined(TARGET_IPHONE_SIMULATOR) || (TARGET_IPHONE_SIMULATOR==0)) +# define HAVE_GETHOSTUUID 1 +# else +# warning "gethostuuid() is disabled." +# endif +#endif + + +#if OS_VXWORKS +/* # include <sys/ioctl.h> */ +# include <semaphore.h> +# include <limits.h> +#endif /* OS_VXWORKS */ + +#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE +# include <sys/mount.h> +#endif + +#ifdef HAVE_UTIME +# include <utime.h> +#endif + +/* +** Allowed values of unixFile.fsFlags +*/ +#define SQLITE_FSFLAGS_IS_MSDOS 0x1 + +/* +** If we are to be thread-safe, include the pthreads header and define +** the SQLITE_UNIX_THREADS macro. +*/ +#if SQLITE_THREADSAFE +/* # include <pthread.h> */ +# define SQLITE_UNIX_THREADS 1 +#endif + +/* +** Default permissions when creating a new file +*/ +#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS +# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644 +#endif + +/* +** Default permissions when creating auto proxy dir +*/ +#ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS +# define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755 +#endif + +/* +** Maximum supported path-length. +*/ +#define MAX_PATHNAME 512 + +/* Always cast the getpid() return type for compatibility with +** kernel modules in VxWorks. */ +#define osGetpid(X) (pid_t)getpid() + +/* +** Only set the lastErrno if the error code is a real error and not +** a normal expected return code of SQLITE_BUSY or SQLITE_OK +*/ +#define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY)) + +/* Forward references */ +typedef struct unixShm unixShm; /* Connection shared memory */ +typedef struct unixShmNode unixShmNode; /* Shared memory instance */ +typedef struct unixInodeInfo unixInodeInfo; /* An i-node */ +typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */ + +/* +** Sometimes, after a file handle is closed by SQLite, the file descriptor +** cannot be closed immediately. In these cases, instances of the following +** structure are used to store the file descriptor while waiting for an +** opportunity to either close or reuse it. +*/ +struct UnixUnusedFd { + int fd; /* File descriptor to close */ + int flags; /* Flags this file descriptor was opened with */ + UnixUnusedFd *pNext; /* Next unused file descriptor on same file */ +}; + +/* +** The unixFile structure is subclass of sqlite3_file specific to the unix +** VFS implementations. +*/ +typedef struct unixFile unixFile; +struct unixFile { + sqlite3_io_methods const *pMethod; /* Always the first entry */ + sqlite3_vfs *pVfs; /* The VFS that created this unixFile */ + unixInodeInfo *pInode; /* Info about locks on this inode */ + int h; /* The file descriptor */ + unsigned char eFileLock; /* The type of lock held on this fd */ + unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ + int lastErrno; /* The unix errno from last I/O error */ + void *lockingContext; /* Locking style specific state */ + UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */ + const char *zPath; /* Name of the file */ + unixShm *pShm; /* Shared memory segment information */ + int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ +#if SQLITE_MAX_MMAP_SIZE>0 + int nFetchOut; /* Number of outstanding xFetch refs */ + sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ + sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ + sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ + void *pMapRegion; /* Memory mapped region */ +#endif +#ifdef __QNXNTO__ + int sectorSize; /* Device sector size */ + int deviceCharacteristics; /* Precomputed device characteristics */ +#endif +#if SQLITE_ENABLE_LOCKING_STYLE + int openFlags; /* The flags specified at open() */ +#endif +#if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__) + unsigned fsFlags; /* cached details from statfs() */ +#endif +#if OS_VXWORKS + struct vxworksFileId *pId; /* Unique file ID */ +#endif +#ifdef SQLITE_DEBUG + /* The next group of variables are used to track whether or not the + ** transaction counter in bytes 24-27 of database files are updated + ** whenever any part of the database changes. An assertion fault will + ** occur if a file is updated without also updating the transaction + ** counter. This test is made to avoid new problems similar to the + ** one described by ticket #3584. + */ + unsigned char transCntrChng; /* True if the transaction counter changed */ + unsigned char dbUpdate; /* True if any part of database file changed */ + unsigned char inNormalWrite; /* True if in a normal write operation */ + +#endif + +#ifdef SQLITE_TEST + /* In test mode, increase the size of this structure a bit so that + ** it is larger than the struct CrashFile defined in test6.c. + */ + char aPadding[32]; +#endif +}; + +/* This variable holds the process id (pid) from when the xRandomness() +** method was called. If xOpen() is called from a different process id, +** indicating that a fork() has occurred, the PRNG will be reset. +*/ +static pid_t randomnessPid = 0; + +/* +** Allowed values for the unixFile.ctrlFlags bitmask: +*/ +#define UNIXFILE_EXCL 0x01 /* Connections from one process only */ +#define UNIXFILE_RDONLY 0x02 /* Connection is read only */ +#define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ +#ifndef SQLITE_DISABLE_DIRSYNC +# define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ +#else +# define UNIXFILE_DIRSYNC 0x00 +#endif +#define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ +#define UNIXFILE_DELETE 0x20 /* Delete on close */ +#define UNIXFILE_URI 0x40 /* Filename might have query parameters */ +#define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ +#define UNIXFILE_WARNED 0x0100 /* verifyDbFile() warnings issued */ +#define UNIXFILE_BLOCK 0x0200 /* Next SHM lock might block */ + +/* +** Include code that is common to all os_*.c files +*/ +/************** Include os_common.h in the middle of os_unix.c ***************/ +/************** Begin file os_common.h ***************************************/ +/* +** 2004 May 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains macros and a little bit of code that is common to +** all of the platform-specific files (os_*.c) and is #included into those +** files. +** +** This file should be #included by the os_*.c files only. It is not a +** general purpose header file. +*/ +#ifndef _OS_COMMON_H_ +#define _OS_COMMON_H_ + +/* +** At least two bugs have slipped in because we changed the MEMORY_DEBUG +** macro to SQLITE_DEBUG and some older makefiles have not yet made the +** switch. The following code should catch this problem at compile-time. +*/ +#ifdef MEMORY_DEBUG +# error "The MEMORY_DEBUG macro is obsolete. Use SQLITE_DEBUG instead." +#endif + +/* +** Macros for performance tracing. Normally turned off. Only works +** on i486 hardware. +*/ +#ifdef SQLITE_PERFORMANCE_TRACE + +/* +** hwtime.h contains inline assembler code for implementing +** high-performance timing routines. +*/ +/* #include "hwtime.h" */ + +static sqlite_uint64 g_start; +static sqlite_uint64 g_elapsed; +#define TIMER_START g_start=sqlite3Hwtime() +#define TIMER_END g_elapsed=sqlite3Hwtime()-g_start +#define TIMER_ELAPSED g_elapsed +#else +#define TIMER_START +#define TIMER_END +#define TIMER_ELAPSED ((sqlite_uint64)0) +#endif + +/* +** If we compile with the SQLITE_TEST macro set, then the following block +** of code will give us the ability to simulate a disk I/O error. This +** is used for testing the I/O recovery logic. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3_io_error_hit = 0; /* Total number of I/O Errors */ +SQLITE_PRIVATE int sqlite3_io_error_hardhit = 0; /* Number of non-benign errors */ +SQLITE_PRIVATE int sqlite3_io_error_pending = 0; /* Count down to first I/O error */ +SQLITE_PRIVATE int sqlite3_io_error_persist = 0; /* True if I/O errors persist */ +SQLITE_PRIVATE int sqlite3_io_error_benign = 0; /* True if errors are benign */ +SQLITE_PRIVATE int sqlite3_diskfull_pending = 0; +SQLITE_PRIVATE int sqlite3_diskfull = 0; +#define SimulateIOErrorBenign(X) sqlite3_io_error_benign=(X) +#define SimulateIOError(CODE) \ + if( (sqlite3_io_error_persist && sqlite3_io_error_hit) \ + || sqlite3_io_error_pending-- == 1 ) \ + { local_ioerr(); CODE; } +static void local_ioerr(){ + IOTRACE(("IOERR\n")); + sqlite3_io_error_hit++; + if( !sqlite3_io_error_benign ) sqlite3_io_error_hardhit++; +} +#define SimulateDiskfullError(CODE) \ + if( sqlite3_diskfull_pending ){ \ + if( sqlite3_diskfull_pending == 1 ){ \ + local_ioerr(); \ + sqlite3_diskfull = 1; \ + sqlite3_io_error_hit = 1; \ + CODE; \ + }else{ \ + sqlite3_diskfull_pending--; \ + } \ + } +#else +#define SimulateIOErrorBenign(X) +#define SimulateIOError(A) +#define SimulateDiskfullError(A) +#endif + +/* +** When testing, keep a count of the number of open files. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3_open_file_count = 0; +#define OpenCounter(X) sqlite3_open_file_count+=(X) +#else +#define OpenCounter(X) +#endif + +#endif /* !defined(_OS_COMMON_H_) */ + +/************** End of os_common.h *******************************************/ +/************** Continuing where we left off in os_unix.c ********************/ + +/* +** Define various macros that are missing from some systems. +*/ +#ifndef O_LARGEFILE +# define O_LARGEFILE 0 +#endif +#ifdef SQLITE_DISABLE_LFS +# undef O_LARGEFILE +# define O_LARGEFILE 0 +#endif +#ifndef O_NOFOLLOW +# define O_NOFOLLOW 0 +#endif +#ifndef O_BINARY +# define O_BINARY 0 +#endif + +/* +** The threadid macro resolves to the thread-id or to 0. Used for +** testing and debugging only. +*/ +#if SQLITE_THREADSAFE +#define threadid pthread_self() +#else +#define threadid 0 +#endif + +/* +** HAVE_MREMAP defaults to true on Linux and false everywhere else. +*/ +#if !defined(HAVE_MREMAP) +# if defined(__linux__) && defined(_GNU_SOURCE) +# define HAVE_MREMAP 1 +# else +# define HAVE_MREMAP 0 +# endif +#endif + +/* +** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek() +** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined. +*/ +#ifdef __ANDROID__ +# define lseek lseek64 +#endif + +/* +** Different Unix systems declare open() in different ways. Same use +** open(const char*,int,mode_t). Others use open(const char*,int,...). +** The difference is important when using a pointer to the function. +** +** The safest way to deal with the problem is to always use this wrapper +** which always has the same well-defined interface. +*/ +static int posixOpen(const char *zFile, int flags, int mode){ + return open(zFile, flags, mode); +} + +/* +** On some systems, calls to fchown() will trigger a message in a security +** log if they come from non-root processes. So avoid calling fchown() if +** we are not running as root. +*/ +static int posixFchown(int fd, uid_t uid, gid_t gid){ +#if OS_VXWORKS + return 0; +#else + return geteuid() ? 0 : fchown(fd,uid,gid); +#endif +} + +/* Forward reference */ +static int openDirectory(const char*, int*); +static int unixGetpagesize(void); + +/* +** Many system calls are accessed through pointer-to-functions so that +** they may be overridden at runtime to facilitate fault injection during +** testing and sandboxing. The following array holds the names and pointers +** to all overrideable system calls. +*/ +static struct unix_syscall { + const char *zName; /* Name of the system call */ + sqlite3_syscall_ptr pCurrent; /* Current value of the system call */ + sqlite3_syscall_ptr pDefault; /* Default value */ +} aSyscall[] = { + { "open", (sqlite3_syscall_ptr)posixOpen, 0 }, +#define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent) + + { "close", (sqlite3_syscall_ptr)close, 0 }, +#define osClose ((int(*)(int))aSyscall[1].pCurrent) + + { "access", (sqlite3_syscall_ptr)access, 0 }, +#define osAccess ((int(*)(const char*,int))aSyscall[2].pCurrent) + + { "getcwd", (sqlite3_syscall_ptr)getcwd, 0 }, +#define osGetcwd ((char*(*)(char*,size_t))aSyscall[3].pCurrent) + + { "stat", (sqlite3_syscall_ptr)stat, 0 }, +#define osStat ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent) + +/* +** The DJGPP compiler environment looks mostly like Unix, but it +** lacks the fcntl() system call. So redefine fcntl() to be something +** that always succeeds. This means that locking does not occur under +** DJGPP. But it is DOS - what did you expect? +*/ +#ifdef __DJGPP__ + { "fstat", 0, 0 }, +#define osFstat(a,b,c) 0 +#else + { "fstat", (sqlite3_syscall_ptr)fstat, 0 }, +#define osFstat ((int(*)(int,struct stat*))aSyscall[5].pCurrent) +#endif + + { "ftruncate", (sqlite3_syscall_ptr)ftruncate, 0 }, +#define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent) + + { "fcntl", (sqlite3_syscall_ptr)fcntl, 0 }, +#define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent) + + { "read", (sqlite3_syscall_ptr)read, 0 }, +#define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent) + +#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE + { "pread", (sqlite3_syscall_ptr)pread, 0 }, +#else + { "pread", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent) + +#if defined(USE_PREAD64) + { "pread64", (sqlite3_syscall_ptr)pread64, 0 }, +#else + { "pread64", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osPread64 ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[10].pCurrent) + + { "write", (sqlite3_syscall_ptr)write, 0 }, +#define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent) + +#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE + { "pwrite", (sqlite3_syscall_ptr)pwrite, 0 }, +#else + { "pwrite", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\ + aSyscall[12].pCurrent) + +#if defined(USE_PREAD64) + { "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 }, +#else + { "pwrite64", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off_t))\ + aSyscall[13].pCurrent) + + { "fchmod", (sqlite3_syscall_ptr)fchmod, 0 }, +#define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent) + +#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE + { "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 }, +#else + { "fallocate", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent) + + { "unlink", (sqlite3_syscall_ptr)unlink, 0 }, +#define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent) + + { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 }, +#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent) + + { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 }, +#define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent) + + { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 }, +#define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) + + { "fchown", (sqlite3_syscall_ptr)posixFchown, 0 }, +#define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) + +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 + { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, +#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent) + + { "munmap", (sqlite3_syscall_ptr)munmap, 0 }, +#define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent) + +#if HAVE_MREMAP + { "mremap", (sqlite3_syscall_ptr)mremap, 0 }, +#else + { "mremap", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent) + { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 }, +#define osGetpagesize ((int(*)(void))aSyscall[24].pCurrent) + +#endif + +}; /* End of the overrideable system calls */ + +/* +** This is the xSetSystemCall() method of sqlite3_vfs for all of the +** "unix" VFSes. Return SQLITE_OK opon successfully updating the +** system call pointer, or SQLITE_NOTFOUND if there is no configurable +** system call named zName. +*/ +static int unixSetSystemCall( + sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */ + const char *zName, /* Name of system call to override */ + sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */ +){ + unsigned int i; + int rc = SQLITE_NOTFOUND; + + UNUSED_PARAMETER(pNotUsed); + if( zName==0 ){ + /* If no zName is given, restore all system calls to their default + ** settings and return NULL + */ + rc = SQLITE_OK; + for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ + if( aSyscall[i].pDefault ){ + aSyscall[i].pCurrent = aSyscall[i].pDefault; + } + } + }else{ + /* If zName is specified, operate on only the one system call + ** specified. + */ + for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ + if( strcmp(zName, aSyscall[i].zName)==0 ){ + if( aSyscall[i].pDefault==0 ){ + aSyscall[i].pDefault = aSyscall[i].pCurrent; + } + rc = SQLITE_OK; + if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault; + aSyscall[i].pCurrent = pNewFunc; + break; + } + } + } + return rc; +} + +/* +** Return the value of a system call. Return NULL if zName is not a +** recognized system call name. NULL is also returned if the system call +** is currently undefined. +*/ +static sqlite3_syscall_ptr unixGetSystemCall( + sqlite3_vfs *pNotUsed, + const char *zName +){ + unsigned int i; + + UNUSED_PARAMETER(pNotUsed); + for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ + if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent; + } + return 0; +} + +/* +** Return the name of the first system call after zName. If zName==NULL +** then return the name of the first system call. Return NULL if zName +** is the last system call or if zName is not the name of a valid +** system call. +*/ +static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){ + int i = -1; + + UNUSED_PARAMETER(p); + if( zName ){ + for(i=0; i<ArraySize(aSyscall)-1; i++){ + if( strcmp(zName, aSyscall[i].zName)==0 ) break; + } + } + for(i++; i<ArraySize(aSyscall); i++){ + if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName; + } + return 0; +} + +/* +** Do not accept any file descriptor less than this value, in order to avoid +** opening database file using file descriptors that are commonly used for +** standard input, output, and error. +*/ +#ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR +# define SQLITE_MINIMUM_FILE_DESCRIPTOR 3 +#endif + +/* +** Invoke open(). Do so multiple times, until it either succeeds or +** fails for some reason other than EINTR. +** +** If the file creation mode "m" is 0 then set it to the default for +** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally +** 0644) as modified by the system umask. If m is not 0, then +** make the file creation mode be exactly m ignoring the umask. +** +** The m parameter will be non-zero only when creating -wal, -journal, +** and -shm files. We want those files to have *exactly* the same +** permissions as their original database, unadulterated by the umask. +** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a +** transaction crashes and leaves behind hot journals, then any +** process that is able to write to the database will also be able to +** recover the hot journals. +*/ +static int robust_open(const char *z, int f, mode_t m){ + int fd; + mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS; + while(1){ +#if defined(O_CLOEXEC) + fd = osOpen(z,f|O_CLOEXEC,m2); +#else + fd = osOpen(z,f,m2); +#endif + if( fd<0 ){ + if( errno==EINTR ) continue; + break; + } + if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break; + osClose(fd); +// sqlite3_log(SQLITE_WARNING, +// "attempt to open \"%s\" as file descriptor %d", z, fd); + fd = -1; + if( osOpen("/dev/null", f, m)<0 ) break; + } + if( fd>=0 ){ + if( m!=0 ){ + struct stat statbuf; + if( osFstat(fd, &statbuf)==0 + && statbuf.st_size==0 + && (statbuf.st_mode&0777)!=m + ){ + osFchmod(fd, m); + } + } +#if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0) + osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); +#endif + } + return fd; +} + +/* +** Helper functions to obtain and relinquish the global mutex. The +** global mutex is used to protect the unixInodeInfo and +** vxworksFileId objects used by this file, all of which may be +** shared by multiple threads. +** +** Function unixMutexHeld() is used to assert() that the global mutex +** is held when required. This function is only used as part of assert() +** statements. e.g. +** +** unixEnterMutex() +** assert( unixMutexHeld() ); +** unixEnterLeave() +*/ +static void unixEnterMutex(void){ + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); +} +static void unixLeaveMutex(void){ + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); +} + +#ifndef NDEBUG +static int unixMutexHeld(void) { + return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); +} +#endif + + + +#ifdef SQLITE_HAVE_OS_TRACE +/* +** Helper function for printing out trace information from debugging +** binaries. This returns the string representation of the supplied +** integer lock-type. +*/ +static const char *azFileLock(int eFileLock){ + switch( eFileLock ){ + case NO_LOCK: return "NONE"; + case SHARED_LOCK: return "SHARED"; + case RESERVED_LOCK: return "RESERVED"; + case PENDING_LOCK: return "PENDING"; + case EXCLUSIVE_LOCK: return "EXCLUSIVE"; + } + return "ERROR"; +} +#endif + +#ifdef SQLITE_LOCK_TRACE +/* +** Print out information about all locking operations. +** +** This routine is used for troubleshooting locks on multithreaded +** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE +** command-line option on the compiler. This code is normally +** turned off. +*/ +static int lockTrace(int fd, int op, struct flock *p){ + char *zOpName, *zType; + int s; + int savedErrno; + if( op==F_GETLK ){ + zOpName = "GETLK"; + }else if( op==F_SETLK ){ + zOpName = "SETLK"; + }else{ + s = osFcntl(fd, op, p); + sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s); + return s; + } + if( p->l_type==F_RDLCK ){ + zType = "RDLCK"; + }else if( p->l_type==F_WRLCK ){ + zType = "WRLCK"; + }else if( p->l_type==F_UNLCK ){ + zType = "UNLCK"; + }else{ + assert( 0 ); + } + assert( p->l_whence==SEEK_SET ); + s = osFcntl(fd, op, p); + savedErrno = errno; + sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n", + threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len, + (int)p->l_pid, s); + if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){ + struct flock l2; + l2 = *p; + osFcntl(fd, F_GETLK, &l2); + if( l2.l_type==F_RDLCK ){ + zType = "RDLCK"; + }else if( l2.l_type==F_WRLCK ){ + zType = "WRLCK"; + }else if( l2.l_type==F_UNLCK ){ + zType = "UNLCK"; + }else{ + assert( 0 ); + } + sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n", + zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid); + } + errno = savedErrno; + return s; +} +#undef osFcntl +#define osFcntl lockTrace +#endif /* SQLITE_LOCK_TRACE */ + +/* +** Retry ftruncate() calls that fail due to EINTR +** +** All calls to ftruncate() within this file should be made through +** this wrapper. On the Android platform, bypassing the logic below +** could lead to a corrupt database. +*/ +static int robust_ftruncate(int h, sqlite3_int64 sz){ + int rc; +#ifdef __ANDROID__ + /* On Android, ftruncate() always uses 32-bit offsets, even if + ** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to + ** truncate a file to any size larger than 2GiB. Silently ignore any + ** such attempts. */ + if( sz>(sqlite3_int64)0x7FFFFFFF ){ + rc = SQLITE_OK; + }else +#endif + do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR ); + return rc; +} + +/* +** This routine translates a standard POSIX errno code into something +** useful to the clients of the sqlite3 functions. Specifically, it is +** intended to translate a variety of "try again" errors into SQLITE_BUSY +** and a variety of "please close the file descriptor NOW" errors into +** SQLITE_IOERR +** +** Errors during initialization of locks, or file system support for locks, +** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately. +*/ +static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { + switch (posixError) { +#if 0 + /* At one point this code was not commented out. In theory, this branch + ** should never be hit, as this function should only be called after + ** a locking-related function (i.e. fcntl()) has returned non-zero with + ** the value of errno as the first argument. Since a system call has failed, + ** errno should be non-zero. + ** + ** Despite this, if errno really is zero, we still don't want to return + ** SQLITE_OK. The system call failed, and *some* SQLite error should be + ** propagated back to the caller. Commenting this branch out means errno==0 + ** will be handled by the "default:" case below. + */ + case 0: + return SQLITE_OK; +#endif + + case EAGAIN: + case ETIMEDOUT: + case EBUSY: + case EINTR: + case ENOLCK: + /* random NFS retry error, unless during file system support + * introspection, in which it actually means what it says */ + return SQLITE_BUSY; + + case EACCES: + /* EACCES is like EAGAIN during locking operations, but not any other time*/ + if( (sqliteIOErr == SQLITE_IOERR_LOCK) || + (sqliteIOErr == SQLITE_IOERR_UNLOCK) || + (sqliteIOErr == SQLITE_IOERR_RDLOCK) || + (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){ + return SQLITE_BUSY; + } + /* else fall through */ + case EPERM: + return SQLITE_PERM; + +#if EOPNOTSUPP!=ENOTSUP + case EOPNOTSUPP: + /* something went terribly awry, unless during file system support + * introspection, in which it actually means what it says */ +#endif +#ifdef ENOTSUP + case ENOTSUP: + /* invalid fd, unless during file system support introspection, in which + * it actually means what it says */ +#endif + case EIO: + case EBADF: + case EINVAL: + case ENOTCONN: + case ENODEV: + case ENXIO: + case ENOENT: +#ifdef ESTALE /* ESTALE is not defined on Interix systems */ + case ESTALE: +#endif + case ENOSYS: + /* these should force the client to close the file and reconnect */ + + default: + return sqliteIOErr; + } +} + + +/****************************************************************************** +****************** Begin Unique File ID Utility Used By VxWorks *************** +** +** On most versions of unix, we can get a unique ID for a file by concatenating +** the device number and the inode number. But this does not work on VxWorks. +** On VxWorks, a unique file id must be based on the canonical filename. +** +** A pointer to an instance of the following structure can be used as a +** unique file ID in VxWorks. Each instance of this structure contains +** a copy of the canonical filename. There is also a reference count. +** The structure is reclaimed when the number of pointers to it drops to +** zero. +** +** There are never very many files open at one time and lookups are not +** a performance-critical path, so it is sufficient to put these +** structures on a linked list. +*/ +struct vxworksFileId { + struct vxworksFileId *pNext; /* Next in a list of them all */ + int nRef; /* Number of references to this one */ + int nName; /* Length of the zCanonicalName[] string */ + char *zCanonicalName; /* Canonical filename */ +}; + +#if OS_VXWORKS +/* +** All unique filenames are held on a linked list headed by this +** variable: +*/ +static struct vxworksFileId *vxworksFileList = 0; + +/* +** Simplify a filename into its canonical form +** by making the following changes: +** +** * removing any trailing and duplicate / +** * convert /./ into just / +** * convert /A/../ where A is any simple name into just / +** +** Changes are made in-place. Return the new name length. +** +** The original filename is in z[0..n-1]. Return the number of +** characters in the simplified name. +*/ +static int vxworksSimplifyName(char *z, int n){ + int i, j; + while( n>1 && z[n-1]=='/' ){ n--; } + for(i=j=0; i<n; i++){ + if( z[i]=='/' ){ + if( z[i+1]=='/' ) continue; + if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){ + i += 1; + continue; + } + if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){ + while( j>0 && z[j-1]!='/' ){ j--; } + if( j>0 ){ j--; } + i += 2; + continue; + } + } + z[j++] = z[i]; + } + z[j] = 0; + return j; +} + +/* +** Find a unique file ID for the given absolute pathname. Return +** a pointer to the vxworksFileId object. This pointer is the unique +** file ID. +** +** The nRef field of the vxworksFileId object is incremented before +** the object is returned. A new vxworksFileId object is created +** and added to the global list if necessary. +** +** If a memory allocation error occurs, return NULL. +*/ +static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){ + struct vxworksFileId *pNew; /* search key and new file ID */ + struct vxworksFileId *pCandidate; /* For looping over existing file IDs */ + int n; /* Length of zAbsoluteName string */ + + assert( zAbsoluteName[0]=='/' ); + n = (int)strlen(zAbsoluteName); + pNew = sqlite3_malloc64( sizeof(*pNew) + (n+1) ); + if( pNew==0 ) return 0; + pNew->zCanonicalName = (char*)&pNew[1]; + memcpy(pNew->zCanonicalName, zAbsoluteName, n+1); + n = vxworksSimplifyName(pNew->zCanonicalName, n); + + /* Search for an existing entry that matching the canonical name. + ** If found, increment the reference count and return a pointer to + ** the existing file ID. + */ + unixEnterMutex(); + for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){ + if( pCandidate->nName==n + && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0 + ){ + sqlite3_free(pNew); + pCandidate->nRef++; + unixLeaveMutex(); + return pCandidate; + } + } + + /* No match was found. We will make a new file ID */ + pNew->nRef = 1; + pNew->nName = n; + pNew->pNext = vxworksFileList; + vxworksFileList = pNew; + unixLeaveMutex(); + return pNew; +} + +/* +** Decrement the reference count on a vxworksFileId object. Free +** the object when the reference count reaches zero. +*/ +static void vxworksReleaseFileId(struct vxworksFileId *pId){ + unixEnterMutex(); + assert( pId->nRef>0 ); + pId->nRef--; + if( pId->nRef==0 ){ + struct vxworksFileId **pp; + for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){} + assert( *pp==pId ); + *pp = pId->pNext; + sqlite3_free(pId); + } + unixLeaveMutex(); +} +#endif /* OS_VXWORKS */ +/*************** End of Unique File ID Utility Used By VxWorks **************** +******************************************************************************/ + + +/****************************************************************************** +*************************** Posix Advisory Locking **************************** +** +** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996) +** section 6.5.2.2 lines 483 through 490 specify that when a process +** sets or clears a lock, that operation overrides any prior locks set +** by the same process. It does not explicitly say so, but this implies +** that it overrides locks set by the same process using a different +** file descriptor. Consider this test case: +** +** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644); +** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); +** +** Suppose ./file1 and ./file2 are really the same file (because +** one is a hard or symbolic link to the other) then if you set +** an exclusive lock on fd1, then try to get an exclusive lock +** on fd2, it works. I would have expected the second lock to +** fail since there was already a lock on the file due to fd1. +** But not so. Since both locks came from the same process, the +** second overrides the first, even though they were on different +** file descriptors opened on different file names. +** +** This means that we cannot use POSIX locks to synchronize file access +** among competing threads of the same process. POSIX locks will work fine +** to synchronize access for threads in separate processes, but not +** threads within the same process. +** +** To work around the problem, SQLite has to manage file locks internally +** on its own. Whenever a new database is opened, we have to find the +** specific inode of the database file (the inode is determined by the +** st_dev and st_ino fields of the stat structure that fstat() fills in) +** and check for locks already existing on that inode. When locks are +** created or removed, we have to look at our own internal record of the +** locks to see if another thread has previously set a lock on that same +** inode. +** +** (Aside: The use of inode numbers as unique IDs does not work on VxWorks. +** For VxWorks, we have to use the alternative unique ID system based on +** canonical filename and implemented in the previous division.) +** +** The sqlite3_file structure for POSIX is no longer just an integer file +** descriptor. It is now a structure that holds the integer file +** descriptor and a pointer to a structure that describes the internal +** locks on the corresponding inode. There is one locking structure +** per inode, so if the same inode is opened twice, both unixFile structures +** point to the same locking structure. The locking structure keeps +** a reference count (so we will know when to delete it) and a "cnt" +** field that tells us its internal lock status. cnt==0 means the +** file is unlocked. cnt==-1 means the file has an exclusive lock. +** cnt>0 means there are cnt shared locks on the file. +** +** Any attempt to lock or unlock a file first checks the locking +** structure. The fcntl() system call is only invoked to set a +** POSIX lock if the internal lock structure transitions between +** a locked and an unlocked state. +** +** But wait: there are yet more problems with POSIX advisory locks. +** +** If you close a file descriptor that points to a file that has locks, +** all locks on that file that are owned by the current process are +** released. To work around this problem, each unixInodeInfo object +** maintains a count of the number of pending locks on tha inode. +** When an attempt is made to close an unixFile, if there are +** other unixFile open on the same inode that are holding locks, the call +** to close() the file descriptor is deferred until all of the locks clear. +** The unixInodeInfo structure keeps a list of file descriptors that need to +** be closed and that list is walked (and cleared) when the last lock +** clears. +** +** Yet another problem: LinuxThreads do not play well with posix locks. +** +** Many older versions of linux use the LinuxThreads library which is +** not posix compliant. Under LinuxThreads, a lock created by thread +** A cannot be modified or overridden by a different thread B. +** Only thread A can modify the lock. Locking behavior is correct +** if the appliation uses the newer Native Posix Thread Library (NPTL) +** on linux - with NPTL a lock created by thread A can override locks +** in thread B. But there is no way to know at compile-time which +** threading library is being used. So there is no way to know at +** compile-time whether or not thread A can override locks on thread B. +** One has to do a run-time check to discover the behavior of the +** current process. +** +** SQLite used to support LinuxThreads. But support for LinuxThreads +** was dropped beginning with version 3.7.0. SQLite will still work with +** LinuxThreads provided that (1) there is no more than one connection +** per database file in the same process and (2) database connections +** do not move across threads. +*/ + +/* +** An instance of the following structure serves as the key used +** to locate a particular unixInodeInfo object. +*/ +struct unixFileId { + dev_t dev; /* Device number */ +#if OS_VXWORKS + struct vxworksFileId *pId; /* Unique file ID for vxworks. */ +#else + ino_t ino; /* Inode number */ +#endif +}; + +/* +** An instance of the following structure is allocated for each open +** inode. Or, on LinuxThreads, there is one of these structures for +** each inode opened by each thread. +** +** A single inode can have multiple file descriptors, so each unixFile +** structure contains a pointer to an instance of this object and this +** object keeps a count of the number of unixFile pointing to it. +*/ +struct unixInodeInfo { + struct unixFileId fileId; /* The lookup key */ + int nShared; /* Number of SHARED locks held */ + unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ + unsigned char bProcessLock; /* An exclusive process lock is held */ + int nRef; /* Number of pointers to this structure */ + unixShmNode *pShmNode; /* Shared memory associated with this inode */ + int nLock; /* Number of outstanding file locks */ + UnixUnusedFd *pUnused; /* Unused file descriptors to close */ + unixInodeInfo *pNext; /* List of all unixInodeInfo objects */ + unixInodeInfo *pPrev; /* .... doubly linked */ +#if SQLITE_ENABLE_LOCKING_STYLE + unsigned long long sharedByte; /* for AFP simulated shared lock */ +#endif +#if OS_VXWORKS + sem_t *pSem; /* Named POSIX semaphore */ + char aSemName[MAX_PATHNAME+2]; /* Name of that semaphore */ +#endif +}; + +/* +** A lists of all unixInodeInfo objects. +*/ +static unixInodeInfo *inodeList = 0; + +/* +** +** This function - unixLogError_x(), is only ever called via the macro +** unixLogError(). +** +** It is invoked after an error occurs in an OS function and errno has been +** set. It logs a message using sqlite3_log() containing the current value of +** errno and, if possible, the human-readable equivalent from strerror() or +** strerror_r(). +** +** The first argument passed to the macro should be the error code that +** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). +** The two subsequent arguments should be the name of the OS function that +** failed (e.g. "unlink", "open") and the associated file-system path, +** if any. +*/ +#define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__) +static int unixLogErrorAtLine( + int errcode, /* SQLite error code */ + const char *zFunc, /* Name of OS function that failed */ + const char *zPath, /* File path associated with error */ + int iLine /* Source line number where error occurred */ +){ + char *zErr; /* Message from strerror() or equivalent */ + int iErrno = errno; /* Saved syscall error number */ + + /* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use + ** the strerror() function to obtain the human-readable error message + ** equivalent to errno. Otherwise, use strerror_r(). + */ +#if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R) + char aErr[80]; + memset(aErr, 0, sizeof(aErr)); + zErr = aErr; + + /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined, + ** assume that the system provides the GNU version of strerror_r() that + ** returns a pointer to a buffer containing the error message. That pointer + ** may point to aErr[], or it may point to some static storage somewhere. + ** Otherwise, assume that the system provides the POSIX version of + ** strerror_r(), which always writes an error message into aErr[]. + ** + ** If the code incorrectly assumes that it is the POSIX version that is + ** available, the error message will often be an empty string. Not a + ** huge problem. Incorrectly concluding that the GNU version is available + ** could lead to a segfault though. + */ +#if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU) + zErr = +# endif + strerror_r(iErrno, aErr, sizeof(aErr)-1); + +#elif SQLITE_THREADSAFE + /* This is a threadsafe build, but strerror_r() is not available. */ + zErr = ""; +#else + /* Non-threadsafe build, use strerror(). */ + zErr = strerror(iErrno); +#endif + + if( zPath==0 ) zPath = ""; + sqlite3_log(errcode, + "os_unix.c:%d: (%d) %s(%s) - %s", + iLine, iErrno, zFunc, zPath, zErr + ); + + return errcode; +} + +/* +** Close a file descriptor. +** +** We assume that close() almost always works, since it is only in a +** very sick application or on a very sick platform that it might fail. +** If it does fail, simply leak the file descriptor, but do log the +** error. +** +** Note that it is not safe to retry close() after EINTR since the +** file descriptor might have already been reused by another thread. +** So we don't even try to recover from an EINTR. Just log the error +** and move on. +*/ +static void robust_close(unixFile *pFile, int h, int lineno){ + if( osClose(h) ){ + unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close", + pFile ? pFile->zPath : 0, lineno); + } +} + +/* +** Set the pFile->lastErrno. Do this in a subroutine as that provides +** a convenient place to set a breakpoint. +*/ +static void storeLastErrno(unixFile *pFile, int error){ + pFile->lastErrno = error; +} + +/* +** Close all file descriptors accumuated in the unixInodeInfo->pUnused list. +*/ +static void closePendingFds(unixFile *pFile){ + unixInodeInfo *pInode = pFile->pInode; + UnixUnusedFd *p; + UnixUnusedFd *pNext; + for(p=pInode->pUnused; p; p=pNext){ + pNext = p->pNext; + robust_close(pFile, p->fd, __LINE__); + sqlite3_free(p); + } + pInode->pUnused = 0; +} + +/* +** Release a unixInodeInfo structure previously allocated by findInodeInfo(). +** +** The mutex entered using the unixEnterMutex() function must be held +** when this function is called. +*/ +static void releaseInodeInfo(unixFile *pFile){ + unixInodeInfo *pInode = pFile->pInode; + assert( unixMutexHeld() ); + if( ALWAYS(pInode) ){ + pInode->nRef--; + if( pInode->nRef==0 ){ + assert( pInode->pShmNode==0 ); + closePendingFds(pFile); + if( pInode->pPrev ){ + assert( pInode->pPrev->pNext==pInode ); + pInode->pPrev->pNext = pInode->pNext; + }else{ + assert( inodeList==pInode ); + inodeList = pInode->pNext; + } + if( pInode->pNext ){ + assert( pInode->pNext->pPrev==pInode ); + pInode->pNext->pPrev = pInode->pPrev; + } + sqlite3_free(pInode); + } + } +} + +/* +** Given a file descriptor, locate the unixInodeInfo object that +** describes that file descriptor. Create a new one if necessary. The +** return value might be uninitialized if an error occurs. +** +** The mutex entered using the unixEnterMutex() function must be held +** when this function is called. +** +** Return an appropriate error code. +*/ +static int findInodeInfo( + unixFile *pFile, /* Unix file with file desc used in the key */ + unixInodeInfo **ppInode /* Return the unixInodeInfo object here */ +){ + int rc; /* System call return code */ + int fd; /* The file descriptor for pFile */ + struct unixFileId fileId; /* Lookup key for the unixInodeInfo */ + struct stat statbuf; /* Low-level file information */ + unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */ + + assert( unixMutexHeld() ); + + /* Get low-level information about the file that we can used to + ** create a unique name for the file. + */ + fd = pFile->h; + rc = osFstat(fd, &statbuf); + if( rc!=0 ){ + storeLastErrno(pFile, errno); +#ifdef EOVERFLOW + if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS; +#endif + return SQLITE_IOERR; + } + +#ifdef __APPLE__ + /* On OS X on an msdos filesystem, the inode number is reported + ** incorrectly for zero-size files. See ticket #3260. To work + ** around this problem (we consider it a bug in OS X, not SQLite) + ** we always increase the file size to 1 by writing a single byte + ** prior to accessing the inode number. The one byte written is + ** an ASCII 'S' character which also happens to be the first byte + ** in the header of every SQLite database. In this way, if there + ** is a race condition such that another thread has already populated + ** the first page of the database, no damage is done. + */ + if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){ + do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR ); + if( rc!=1 ){ + storeLastErrno(pFile, errno); + return SQLITE_IOERR; + } + rc = osFstat(fd, &statbuf); + if( rc!=0 ){ + storeLastErrno(pFile, errno); + return SQLITE_IOERR; + } + } +#endif + + memset(&fileId, 0, sizeof(fileId)); + fileId.dev = statbuf.st_dev; +#if OS_VXWORKS + fileId.pId = pFile->pId; +#else + fileId.ino = statbuf.st_ino; +#endif + pInode = inodeList; + while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){ + pInode = pInode->pNext; + } + if( pInode==0 ){ + pInode = sqlite3_malloc64( sizeof(*pInode) ); + if( pInode==0 ){ + return SQLITE_NOMEM; + } + memset(pInode, 0, sizeof(*pInode)); + memcpy(&pInode->fileId, &fileId, sizeof(fileId)); + pInode->nRef = 1; + pInode->pNext = inodeList; + pInode->pPrev = 0; + if( inodeList ) inodeList->pPrev = pInode; + inodeList = pInode; + }else{ + pInode->nRef++; + } + *ppInode = pInode; + return SQLITE_OK; +} + +/* +** Return TRUE if pFile has been renamed or unlinked since it was first opened. +*/ +static int fileHasMoved(unixFile *pFile){ +#if OS_VXWORKS + return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId; +#else + struct stat buf; + return pFile->pInode!=0 && + (osStat(pFile->zPath, &buf)!=0 || buf.st_ino!=pFile->pInode->fileId.ino); +#endif +} + + +/* +** Check a unixFile that is a database. Verify the following: +** +** (1) There is exactly one hard link on the file +** (2) The file is not a symbolic link +** (3) The file has not been renamed or unlinked +** +** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right. +*/ +static void verifyDbFile(unixFile *pFile){ + struct stat buf; + int rc; + if( pFile->ctrlFlags & UNIXFILE_WARNED ){ + /* One or more of the following warnings have already been issued. Do not + ** repeat them so as not to clutter the error log */ + return; + } + rc = osFstat(pFile->h, &buf); + if( rc!=0 ){ +// sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } + if( buf.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){ +// sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } + if( buf.st_nlink>1 ){ +// sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } + if( fileHasMoved(pFile) ){ +// sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } +} + + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, set *pResOut +** to a non-zero value otherwise *pResOut is set to zero. The return value +** is set to SQLITE_OK unless an I/O error occurs during lock checking. +*/ +static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ + int rc = SQLITE_OK; + int reserved = 0; + unixFile *pFile = (unixFile*)id; + + SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + + assert( pFile ); + unixEnterMutex(); /* Because pFile->pInode is shared across threads */ + + /* Check if a thread in this process holds such a lock */ + if( pFile->pInode->eFileLock>SHARED_LOCK ){ + reserved = 1; + } + + /* Otherwise see if some other process holds it. + */ +#ifndef __DJGPP__ + if( !reserved && !pFile->pInode->bProcessLock ){ + struct flock lock; + lock.l_whence = SEEK_SET; + lock.l_start = RESERVED_BYTE; + lock.l_len = 1; + lock.l_type = F_WRLCK; + if( osFcntl(pFile->h, F_GETLK, &lock) ){ + rc = SQLITE_IOERR_CHECKRESERVEDLOCK; + storeLastErrno(pFile, errno); + } else if( lock.l_type!=F_UNLCK ){ + reserved = 1; + } + } +#endif + + unixLeaveMutex(); + OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved)); + + *pResOut = reserved; + return rc; +} + +/* +** Attempt to set a system-lock on the file pFile. The lock is +** described by pLock. +** +** If the pFile was opened read/write from unix-excl, then the only lock +** ever obtained is an exclusive lock, and it is obtained exactly once +** the first time any lock is attempted. All subsequent system locking +** operations become no-ops. Locking operations still happen internally, +** in order to coordinate access between separate database connections +** within this process, but all of that is handled in memory and the +** operating system does not participate. +** +** This function is a pass-through to fcntl(F_SETLK) if pFile is using +** any VFS other than "unix-excl" or if pFile is opened on "unix-excl" +** and is read-only. +** +** Zero is returned if the call completes successfully, or -1 if a call +** to fcntl() fails. In this case, errno is set appropriately (by fcntl()). +*/ +static int unixFileLock(unixFile *pFile, struct flock *pLock){ + int rc; + unixInodeInfo *pInode = pFile->pInode; + assert( unixMutexHeld() ); + assert( pInode!=0 ); + if( ((pFile->ctrlFlags & UNIXFILE_EXCL)!=0 || pInode->bProcessLock) + && ((pFile->ctrlFlags & UNIXFILE_RDONLY)==0) + ){ + if( pInode->bProcessLock==0 ){ + struct flock lock; + assert( pInode->nLock==0 ); + lock.l_whence = SEEK_SET; + lock.l_start = SHARED_FIRST; + lock.l_len = SHARED_SIZE; + lock.l_type = F_WRLCK; + rc = osFcntl(pFile->h, F_SETLK, &lock); + if( rc<0 ) return rc; + pInode->bProcessLock = 1; + pInode->nLock++; + }else{ + rc = 0; + } + }else{ + rc = osFcntl(pFile->h, F_SETLK, pLock); + } + return rc; +} + +/* +** Lock the file with the lock specified by parameter eFileLock - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. +*/ +static int unixLock(sqlite3_file *id, int eFileLock){ + /* The following describes the implementation of the various locks and + ** lock transitions in terms of the POSIX advisory shared and exclusive + ** lock primitives (called read-locks and write-locks below, to avoid + ** confusion with SQLite lock names). The algorithms are complicated + ** slightly in order to be compatible with windows systems simultaneously + ** accessing the same database file, in case that is ever required. + ** + ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved + ** byte', each single bytes at well known offsets, and the 'shared byte + ** range', a range of 510 bytes at a well known offset. + ** + ** To obtain a SHARED lock, a read-lock is obtained on the 'pending + ** byte'. If this is successful, a random byte from the 'shared byte + ** range' is read-locked and the lock on the 'pending byte' released. + ** + ** A process may only obtain a RESERVED lock after it has a SHARED lock. + ** A RESERVED lock is implemented by grabbing a write-lock on the + ** 'reserved byte'. + ** + ** A process may only obtain a PENDING lock after it has obtained a + ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock + ** on the 'pending byte'. This ensures that no new SHARED locks can be + ** obtained, but existing SHARED locks are allowed to persist. A process + ** does not have to obtain a RESERVED lock on the way to a PENDING lock. + ** This property is used by the algorithm for rolling back a journal file + ** after a crash. + ** + ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is + ** implemented by obtaining a write-lock on the entire 'shared byte + ** range'. Since all other locks require a read-lock on one of the bytes + ** within this range, this ensures that no other locks are held on the + ** database. + ** + ** The reason a single byte cannot be used instead of the 'shared byte + ** range' is that some versions of windows do not support read-locks. By + ** locking a random byte from a range, concurrent SHARED locks may exist + ** even if the locking primitive used is always a write-lock. + */ + int rc = SQLITE_OK; + unixFile *pFile = (unixFile*)id; + unixInodeInfo *pInode; + struct flock lock; + int tErrno = 0; + + assert( pFile ); + OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h, + azFileLock(eFileLock), azFileLock(pFile->eFileLock), + azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared, + osGetpid(0))); + + /* If there is already a lock of this type or more restrictive on the + ** unixFile, do nothing. Don't use the end_lock: exit path, as + ** unixEnterMutex() hasn't been called yet. + */ + if( pFile->eFileLock>=eFileLock ){ + OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h, + azFileLock(eFileLock))); + return SQLITE_OK; + } + + /* Make sure the locking sequence is correct. + ** (1) We never move from unlocked to anything higher than shared lock. + ** (2) SQLite never explicitly requests a pendig lock. + ** (3) A shared lock is always held when a reserve lock is requested. + */ + assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); + assert( eFileLock!=PENDING_LOCK ); + assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); + + /* This mutex is needed because pFile->pInode is shared across threads + */ + unixEnterMutex(); + pInode = pFile->pInode; + + /* If some thread using this PID has a lock via a different unixFile* + ** handle that precludes the requested lock, return BUSY. + */ + if( (pFile->eFileLock!=pInode->eFileLock && + (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) + ){ + rc = SQLITE_BUSY; + goto end_lock; + } + + /* If a SHARED lock is requested, and some thread using this PID already + ** has a SHARED or RESERVED lock, then increment reference counts and + ** return SQLITE_OK. + */ + if( eFileLock==SHARED_LOCK && + (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ + assert( eFileLock==SHARED_LOCK ); + assert( pFile->eFileLock==0 ); + assert( pInode->nShared>0 ); + pFile->eFileLock = SHARED_LOCK; + pInode->nShared++; + pInode->nLock++; + goto end_lock; + } + + + /* A PENDING lock is needed before acquiring a SHARED lock and before + ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will + ** be released. + */ + lock.l_len = 1L; + lock.l_whence = SEEK_SET; + if( eFileLock==SHARED_LOCK + || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK) + ){ + lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK); + lock.l_start = PENDING_BYTE; + if( unixFileLock(pFile, &lock) ){ + tErrno = errno; + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); + if( rc!=SQLITE_BUSY ){ + storeLastErrno(pFile, tErrno); + } + goto end_lock; + } + } + + + /* If control gets to this point, then actually go ahead and make + ** operating system calls for the specified lock. + */ + if( eFileLock==SHARED_LOCK ){ + assert( pInode->nShared==0 ); + assert( pInode->eFileLock==0 ); + assert( rc==SQLITE_OK ); + + /* Now get the read-lock */ + lock.l_start = SHARED_FIRST; + lock.l_len = SHARED_SIZE; + if( unixFileLock(pFile, &lock) ){ + tErrno = errno; + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); + } + + /* Drop the temporary PENDING lock */ + lock.l_start = PENDING_BYTE; + lock.l_len = 1L; + lock.l_type = F_UNLCK; + if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){ + /* This could happen with a network mount */ + tErrno = errno; + rc = SQLITE_IOERR_UNLOCK; + } + + if( rc ){ + if( rc!=SQLITE_BUSY ){ + storeLastErrno(pFile, tErrno); + } + goto end_lock; + }else{ + pFile->eFileLock = SHARED_LOCK; + pInode->nLock++; + pInode->nShared = 1; + } + }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ + /* We are trying for an exclusive lock but another thread in this + ** same process is still holding a shared lock. */ + rc = SQLITE_BUSY; + }else{ + /* The request was for a RESERVED or EXCLUSIVE lock. It is + ** assumed that there is a SHARED or greater lock on the file + ** already. + */ + assert( 0!=pFile->eFileLock ); + lock.l_type = F_WRLCK; + + assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK ); + if( eFileLock==RESERVED_LOCK ){ + lock.l_start = RESERVED_BYTE; + lock.l_len = 1L; + }else{ + lock.l_start = SHARED_FIRST; + lock.l_len = SHARED_SIZE; + } + + if( unixFileLock(pFile, &lock) ){ + tErrno = errno; + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); + if( rc!=SQLITE_BUSY ){ + storeLastErrno(pFile, tErrno); + } + } + } + + +#ifdef SQLITE_DEBUG + /* Set up the transaction-counter change checking flags when + ** transitioning from a SHARED to a RESERVED lock. The change + ** from SHARED to RESERVED marks the beginning of a normal + ** write operation (not a hot journal rollback). + */ + if( rc==SQLITE_OK + && pFile->eFileLock<=SHARED_LOCK + && eFileLock==RESERVED_LOCK + ){ + pFile->transCntrChng = 0; + pFile->dbUpdate = 0; + pFile->inNormalWrite = 1; + } +#endif + + + if( rc==SQLITE_OK ){ + pFile->eFileLock = eFileLock; + pInode->eFileLock = eFileLock; + }else if( eFileLock==EXCLUSIVE_LOCK ){ + pFile->eFileLock = PENDING_LOCK; + pInode->eFileLock = PENDING_LOCK; + } + +end_lock: + unixLeaveMutex(); + OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock), + rc==SQLITE_OK ? "ok" : "failed")); + return rc; +} + +/* +** Add the file descriptor used by file handle pFile to the corresponding +** pUnused list. +*/ +static void setPendingFd(unixFile *pFile){ + unixInodeInfo *pInode = pFile->pInode; + UnixUnusedFd *p = pFile->pUnused; + p->pNext = pInode->pUnused; + pInode->pUnused = p; + pFile->h = -1; + pFile->pUnused = 0; +} + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +** +** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED +** the byte range is divided into 2 parts and the first part is unlocked then +** set to a read lock, then the other part is simply unlocked. This works +** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to +** remove the write lock on a region when a read lock is set. +*/ +static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){ + unixFile *pFile = (unixFile*)id; + unixInodeInfo *pInode; + struct flock lock; + int rc = SQLITE_OK; + + assert( pFile ); + OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock, + pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, + osGetpid(0))); + + assert( eFileLock<=SHARED_LOCK ); + if( pFile->eFileLock<=eFileLock ){ + return SQLITE_OK; + } + unixEnterMutex(); + pInode = pFile->pInode; + assert( pInode->nShared!=0 ); + if( pFile->eFileLock>SHARED_LOCK ){ + assert( pInode->eFileLock==pFile->eFileLock ); + +#ifdef SQLITE_DEBUG + /* When reducing a lock such that other processes can start + ** reading the database file again, make sure that the + ** transaction counter was updated if any part of the database + ** file changed. If the transaction counter is not updated, + ** other connections to the same file might not realize that + ** the file has changed and hence might not know to flush their + ** cache. The use of a stale cache can lead to database corruption. + */ + pFile->inNormalWrite = 0; +#endif + + /* downgrading to a shared lock on NFS involves clearing the write lock + ** before establishing the readlock - to avoid a race condition we downgrade + ** the lock in 2 blocks, so that part of the range will be covered by a + ** write lock until the rest is covered by a read lock: + ** 1: [WWWWW] + ** 2: [....W] + ** 3: [RRRRW] + ** 4: [RRRR.] + */ + if( eFileLock==SHARED_LOCK ){ +#if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE + (void)handleNFSUnlock; + assert( handleNFSUnlock==0 ); +#endif +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE + if( handleNFSUnlock ){ + int tErrno; /* Error code from system call errors */ + off_t divSize = SHARED_SIZE - 1; + + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = SHARED_FIRST; + lock.l_len = divSize; + if( unixFileLock(pFile, &lock)==(-1) ){ + tErrno = errno; + rc = SQLITE_IOERR_UNLOCK; + if( IS_LOCK_ERROR(rc) ){ + storeLastErrno(pFile, tErrno); + } + goto end_unlock; + } + lock.l_type = F_RDLCK; + lock.l_whence = SEEK_SET; + lock.l_start = SHARED_FIRST; + lock.l_len = divSize; + if( unixFileLock(pFile, &lock)==(-1) ){ + tErrno = errno; + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK); + if( IS_LOCK_ERROR(rc) ){ + storeLastErrno(pFile, tErrno); + } + goto end_unlock; + } + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = SHARED_FIRST+divSize; + lock.l_len = SHARED_SIZE-divSize; + if( unixFileLock(pFile, &lock)==(-1) ){ + tErrno = errno; + rc = SQLITE_IOERR_UNLOCK; + if( IS_LOCK_ERROR(rc) ){ + storeLastErrno(pFile, tErrno); + } + goto end_unlock; + } + }else +#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ + { + lock.l_type = F_RDLCK; + lock.l_whence = SEEK_SET; + lock.l_start = SHARED_FIRST; + lock.l_len = SHARED_SIZE; + if( unixFileLock(pFile, &lock) ){ + /* In theory, the call to unixFileLock() cannot fail because another + ** process is holding an incompatible lock. If it does, this + ** indicates that the other process is not following the locking + ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning + ** SQLITE_BUSY would confuse the upper layer (in practice it causes + ** an assert to fail). */ + rc = SQLITE_IOERR_RDLOCK; + storeLastErrno(pFile, errno); + goto end_unlock; + } + } + } + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = PENDING_BYTE; + lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE ); + if( unixFileLock(pFile, &lock)==0 ){ + pInode->eFileLock = SHARED_LOCK; + }else{ + rc = SQLITE_IOERR_UNLOCK; + storeLastErrno(pFile, errno); + goto end_unlock; + } + } + if( eFileLock==NO_LOCK ){ + /* Decrement the shared lock counter. Release the lock using an + ** OS call only when all threads in this same process have released + ** the lock. + */ + pInode->nShared--; + if( pInode->nShared==0 ){ + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = lock.l_len = 0L; + if( unixFileLock(pFile, &lock)==0 ){ + pInode->eFileLock = NO_LOCK; + }else{ + rc = SQLITE_IOERR_UNLOCK; + storeLastErrno(pFile, errno); + pInode->eFileLock = NO_LOCK; + pFile->eFileLock = NO_LOCK; + } + } + + /* Decrement the count of locks against this same file. When the + ** count reaches zero, close any other file descriptors whose close + ** was deferred because of outstanding locks. + */ + pInode->nLock--; + assert( pInode->nLock>=0 ); + if( pInode->nLock==0 ){ + closePendingFds(pFile); + } + } + +end_unlock: + unixLeaveMutex(); + if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; + return rc; +} + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +*/ +static int unixUnlock(sqlite3_file *id, int eFileLock){ +#if SQLITE_MAX_MMAP_SIZE>0 + assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); +#endif + return posixUnlock(id, eFileLock, 0); +} + +#if SQLITE_MAX_MMAP_SIZE>0 +static int unixMapfile(unixFile *pFd, i64 nByte); +static void unixUnmapfile(unixFile *pFd); +#endif + +/* +** This function performs the parts of the "close file" operation +** common to all locking schemes. It closes the directory and file +** handles, if they are valid, and sets all fields of the unixFile +** structure to 0. +** +** It is *not* necessary to hold the mutex when this routine is called, +** even on VxWorks. A mutex will be acquired on VxWorks by the +** vxworksReleaseFileId() routine. +*/ +static int closeUnixFile(sqlite3_file *id){ + unixFile *pFile = (unixFile*)id; +#if SQLITE_MAX_MMAP_SIZE>0 + unixUnmapfile(pFile); +#endif + if( pFile->h>=0 ){ + robust_close(pFile, pFile->h, __LINE__); + pFile->h = -1; + } +#if OS_VXWORKS + if( pFile->pId ){ + if( pFile->ctrlFlags & UNIXFILE_DELETE ){ + osUnlink(pFile->pId->zCanonicalName); + } + vxworksReleaseFileId(pFile->pId); + pFile->pId = 0; + } +#endif +#ifdef SQLITE_UNLINK_AFTER_CLOSE + if( pFile->ctrlFlags & UNIXFILE_DELETE ){ + osUnlink(pFile->zPath); + sqlite3_free(*(char**)&pFile->zPath); + pFile->zPath = 0; + } +#endif + OSTRACE(("CLOSE %-3d\n", pFile->h)); + OpenCounter(-1); + sqlite3_free(pFile->pUnused); + memset(pFile, 0, sizeof(unixFile)); + return SQLITE_OK; +} + +/* +** Close a file. +*/ +static int unixClose(sqlite3_file *id){ + int rc = SQLITE_OK; + unixFile *pFile = (unixFile *)id; + verifyDbFile(pFile); + unixUnlock(id, NO_LOCK); + unixEnterMutex(); + + /* unixFile.pInode is always valid here. Otherwise, a different close + ** routine (e.g. nolockClose()) would be called instead. + */ + assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 ); + if( ALWAYS(pFile->pInode) && pFile->pInode->nLock ){ + /* If there are outstanding locks, do not actually close the file just + ** yet because that would clear those locks. Instead, add the file + ** descriptor to pInode->pUnused list. It will be automatically closed + ** when the last lock is cleared. + */ + setPendingFd(pFile); + } + releaseInodeInfo(pFile); + rc = closeUnixFile(id); + unixLeaveMutex(); + return rc; +} + +/************** End of the posix advisory lock implementation ***************** +******************************************************************************/ + +/****************************************************************************** +****************************** No-op Locking ********************************** +** +** Of the various locking implementations available, this is by far the +** simplest: locking is ignored. No attempt is made to lock the database +** file for reading or writing. +** +** This locking mode is appropriate for use on read-only databases +** (ex: databases that are burned into CD-ROM, for example.) It can +** also be used if the application employs some external mechanism to +** prevent simultaneous access of the same database by two or more +** database connections. But there is a serious risk of database +** corruption if this locking mode is used in situations where multiple +** database connections are accessing the same database file at the same +** time and one or more of those connections are writing. +*/ + +static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){ + UNUSED_PARAMETER(NotUsed); + *pResOut = 0; + return SQLITE_OK; +} +static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){ + UNUSED_PARAMETER2(NotUsed, NotUsed2); + return SQLITE_OK; +} +static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){ + UNUSED_PARAMETER2(NotUsed, NotUsed2); + return SQLITE_OK; +} + +/* +** Close the file. +*/ +static int nolockClose(sqlite3_file *id) { + return closeUnixFile(id); +} + +/******************* End of the no-op lock implementation ********************* +******************************************************************************/ + +/****************************************************************************** +************************* Begin dot-file Locking ****************************** +** +** The dotfile locking implementation uses the existence of separate lock +** files (really a directory) to control access to the database. This works +** on just about every filesystem imaginable. But there are serious downsides: +** +** (1) There is zero concurrency. A single reader blocks all other +** connections from reading or writing the database. +** +** (2) An application crash or power loss can leave stale lock files +** sitting around that need to be cleared manually. +** +** Nevertheless, a dotlock is an appropriate locking mode for use if no +** other locking strategy is available. +** +** Dotfile locking works by creating a subdirectory in the same directory as +** the database and with the same name but with a ".lock" extension added. +** The existence of a lock directory implies an EXCLUSIVE lock. All other +** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. +*/ + +/* +** The file suffix added to the data base filename in order to create the +** lock directory. +*/ +#define DOTLOCK_SUFFIX ".lock" + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, set *pResOut +** to a non-zero value otherwise *pResOut is set to zero. The return value +** is set to SQLITE_OK unless an I/O error occurs during lock checking. +** +** In dotfile locking, either a lock exists or it does not. So in this +** variation of CheckReservedLock(), *pResOut is set to true if any lock +** is held on the file and false if the file is unlocked. +*/ +static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { + int rc = SQLITE_OK; + int reserved = 0; + unixFile *pFile = (unixFile*)id; + + SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + + assert( pFile ); + + /* Check if a thread in this process holds such a lock */ + if( pFile->eFileLock>SHARED_LOCK ){ + /* Either this connection or some other connection in the same process + ** holds a lock on the file. No need to check further. */ + reserved = 1; + }else{ + /* The lock is held if and only if the lockfile exists */ + const char *zLockFile = (const char*)pFile->lockingContext; + reserved = osAccess(zLockFile, 0)==0; + } + OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved)); + *pResOut = reserved; + return rc; +} + +/* +** Lock the file with the lock specified by parameter eFileLock - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. +** +** With dotfile locking, we really only support state (4): EXCLUSIVE. +** But we track the other locking levels internally. +*/ +static int dotlockLock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + char *zLockFile = (char *)pFile->lockingContext; + int rc = SQLITE_OK; + + + /* If we have any lock, then the lock file already exists. All we have + ** to do is adjust our internal record of the lock level. + */ + if( pFile->eFileLock > NO_LOCK ){ + pFile->eFileLock = eFileLock; + /* Always update the timestamp on the old file */ +#ifdef HAVE_UTIME + utime(zLockFile, NULL); +#else + utimes(zLockFile, NULL); +#endif + return SQLITE_OK; + } + + /* grab an exclusive lock */ + rc = osMkdir(zLockFile, 0777); + if( rc<0 ){ + /* failed to open/create the lock directory */ + int tErrno = errno; + if( EEXIST == tErrno ){ + rc = SQLITE_BUSY; + } else { + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); + if( IS_LOCK_ERROR(rc) ){ + storeLastErrno(pFile, tErrno); + } + } + return rc; + } + + /* got it, set the type and return ok */ + pFile->eFileLock = eFileLock; + return rc; +} + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +** +** When the locking level reaches NO_LOCK, delete the lock file. +*/ +static int dotlockUnlock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + char *zLockFile = (char *)pFile->lockingContext; + int rc; + + assert( pFile ); + OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock, + pFile->eFileLock, osGetpid(0))); + assert( eFileLock<=SHARED_LOCK ); + + /* no-op if possible */ + if( pFile->eFileLock==eFileLock ){ + return SQLITE_OK; + } + + /* To downgrade to shared, simply update our internal notion of the + ** lock state. No need to mess with the file on disk. + */ + if( eFileLock==SHARED_LOCK ){ + pFile->eFileLock = SHARED_LOCK; + return SQLITE_OK; + } + + /* To fully unlock the database, delete the lock file */ + assert( eFileLock==NO_LOCK ); + rc = osRmdir(zLockFile); + if( rc<0 && errno==ENOTDIR ) rc = osUnlink(zLockFile); + if( rc<0 ){ + int tErrno = errno; + rc = 0; + if( ENOENT != tErrno ){ + rc = SQLITE_IOERR_UNLOCK; + } + if( IS_LOCK_ERROR(rc) ){ + storeLastErrno(pFile, tErrno); + } + return rc; + } + pFile->eFileLock = NO_LOCK; + return SQLITE_OK; +} + +/* +** Close a file. Make sure the lock has been released before closing. +*/ +static int dotlockClose(sqlite3_file *id) { + int rc = SQLITE_OK; + if( id ){ + unixFile *pFile = (unixFile*)id; + dotlockUnlock(id, NO_LOCK); + sqlite3_free(pFile->lockingContext); + rc = closeUnixFile(id); + } + return rc; +} +/****************** End of the dot-file lock implementation ******************* +******************************************************************************/ + +/****************************************************************************** +************************** Begin flock Locking ******************************** +** +** Use the flock() system call to do file locking. +** +** flock() locking is like dot-file locking in that the various +** fine-grain locking levels supported by SQLite are collapsed into +** a single exclusive lock. In other words, SHARED, RESERVED, and +** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite +** still works when you do this, but concurrency is reduced since +** only a single process can be reading the database at a time. +** +** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off +*/ +#if SQLITE_ENABLE_LOCKING_STYLE + +/* +** Retry flock() calls that fail with EINTR +*/ +#ifdef EINTR +static int robust_flock(int fd, int op){ + int rc; + do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR ); + return rc; +} +#else +# define robust_flock(a,b) flock(a,b) +#endif + + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, set *pResOut +** to a non-zero value otherwise *pResOut is set to zero. The return value +** is set to SQLITE_OK unless an I/O error occurs during lock checking. +*/ +static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ + int rc = SQLITE_OK; + int reserved = 0; + unixFile *pFile = (unixFile*)id; + + SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + + assert( pFile ); + + /* Check if a thread in this process holds such a lock */ + if( pFile->eFileLock>SHARED_LOCK ){ + reserved = 1; + } + + /* Otherwise see if some other process holds it. */ + if( !reserved ){ + /* attempt to get the lock */ + int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); + if( !lrc ){ + /* got the lock, unlock it */ + lrc = robust_flock(pFile->h, LOCK_UN); + if ( lrc ) { + int tErrno = errno; + /* unlock failed with an error */ + lrc = SQLITE_IOERR_UNLOCK; + if( IS_LOCK_ERROR(lrc) ){ + storeLastErrno(pFile, tErrno); + rc = lrc; + } + } + } else { + int tErrno = errno; + reserved = 1; + /* someone else might have it reserved */ + lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); + if( IS_LOCK_ERROR(lrc) ){ + storeLastErrno(pFile, tErrno); + rc = lrc; + } + } + } + OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved)); + +#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS + if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){ + rc = SQLITE_OK; + reserved=1; + } +#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ + *pResOut = reserved; + return rc; +} + +/* +** Lock the file with the lock specified by parameter eFileLock - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** flock() only really support EXCLUSIVE locks. We track intermediate +** lock states in the sqlite3_file structure, but all locks SHARED or +** above are really EXCLUSIVE locks and exclude all other processes from +** access the file. +** +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. +*/ +static int flockLock(sqlite3_file *id, int eFileLock) { + int rc = SQLITE_OK; + unixFile *pFile = (unixFile*)id; + + assert( pFile ); + + /* if we already have a lock, it is exclusive. + ** Just adjust level and punt on outta here. */ + if (pFile->eFileLock > NO_LOCK) { + pFile->eFileLock = eFileLock; + return SQLITE_OK; + } + + /* grab an exclusive lock */ + + if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) { + int tErrno = errno; + /* didn't get, must be busy */ + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); + if( IS_LOCK_ERROR(rc) ){ + storeLastErrno(pFile, tErrno); + } + } else { + /* got it, set the type and return ok */ + pFile->eFileLock = eFileLock; + } + OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock), + rc==SQLITE_OK ? "ok" : "failed")); +#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS + if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){ + rc = SQLITE_BUSY; + } +#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ + return rc; +} + + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +*/ +static int flockUnlock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + + assert( pFile ); + OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock, + pFile->eFileLock, osGetpid(0))); + assert( eFileLock<=SHARED_LOCK ); + + /* no-op if possible */ + if( pFile->eFileLock==eFileLock ){ + return SQLITE_OK; + } + + /* shared can just be set because we always have an exclusive */ + if (eFileLock==SHARED_LOCK) { + pFile->eFileLock = eFileLock; + return SQLITE_OK; + } + + /* no, really, unlock. */ + if( robust_flock(pFile->h, LOCK_UN) ){ +#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS + return SQLITE_OK; +#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ + return SQLITE_IOERR_UNLOCK; + }else{ + pFile->eFileLock = NO_LOCK; + return SQLITE_OK; + } +} + +/* +** Close a file. +*/ +static int flockClose(sqlite3_file *id) { + int rc = SQLITE_OK; + if( id ){ + flockUnlock(id, NO_LOCK); + rc = closeUnixFile(id); + } + return rc; +} + +#endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */ + +/******************* End of the flock lock implementation ********************* +******************************************************************************/ + +/****************************************************************************** +************************ Begin Named Semaphore Locking ************************ +** +** Named semaphore locking is only supported on VxWorks. +** +** Semaphore locking is like dot-lock and flock in that it really only +** supports EXCLUSIVE locking. Only a single process can read or write +** the database file at a time. This reduces potential concurrency, but +** makes the lock implementation much easier. +*/ +#if OS_VXWORKS + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, set *pResOut +** to a non-zero value otherwise *pResOut is set to zero. The return value +** is set to SQLITE_OK unless an I/O error occurs during lock checking. +*/ +static int semXCheckReservedLock(sqlite3_file *id, int *pResOut) { + int rc = SQLITE_OK; + int reserved = 0; + unixFile *pFile = (unixFile*)id; + + SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + + assert( pFile ); + + /* Check if a thread in this process holds such a lock */ + if( pFile->eFileLock>SHARED_LOCK ){ + reserved = 1; + } + + /* Otherwise see if some other process holds it. */ + if( !reserved ){ + sem_t *pSem = pFile->pInode->pSem; + + if( sem_trywait(pSem)==-1 ){ + int tErrno = errno; + if( EAGAIN != tErrno ){ + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK); + storeLastErrno(pFile, tErrno); + } else { + /* someone else has the lock when we are in NO_LOCK */ + reserved = (pFile->eFileLock < SHARED_LOCK); + } + }else{ + /* we could have it if we want it */ + sem_post(pSem); + } + } + OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, reserved)); + + *pResOut = reserved; + return rc; +} + +/* +** Lock the file with the lock specified by parameter eFileLock - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** Semaphore locks only really support EXCLUSIVE locks. We track intermediate +** lock states in the sqlite3_file structure, but all locks SHARED or +** above are really EXCLUSIVE locks and exclude all other processes from +** access the file. +** +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. +*/ +static int semXLock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + sem_t *pSem = pFile->pInode->pSem; + int rc = SQLITE_OK; + + /* if we already have a lock, it is exclusive. + ** Just adjust level and punt on outta here. */ + if (pFile->eFileLock > NO_LOCK) { + pFile->eFileLock = eFileLock; + rc = SQLITE_OK; + goto sem_end_lock; + } + + /* lock semaphore now but bail out when already locked. */ + if( sem_trywait(pSem)==-1 ){ + rc = SQLITE_BUSY; + goto sem_end_lock; + } + + /* got it, set the type and return ok */ + pFile->eFileLock = eFileLock; + + sem_end_lock: + return rc; +} + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +*/ +static int semXUnlock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + sem_t *pSem = pFile->pInode->pSem; + + assert( pFile ); + assert( pSem ); + OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock, + pFile->eFileLock, osGetpid(0))); + assert( eFileLock<=SHARED_LOCK ); + + /* no-op if possible */ + if( pFile->eFileLock==eFileLock ){ + return SQLITE_OK; + } + + /* shared can just be set because we always have an exclusive */ + if (eFileLock==SHARED_LOCK) { + pFile->eFileLock = eFileLock; + return SQLITE_OK; + } + + /* no, really unlock. */ + if ( sem_post(pSem)==-1 ) { + int rc, tErrno = errno; + rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); + if( IS_LOCK_ERROR(rc) ){ + storeLastErrno(pFile, tErrno); + } + return rc; + } + pFile->eFileLock = NO_LOCK; + return SQLITE_OK; +} + +/* + ** Close a file. + */ +static int semXClose(sqlite3_file *id) { + if( id ){ + unixFile *pFile = (unixFile*)id; + semXUnlock(id, NO_LOCK); + assert( pFile ); + unixEnterMutex(); + releaseInodeInfo(pFile); + unixLeaveMutex(); + closeUnixFile(id); + } + return SQLITE_OK; +} + +#endif /* OS_VXWORKS */ +/* +** Named semaphore locking is only available on VxWorks. +** +*************** End of the named semaphore lock implementation **************** +******************************************************************************/ + + +/****************************************************************************** +*************************** Begin AFP Locking ********************************* +** +** AFP is the Apple Filing Protocol. AFP is a network filesystem found +** on Apple Macintosh computers - both OS9 and OSX. +** +** Third-party implementations of AFP are available. But this code here +** only works on OSX. +*/ + +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +/* +** The afpLockingContext structure contains all afp lock specific state +*/ +typedef struct afpLockingContext afpLockingContext; +struct afpLockingContext { + int reserved; + const char *dbPath; /* Name of the open file */ +}; + +struct ByteRangeLockPB2 +{ + unsigned long long offset; /* offset to first byte to lock */ + unsigned long long length; /* nbr of bytes to lock */ + unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */ + unsigned char unLockFlag; /* 1 = unlock, 0 = lock */ + unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */ + int fd; /* file desc to assoc this lock with */ +}; + +#define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2) + +/* +** This is a utility for setting or clearing a bit-range lock on an +** AFP filesystem. +** +** Return SQLITE_OK on success, SQLITE_BUSY on failure. +*/ +static int afpSetLock( + const char *path, /* Name of the file to be locked or unlocked */ + unixFile *pFile, /* Open file descriptor on path */ + unsigned long long offset, /* First byte to be locked */ + unsigned long long length, /* Number of bytes to lock */ + int setLockFlag /* True to set lock. False to clear lock */ +){ + struct ByteRangeLockPB2 pb; + int err; + + pb.unLockFlag = setLockFlag ? 0 : 1; + pb.startEndFlag = 0; + pb.offset = offset; + pb.length = length; + pb.fd = pFile->h; + + OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n", + (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""), + offset, length)); + err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0); + if ( err==-1 ) { + int rc; + int tErrno = errno; + OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n", + path, tErrno, strerror(tErrno))); +#ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS + rc = SQLITE_BUSY; +#else + rc = sqliteErrorFromPosixError(tErrno, + setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); +#endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */ + if( IS_LOCK_ERROR(rc) ){ + storeLastErrno(pFile, tErrno); + } + return rc; + } else { + return SQLITE_OK; + } +} + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, set *pResOut +** to a non-zero value otherwise *pResOut is set to zero. The return value +** is set to SQLITE_OK unless an I/O error occurs during lock checking. +*/ +static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ + int rc = SQLITE_OK; + int reserved = 0; + unixFile *pFile = (unixFile*)id; + afpLockingContext *context; + + SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + + assert( pFile ); + context = (afpLockingContext *) pFile->lockingContext; + if( context->reserved ){ + *pResOut = 1; + return SQLITE_OK; + } + unixEnterMutex(); /* Because pFile->pInode is shared across threads */ + + /* Check if a thread in this process holds such a lock */ + if( pFile->pInode->eFileLock>SHARED_LOCK ){ + reserved = 1; + } + + /* Otherwise see if some other process holds it. + */ + if( !reserved ){ + /* lock the RESERVED byte */ + int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); + if( SQLITE_OK==lrc ){ + /* if we succeeded in taking the reserved lock, unlock it to restore + ** the original state */ + lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); + } else { + /* if we failed to get the lock then someone else must have it */ + reserved = 1; + } + if( IS_LOCK_ERROR(lrc) ){ + rc=lrc; + } + } + + unixLeaveMutex(); + OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved)); + + *pResOut = reserved; + return rc; +} + +/* +** Lock the file with the lock specified by parameter eFileLock - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. +*/ +static int afpLock(sqlite3_file *id, int eFileLock){ + int rc = SQLITE_OK; + unixFile *pFile = (unixFile*)id; + unixInodeInfo *pInode = pFile->pInode; + afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; + + assert( pFile ); + OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h, + azFileLock(eFileLock), azFileLock(pFile->eFileLock), + azFileLock(pInode->eFileLock), pInode->nShared , osGetpid(0))); + + /* If there is already a lock of this type or more restrictive on the + ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as + ** unixEnterMutex() hasn't been called yet. + */ + if( pFile->eFileLock>=eFileLock ){ + OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h, + azFileLock(eFileLock))); + return SQLITE_OK; + } + + /* Make sure the locking sequence is correct + ** (1) We never move from unlocked to anything higher than shared lock. + ** (2) SQLite never explicitly requests a pendig lock. + ** (3) A shared lock is always held when a reserve lock is requested. + */ + assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); + assert( eFileLock!=PENDING_LOCK ); + assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); + + /* This mutex is needed because pFile->pInode is shared across threads + */ + unixEnterMutex(); + pInode = pFile->pInode; + + /* If some thread using this PID has a lock via a different unixFile* + ** handle that precludes the requested lock, return BUSY. + */ + if( (pFile->eFileLock!=pInode->eFileLock && + (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) + ){ + rc = SQLITE_BUSY; + goto afp_end_lock; + } + + /* If a SHARED lock is requested, and some thread using this PID already + ** has a SHARED or RESERVED lock, then increment reference counts and + ** return SQLITE_OK. + */ + if( eFileLock==SHARED_LOCK && + (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ + assert( eFileLock==SHARED_LOCK ); + assert( pFile->eFileLock==0 ); + assert( pInode->nShared>0 ); + pFile->eFileLock = SHARED_LOCK; + pInode->nShared++; + pInode->nLock++; + goto afp_end_lock; + } + + /* A PENDING lock is needed before acquiring a SHARED lock and before + ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will + ** be released. + */ + if( eFileLock==SHARED_LOCK + || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK) + ){ + int failed; + failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1); + if (failed) { + rc = failed; + goto afp_end_lock; + } + } + + /* If control gets to this point, then actually go ahead and make + ** operating system calls for the specified lock. + */ + if( eFileLock==SHARED_LOCK ){ + int lrc1, lrc2, lrc1Errno = 0; + long lk, mask; + + assert( pInode->nShared==0 ); + assert( pInode->eFileLock==0 ); + + mask = (sizeof(long)==8) ? LARGEST_INT64 : 0x7fffffff; + /* Now get the read-lock SHARED_LOCK */ + /* note that the quality of the randomness doesn't matter that much */ + lk = random(); + pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1); + lrc1 = afpSetLock(context->dbPath, pFile, + SHARED_FIRST+pInode->sharedByte, 1, 1); + if( IS_LOCK_ERROR(lrc1) ){ + lrc1Errno = pFile->lastErrno; + } + /* Drop the temporary PENDING lock */ + lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); + + if( IS_LOCK_ERROR(lrc1) ) { + storeLastErrno(pFile, lrc1Errno); + rc = lrc1; + goto afp_end_lock; + } else if( IS_LOCK_ERROR(lrc2) ){ + rc = lrc2; + goto afp_end_lock; + } else if( lrc1 != SQLITE_OK ) { + rc = lrc1; + } else { + pFile->eFileLock = SHARED_LOCK; + pInode->nLock++; + pInode->nShared = 1; + } + }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ + /* We are trying for an exclusive lock but another thread in this + ** same process is still holding a shared lock. */ + rc = SQLITE_BUSY; + }else{ + /* The request was for a RESERVED or EXCLUSIVE lock. It is + ** assumed that there is a SHARED or greater lock on the file + ** already. + */ + int failed = 0; + assert( 0!=pFile->eFileLock ); + if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) { + /* Acquire a RESERVED lock */ + failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); + if( !failed ){ + context->reserved = 1; + } + } + if (!failed && eFileLock == EXCLUSIVE_LOCK) { + /* Acquire an EXCLUSIVE lock */ + + /* Remove the shared lock before trying the range. we'll need to + ** reestablish the shared lock if we can't get the afpUnlock + */ + if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST + + pInode->sharedByte, 1, 0)) ){ + int failed2 = SQLITE_OK; + /* now attemmpt to get the exclusive lock range */ + failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST, + SHARED_SIZE, 1); + if( failed && (failed2 = afpSetLock(context->dbPath, pFile, + SHARED_FIRST + pInode->sharedByte, 1, 1)) ){ + /* Can't reestablish the shared lock. Sqlite can't deal, this is + ** a critical I/O error + */ + rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 : + SQLITE_IOERR_LOCK; + goto afp_end_lock; + } + }else{ + rc = failed; + } + } + if( failed ){ + rc = failed; + } + } + + if( rc==SQLITE_OK ){ + pFile->eFileLock = eFileLock; + pInode->eFileLock = eFileLock; + }else if( eFileLock==EXCLUSIVE_LOCK ){ + pFile->eFileLock = PENDING_LOCK; + pInode->eFileLock = PENDING_LOCK; + } + +afp_end_lock: + unixLeaveMutex(); + OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), + rc==SQLITE_OK ? "ok" : "failed")); + return rc; +} + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +*/ +static int afpUnlock(sqlite3_file *id, int eFileLock) { + int rc = SQLITE_OK; + unixFile *pFile = (unixFile*)id; + unixInodeInfo *pInode; + afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; + int skipShared = 0; +#ifdef SQLITE_TEST + int h = pFile->h; +#endif + + assert( pFile ); + OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock, + pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, + osGetpid(0))); + + assert( eFileLock<=SHARED_LOCK ); + if( pFile->eFileLock<=eFileLock ){ + return SQLITE_OK; + } + unixEnterMutex(); + pInode = pFile->pInode; + assert( pInode->nShared!=0 ); + if( pFile->eFileLock>SHARED_LOCK ){ + assert( pInode->eFileLock==pFile->eFileLock ); + SimulateIOErrorBenign(1); + SimulateIOError( h=(-1) ) + SimulateIOErrorBenign(0); + +#ifdef SQLITE_DEBUG + /* When reducing a lock such that other processes can start + ** reading the database file again, make sure that the + ** transaction counter was updated if any part of the database + ** file changed. If the transaction counter is not updated, + ** other connections to the same file might not realize that + ** the file has changed and hence might not know to flush their + ** cache. The use of a stale cache can lead to database corruption. + */ + assert( pFile->inNormalWrite==0 + || pFile->dbUpdate==0 + || pFile->transCntrChng==1 ); + pFile->inNormalWrite = 0; +#endif + + if( pFile->eFileLock==EXCLUSIVE_LOCK ){ + rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0); + if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){ + /* only re-establish the shared lock if necessary */ + int sharedLockByte = SHARED_FIRST+pInode->sharedByte; + rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1); + } else { + skipShared = 1; + } + } + if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){ + rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); + } + if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){ + rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); + if( !rc ){ + context->reserved = 0; + } + } + if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){ + pInode->eFileLock = SHARED_LOCK; + } + } + if( rc==SQLITE_OK && eFileLock==NO_LOCK ){ + + /* Decrement the shared lock counter. Release the lock using an + ** OS call only when all threads in this same process have released + ** the lock. + */ + unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte; + pInode->nShared--; + if( pInode->nShared==0 ){ + SimulateIOErrorBenign(1); + SimulateIOError( h=(-1) ) + SimulateIOErrorBenign(0); + if( !skipShared ){ + rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0); + } + if( !rc ){ + pInode->eFileLock = NO_LOCK; + pFile->eFileLock = NO_LOCK; + } + } + if( rc==SQLITE_OK ){ + pInode->nLock--; + assert( pInode->nLock>=0 ); + if( pInode->nLock==0 ){ + closePendingFds(pFile); + } + } + } + + unixLeaveMutex(); + if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; + return rc; +} + +/* +** Close a file & cleanup AFP specific locking context +*/ +static int afpClose(sqlite3_file *id) { + int rc = SQLITE_OK; + if( id ){ + unixFile *pFile = (unixFile*)id; + afpUnlock(id, NO_LOCK); + unixEnterMutex(); + if( pFile->pInode && pFile->pInode->nLock ){ + /* If there are outstanding locks, do not actually close the file just + ** yet because that would clear those locks. Instead, add the file + ** descriptor to pInode->aPending. It will be automatically closed when + ** the last lock is cleared. + */ + setPendingFd(pFile); + } + releaseInodeInfo(pFile); + sqlite3_free(pFile->lockingContext); + rc = closeUnixFile(id); + unixLeaveMutex(); + } + return rc; +} + +#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ +/* +** The code above is the AFP lock implementation. The code is specific +** to MacOSX and does not work on other unix platforms. No alternative +** is available. If you don't compile for a mac, then the "unix-afp" +** VFS is not available. +** +********************* End of the AFP lock implementation ********************** +******************************************************************************/ + +/****************************************************************************** +*************************** Begin NFS Locking ********************************/ + +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +/* + ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock + ** must be either NO_LOCK or SHARED_LOCK. + ** + ** If the locking level of the file descriptor is already at or below + ** the requested locking level, this routine is a no-op. + */ +static int nfsUnlock(sqlite3_file *id, int eFileLock){ + return posixUnlock(id, eFileLock, 1); +} + +#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ +/* +** The code above is the NFS lock implementation. The code is specific +** to MacOSX and does not work on other unix platforms. No alternative +** is available. +** +********************* End of the NFS lock implementation ********************** +******************************************************************************/ + +/****************************************************************************** +**************** Non-locking sqlite3_file methods ***************************** +** +** The next division contains implementations for all methods of the +** sqlite3_file object other than the locking methods. The locking +** methods were defined in divisions above (one locking method per +** division). Those methods that are common to all locking modes +** are gather together into this division. +*/ + +/* +** Seek to the offset passed as the second argument, then read cnt +** bytes into pBuf. Return the number of bytes actually read. +** +** NB: If you define USE_PREAD or USE_PREAD64, then it might also +** be necessary to define _XOPEN_SOURCE to be 500. This varies from +** one system to another. Since SQLite does not define USE_PREAD +** in any form by default, we will not attempt to define _XOPEN_SOURCE. +** See tickets #2741 and #2681. +** +** To avoid stomping the errno value on a failed read the lastErrno value +** is set before returning. +*/ +static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ + int got; + int prior = 0; +#if (!defined(USE_PREAD) && !defined(USE_PREAD64)) + i64 newOffset; +#endif + TIMER_START; + assert( cnt==(cnt&0x1ffff) ); + assert( id->h>2 ); + do{ +#if defined(USE_PREAD) + got = osPread(id->h, pBuf, cnt, offset); + SimulateIOError( got = -1 ); +#elif defined(USE_PREAD64) + got = osPread64(id->h, pBuf, cnt, offset); + SimulateIOError( got = -1 ); +#else + newOffset = lseek(id->h, offset, SEEK_SET); + SimulateIOError( newOffset-- ); + if( newOffset!=offset ){ + if( newOffset == -1 ){ + storeLastErrno((unixFile*)id, errno); + }else{ + storeLastErrno((unixFile*)id, 0); + } + return -1; + } + got = osRead(id->h, pBuf, cnt); +#endif + if( got==cnt ) break; + if( got<0 ){ + if( errno==EINTR ){ got = 1; continue; } + prior = 0; + storeLastErrno((unixFile*)id, errno); + break; + }else if( got>0 ){ + cnt -= got; + offset += got; + prior += got; + pBuf = (void*)(got + (char*)pBuf); + } + }while( got>0 ); + TIMER_END; + OSTRACE(("READ %-3d %5d %7lld %llu\n", + id->h, got+prior, offset-prior, TIMER_ELAPSED)); + return got+prior; +} + +/* +** Read data from a file into a buffer. Return SQLITE_OK if all +** bytes were read successfully and SQLITE_IOERR if anything goes +** wrong. +*/ +static int unixRead( + sqlite3_file *id, + void *pBuf, + int amt, + sqlite3_int64 offset +){ + unixFile *pFile = (unixFile *)id; + int got; + assert( id ); + assert( offset>=0 ); + assert( amt>0 ); + + /* If this is a database file (not a journal, master-journal or temp + ** file), the bytes in the locking range should never be read or written. */ +#if 0 + assert( pFile->pUnused==0 + || offset>=PENDING_BYTE+512 + || offset+amt<=PENDING_BYTE + ); +#endif + +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this read request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offset<pFile->mmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); + return SQLITE_OK; + }else{ + int nCopy = pFile->mmapSize - offset; + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + + got = seekAndRead(pFile, offset, pBuf, amt); + if( got==amt ){ + return SQLITE_OK; + }else if( got<0 ){ + /* lastErrno set by seekAndRead */ + return SQLITE_IOERR_READ; + }else{ + storeLastErrno(pFile, 0); /* not a system error */ + /* Unread parts of the buffer must be zero-filled */ + memset(&((char*)pBuf)[got], 0, amt-got); + return SQLITE_IOERR_SHORT_READ; + } +} + +/* +** Attempt to seek the file-descriptor passed as the first argument to +** absolute offset iOff, then attempt to write nBuf bytes of data from +** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, +** return the actual number of bytes written (which may be less than +** nBuf). +*/ +static int seekAndWriteFd( + int fd, /* File descriptor to write to */ + i64 iOff, /* File offset to begin writing at */ + const void *pBuf, /* Copy data from this buffer to the file */ + int nBuf, /* Size of buffer pBuf in bytes */ + int *piErrno /* OUT: Error number if error occurs */ +){ + int rc = 0; /* Value returned by system call */ + + assert( nBuf==(nBuf&0x1ffff) ); + assert( fd>2 ); + nBuf &= 0x1ffff; + TIMER_START; + +#if defined(USE_PREAD) + do{ rc = (int)osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR ); +#elif defined(USE_PREAD64) + do{ rc = (int)osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR); +#else + do{ + i64 iSeek = lseek(fd, iOff, SEEK_SET); + SimulateIOError( iSeek-- ); + + if( iSeek!=iOff ){ + if( piErrno ) *piErrno = (iSeek==-1 ? errno : 0); + return -1; + } + rc = osWrite(fd, pBuf, nBuf); + }while( rc<0 && errno==EINTR ); +#endif + + TIMER_END; + OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED)); + + if( rc<0 && piErrno ) *piErrno = errno; + return rc; +} + + +/* +** Seek to the offset in id->offset then read cnt bytes into pBuf. +** Return the number of bytes actually read. Update the offset. +** +** To avoid stomping the errno value on a failed write the lastErrno value +** is set before returning. +*/ +static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ + return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); +} + + +/* +** Write data from a buffer into a file. Return SQLITE_OK on success +** or some other error code on failure. +*/ +static int unixWrite( + sqlite3_file *id, + const void *pBuf, + int amt, + sqlite3_int64 offset +){ + unixFile *pFile = (unixFile*)id; + int wrote = 0; + assert( id ); + assert( amt>0 ); + + /* If this is a database file (not a journal, master-journal or temp + ** file), the bytes in the locking range should never be read or written. */ +#if 0 + assert( pFile->pUnused==0 + || offset>=PENDING_BYTE+512 + || offset+amt<=PENDING_BYTE + ); +#endif + +#ifdef SQLITE_DEBUG + /* If we are doing a normal write to a database file (as opposed to + ** doing a hot-journal rollback or a write to some file other than a + ** normal database file) then record the fact that the database + ** has changed. If the transaction counter is modified, record that + ** fact too. + */ + if( pFile->inNormalWrite ){ + pFile->dbUpdate = 1; /* The database has been modified */ + if( offset<=24 && offset+amt>=27 ){ + int rc; + char oldCntr[4]; + SimulateIOErrorBenign(1); + rc = seekAndRead(pFile, 24, oldCntr, 4); + SimulateIOErrorBenign(0); + if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){ + pFile->transCntrChng = 1; /* The transaction counter has changed */ + } + } + } +#endif + +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this write request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offset<pFile->mmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); + return SQLITE_OK; + }else{ + int nCopy = pFile->mmapSize - offset; + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + + while( (wrote = seekAndWrite(pFile, offset, pBuf, amt))<amt && wrote>0 ){ + amt -= wrote; + offset += wrote; + pBuf = &((char*)pBuf)[wrote]; + } + SimulateIOError(( wrote=(-1), amt=1 )); + SimulateDiskfullError(( wrote=0, amt=1 )); + + if( amt>wrote ){ + if( wrote<0 && pFile->lastErrno!=ENOSPC ){ + /* lastErrno set by seekAndWrite */ + return SQLITE_IOERR_WRITE; + }else{ + storeLastErrno(pFile, 0); /* not a system error */ + return SQLITE_FULL; + } + } + + return SQLITE_OK; +} + +#ifdef SQLITE_TEST +/* +** Count the number of fullsyncs and normal syncs. This is used to test +** that syncs and fullsyncs are occurring at the right times. +*/ +SQLITE_PRIVATE int sqlite3_sync_count = 0; +SQLITE_PRIVATE int sqlite3_fullsync_count = 0; +#endif + +/* +** We do not trust systems to provide a working fdatasync(). Some do. +** Others do no. To be safe, we will stick with the (slightly slower) +** fsync(). If you know that your system does support fdatasync() correctly, +** then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC +*/ +#if !defined(fdatasync) && !HAVE_FDATASYNC +# define fdatasync fsync +#endif + +/* +** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not +** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently +** only available on Mac OS X. But that could change. +*/ +#ifdef F_FULLFSYNC +# define HAVE_FULLFSYNC 1 +#else +# define HAVE_FULLFSYNC 0 +#endif + + +/* +** The fsync() system call does not work as advertised on many +** unix systems. The following procedure is an attempt to make +** it work better. +** +** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful +** for testing when we want to run through the test suite quickly. +** You are strongly advised *not* to deploy with SQLITE_NO_SYNC +** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash +** or power failure will likely corrupt the database file. +** +** SQLite sets the dataOnly flag if the size of the file is unchanged. +** The idea behind dataOnly is that it should only write the file content +** to disk, not the inode. We only set dataOnly if the file size is +** unchanged since the file size is part of the inode. However, +** Ted Ts'o tells us that fdatasync() will also write the inode if the +** file size has changed. The only real difference between fdatasync() +** and fsync(), Ted tells us, is that fdatasync() will not flush the +** inode if the mtime or owner or other inode attributes have changed. +** We only care about the file size, not the other file attributes, so +** as far as SQLite is concerned, an fdatasync() is always adequate. +** So, we always use fdatasync() if it is available, regardless of +** the value of the dataOnly flag. +*/ +static int full_fsync(int fd, int fullSync, int dataOnly){ + int rc; + + /* The following "ifdef/elif/else/" block has the same structure as + ** the one below. It is replicated here solely to avoid cluttering + ** up the real code with the UNUSED_PARAMETER() macros. + */ +#ifdef SQLITE_NO_SYNC + UNUSED_PARAMETER(fd); + UNUSED_PARAMETER(fullSync); + UNUSED_PARAMETER(dataOnly); +#elif HAVE_FULLFSYNC + UNUSED_PARAMETER(dataOnly); +#else + UNUSED_PARAMETER(fullSync); + UNUSED_PARAMETER(dataOnly); +#endif + + /* Record the number of times that we do a normal fsync() and + ** FULLSYNC. This is used during testing to verify that this procedure + ** gets called with the correct arguments. + */ +#ifdef SQLITE_TEST + if( fullSync ) sqlite3_fullsync_count++; + sqlite3_sync_count++; +#endif + + /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a + ** no-op + */ +#ifdef SQLITE_NO_SYNC + rc = SQLITE_OK; +#elif HAVE_FULLFSYNC + if( fullSync ){ + rc = osFcntl(fd, F_FULLFSYNC, 0); + }else{ + rc = 1; + } + /* If the FULLFSYNC failed, fall back to attempting an fsync(). + ** It shouldn't be possible for fullfsync to fail on the local + ** file system (on OSX), so failure indicates that FULLFSYNC + ** isn't supported for this file system. So, attempt an fsync + ** and (for now) ignore the overhead of a superfluous fcntl call. + ** It'd be better to detect fullfsync support once and avoid + ** the fcntl call every time sync is called. + */ + if( rc ) rc = fsync(fd); + +#elif defined(__APPLE__) + /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly + ** so currently we default to the macro that redefines fdatasync to fsync + */ + rc = fsync(fd); +#else + rc = fdatasync(fd); +#if OS_VXWORKS + if( rc==-1 && errno==ENOTSUP ){ + rc = fsync(fd); + } +#endif /* OS_VXWORKS */ +#endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */ + + if( OS_VXWORKS && rc!= -1 ){ + rc = 0; + } + return rc; +} + +/* +** Open a file descriptor to the directory containing file zFilename. +** If successful, *pFd is set to the opened file descriptor and +** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM +** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined +** value. +** +** The directory file descriptor is used for only one thing - to +** fsync() a directory to make sure file creation and deletion events +** are flushed to disk. Such fsyncs are not needed on newer +** journaling filesystems, but are required on older filesystems. +** +** This routine can be overridden using the xSetSysCall interface. +** The ability to override this routine was added in support of the +** chromium sandbox. Opening a directory is a security risk (we are +** told) so making it overrideable allows the chromium sandbox to +** replace this routine with a harmless no-op. To make this routine +** a no-op, replace it with a stub that returns SQLITE_OK but leaves +** *pFd set to a negative number. +** +** If SQLITE_OK is returned, the caller is responsible for closing +** the file descriptor *pFd using close(). +*/ +static int openDirectory(const char *zFilename, int *pFd){ + int ii; + int fd = -1; + char zDirname[MAX_PATHNAME+1]; + + sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); + for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--); + if( ii>0 ){ + zDirname[ii] = '\0'; + fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); + if( fd>=0 ){ + OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); + } + } + *pFd = fd; + return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname)); +} + +/* +** Make sure all writes to a particular file are committed to disk. +** +** If dataOnly==0 then both the file itself and its metadata (file +** size, access time, etc) are synced. If dataOnly!=0 then only the +** file data is synced. +** +** Under Unix, also make sure that the directory entry for the file +** has been created by fsync-ing the directory that contains the file. +** If we do not do this and we encounter a power failure, the directory +** entry for the journal might not exist after we reboot. The next +** SQLite to access the file will not know that the journal exists (because +** the directory entry for the journal was never created) and the transaction +** will not roll back - possibly leading to database corruption. +*/ +static int unixSync(sqlite3_file *id, int flags){ + int rc; + unixFile *pFile = (unixFile*)id; + + int isDataOnly = (flags&SQLITE_SYNC_DATAONLY); + int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL; + + /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ + assert((flags&0x0F)==SQLITE_SYNC_NORMAL + || (flags&0x0F)==SQLITE_SYNC_FULL + ); + + /* Unix cannot, but some systems may return SQLITE_FULL from here. This + ** line is to test that doing so does not cause any problems. + */ + SimulateDiskfullError( return SQLITE_FULL ); + + assert( pFile ); + OSTRACE(("SYNC %-3d\n", pFile->h)); + rc = full_fsync(pFile->h, isFullsync, isDataOnly); + SimulateIOError( rc=1 ); + if( rc ){ + storeLastErrno(pFile, errno); + return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath); + } + + /* Also fsync the directory containing the file if the DIRSYNC flag + ** is set. This is a one-time occurrence. Many systems (examples: AIX) + ** are unable to fsync a directory, so ignore errors on the fsync. + */ + if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){ + int dirfd; + OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath, + HAVE_FULLFSYNC, isFullsync)); + rc = osOpenDirectory(pFile->zPath, &dirfd); + if( rc==SQLITE_OK && dirfd>=0 ){ + full_fsync(dirfd, 0, 0); + robust_close(pFile, dirfd, __LINE__); + }else if( rc==SQLITE_CANTOPEN ){ + rc = SQLITE_OK; + } + pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC; + } + return rc; +} + +/* +** Truncate an open file to a specified size +*/ +static int unixTruncate(sqlite3_file *id, i64 nByte){ + unixFile *pFile = (unixFile *)id; + int rc; + assert( pFile ); + SimulateIOError( return SQLITE_IOERR_TRUNCATE ); + + /* If the user has configured a chunk-size for this file, truncate the + ** file so that it consists of an integer number of chunks (i.e. the + ** actual file size after the operation may be larger than the requested + ** size). + */ + if( pFile->szChunk>0 ){ + nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; + } + + rc = robust_ftruncate(pFile->h, nByte); + if( rc ){ + storeLastErrno(pFile, errno); + return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); + }else{ +#ifdef SQLITE_DEBUG + /* If we are doing a normal write to a database file (as opposed to + ** doing a hot-journal rollback or a write to some file other than a + ** normal database file) and we truncate the file to zero length, + ** that effectively updates the change counter. This might happen + ** when restoring a database using the backup API from a zero-length + ** source. + */ + if( pFile->inNormalWrite && nByte==0 ){ + pFile->transCntrChng = 1; + } +#endif + +#if SQLITE_MAX_MMAP_SIZE>0 + /* If the file was just truncated to a size smaller than the currently + ** mapped region, reduce the effective mapping size as well. SQLite will + ** use read() and write() to access data beyond this point from now on. + */ + if( nByte<pFile->mmapSize ){ + pFile->mmapSize = nByte; + } +#endif + + return SQLITE_OK; + } +} + +/* +** Determine the current size of a file in bytes +*/ +static int unixFileSize(sqlite3_file *id, i64 *pSize){ + int rc; + struct stat buf; + assert( id ); + rc = osFstat(((unixFile*)id)->h, &buf); + SimulateIOError( rc=1 ); + if( rc!=0 ){ + storeLastErrno((unixFile*)id, errno); + return SQLITE_IOERR_FSTAT; + } + *pSize = buf.st_size; + + /* When opening a zero-size database, the findInodeInfo() procedure + ** writes a single byte into that file in order to work around a bug + ** in the OS-X msdos filesystem. In order to avoid problems with upper + ** layers, we need to report this file size as zero even though it is + ** really 1. Ticket #3260. + */ + if( *pSize==1 ) *pSize = 0; + + + return SQLITE_OK; +} + +#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) +/* +** Handler for proxy-locking file-control verbs. Defined below in the +** proxying locking division. +*/ +static int proxyFileControl(sqlite3_file*,int,void*); +#endif + +/* +** This function is called to handle the SQLITE_FCNTL_SIZE_HINT +** file-control operation. Enlarge the database to nBytes in size +** (rounded up to the next chunk-size). If the database is already +** nBytes or larger, this routine is a no-op. +*/ +static int fcntlSizeHint(unixFile *pFile, i64 nByte){ + if( pFile->szChunk>0 ){ + i64 nSize; /* Required file size */ + struct stat buf; /* Used to hold return values of fstat() */ + + if( osFstat(pFile->h, &buf) ){ + return SQLITE_IOERR_FSTAT; + } + + nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk; + if( nSize>(i64)buf.st_size ){ + +#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE + /* The code below is handling the return value of osFallocate() + ** correctly. posix_fallocate() is defined to "returns zero on success, + ** or an error number on failure". See the manpage for details. */ + int err; + do{ + err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size); + }while( err==EINTR ); + if( err ) return SQLITE_IOERR_WRITE; +#else + /* If the OS does not have posix_fallocate(), fake it. Write a + ** single byte to the last byte in each block that falls entirely + ** within the extended region. Then, if required, a single byte + ** at offset (nSize-1), to set the size of the file correctly. + ** This is a similar technique to that used by glibc on systems + ** that do not have a real fallocate() call. + */ + int nBlk = buf.st_blksize; /* File-system block size */ + int nWrite = 0; /* Number of bytes written by seekAndWrite */ + i64 iWrite; /* Next offset to write to */ + + iWrite = ((buf.st_size + 2*nBlk - 1)/nBlk)*nBlk-1; + assert( iWrite>=buf.st_size ); + assert( (iWrite/nBlk)==((buf.st_size+nBlk-1)/nBlk) ); + assert( ((iWrite+1)%nBlk)==0 ); + for(/*no-op*/; iWrite<nSize; iWrite+=nBlk ){ + nWrite = seekAndWrite(pFile, iWrite, "", 1); + if( nWrite!=1 ) return SQLITE_IOERR_WRITE; + } + if( nWrite==0 || (nSize%nBlk) ){ + nWrite = seekAndWrite(pFile, nSize-1, "", 1); + if( nWrite!=1 ) return SQLITE_IOERR_WRITE; + } +#endif + } + } + +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ + int rc; + if( pFile->szChunk<=0 ){ + if( robust_ftruncate(pFile->h, nByte) ){ + storeLastErrno(pFile, errno); + return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); + } + } + + rc = unixMapfile(pFile, nByte); + return rc; + } +#endif + + return SQLITE_OK; +} + +/* +** If *pArg is initially negative then this is a query. Set *pArg to +** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. +** +** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. +*/ +static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ + if( *pArg<0 ){ + *pArg = (pFile->ctrlFlags & mask)!=0; + }else if( (*pArg)==0 ){ + pFile->ctrlFlags &= ~mask; + }else{ + pFile->ctrlFlags |= mask; + } +} + +/* Forward declaration */ +static int unixGetTempname(int nBuf, char *zBuf); + +/* +** Information and control of an open file handle. +*/ +static int unixFileControl(sqlite3_file *id, int op, void *pArg){ + unixFile *pFile = (unixFile*)id; + switch( op ){ + case SQLITE_FCNTL_WAL_BLOCK: { + /* pFile->ctrlFlags |= UNIXFILE_BLOCK; // Deferred feature */ + return SQLITE_OK; + } + case SQLITE_FCNTL_LOCKSTATE: { + *(int*)pArg = pFile->eFileLock; + return SQLITE_OK; + } + case SQLITE_FCNTL_LAST_ERRNO: { + *(int*)pArg = pFile->lastErrno; + return SQLITE_OK; + } + case SQLITE_FCNTL_CHUNK_SIZE: { + pFile->szChunk = *(int *)pArg; + return SQLITE_OK; + } + case SQLITE_FCNTL_SIZE_HINT: { + int rc; + SimulateIOErrorBenign(1); + rc = fcntlSizeHint(pFile, *(i64 *)pArg); + SimulateIOErrorBenign(0); + return rc; + } + case SQLITE_FCNTL_PERSIST_WAL: { + unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { + unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_VFSNAME: { + *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); + return SQLITE_OK; + } + case SQLITE_FCNTL_TEMPFILENAME: { + char *zTFile = sqlite3_malloc64( pFile->pVfs->mxPathname ); + if( zTFile ){ + unixGetTempname(pFile->pVfs->mxPathname, zTFile); + *(char**)pArg = zTFile; + } + return SQLITE_OK; + } + case SQLITE_FCNTL_HAS_MOVED: { + *(int*)pArg = fileHasMoved(pFile); + return SQLITE_OK; + } +#if SQLITE_MAX_MMAP_SIZE>0 + case SQLITE_FCNTL_MMAP_SIZE: { + i64 newLimit = *(i64*)pArg; + int rc = SQLITE_OK; + if( newLimit>sqlite3GlobalConfig.mxMmap ){ + newLimit = sqlite3GlobalConfig.mxMmap; + } + *(i64*)pArg = pFile->mmapSizeMax; + if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ + pFile->mmapSizeMax = newLimit; + if( pFile->mmapSize>0 ){ + unixUnmapfile(pFile); + rc = unixMapfile(pFile, -1); + } + } + return rc; + } +#endif +#ifdef SQLITE_DEBUG + /* The pager calls this method to signal that it has done + ** a rollback and that the database is therefore unchanged and + ** it hence it is OK for the transaction change counter to be + ** unchanged. + */ + case SQLITE_FCNTL_DB_UNCHANGED: { + ((unixFile*)id)->dbUpdate = 0; + return SQLITE_OK; + } +#endif +#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) + case SQLITE_FCNTL_SET_LOCKPROXYFILE: + case SQLITE_FCNTL_GET_LOCKPROXYFILE: { + return proxyFileControl(id,op,pArg); + } +#endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ + } + return SQLITE_NOTFOUND; +} + +/* +** Return the sector size in bytes of the underlying block device for +** the specified file. This is almost always 512 bytes, but may be +** larger for some devices. +** +** SQLite code assumes this function cannot fail. It also assumes that +** if two files are created in the same file-system directory (i.e. +** a database and its journal file) that the sector size will be the +** same for both. +*/ +#ifndef __QNXNTO__ +static int unixSectorSize(sqlite3_file *NotUsed){ + UNUSED_PARAMETER(NotUsed); + return SQLITE_DEFAULT_SECTOR_SIZE; +} +#endif + +/* +** The following version of unixSectorSize() is optimized for QNX. +*/ +#ifdef __QNXNTO__ +#include <sys/dcmd_blk.h> +#include <sys/statvfs.h> +static int unixSectorSize(sqlite3_file *id){ + unixFile *pFile = (unixFile*)id; + if( pFile->sectorSize == 0 ){ + struct statvfs fsInfo; + + /* Set defaults for non-supported filesystems */ + pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; + pFile->deviceCharacteristics = 0; + if( fstatvfs(pFile->h, &fsInfo) == -1 ) { + return pFile->sectorSize; + } + + if( !strcmp(fsInfo.f_basetype, "tmp") ) { + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */ + SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until + ** the write succeeds */ + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else if( strstr(fsInfo.f_basetype, "etfs") ){ + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + /* etfs cluster size writes are atomic */ + (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) | + SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until + ** the write succeeds */ + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){ + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */ + SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until + ** the write succeeds */ + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){ + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + /* full bitset of atomics from max sector size and smaller */ + ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else if( strstr(fsInfo.f_basetype, "dos") ){ + pFile->sectorSize = fsInfo.f_bsize; + pFile->deviceCharacteristics = + /* full bitset of atomics from max sector size and smaller */ + ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | + SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind + ** so it is ordered */ + 0; + }else{ + pFile->deviceCharacteristics = + SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */ + SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until + ** the write succeeds */ + 0; + } + } + /* Last chance verification. If the sector size isn't a multiple of 512 + ** then it isn't valid.*/ + if( pFile->sectorSize % 512 != 0 ){ + pFile->deviceCharacteristics = 0; + pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; + } + return pFile->sectorSize; +} +#endif /* __QNXNTO__ */ + +/* +** Return the device characteristics for the file. +** +** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. +** However, that choice is controversial since technically the underlying +** file system does not always provide powersafe overwrites. (In other +** words, after a power-loss event, parts of the file that were never +** written might end up being altered.) However, non-PSOW behavior is very, +** very rare. And asserting PSOW makes a large reduction in the amount +** of required I/O for journaling, since a lot of padding is eliminated. +** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control +** available to turn it off and URI query parameter available to turn it off. +*/ +static int unixDeviceCharacteristics(sqlite3_file *id){ + unixFile *p = (unixFile*)id; + int rc = 0; +#ifdef __QNXNTO__ + if( p->sectorSize==0 ) unixSectorSize(id); + rc = p->deviceCharacteristics; +#endif + if( p->ctrlFlags & UNIXFILE_PSOW ){ + rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; + } + return rc; +} + +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 + +/* +** Return the system page size. +** +** This function should not be called directly by other code in this file. +** Instead, it should be called via macro osGetpagesize(). +*/ +static int unixGetpagesize(void){ +#if OS_VXWORKS + return 1024; +#elif defined(_BSD_SOURCE) + return getpagesize(); +#else + return (int)sysconf(_SC_PAGESIZE); +#endif +} + +#endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */ + +#ifndef SQLITE_OMIT_WAL + +/* +** Object used to represent an shared memory buffer. +** +** When multiple threads all reference the same wal-index, each thread +** has its own unixShm object, but they all point to a single instance +** of this unixShmNode object. In other words, each wal-index is opened +** only once per process. +** +** Each unixShmNode object is connected to a single unixInodeInfo object. +** We could coalesce this object into unixInodeInfo, but that would mean +** every open file that does not use shared memory (in other words, most +** open files) would have to carry around this extra information. So +** the unixInodeInfo object contains a pointer to this unixShmNode object +** and the unixShmNode object is created only when needed. +** +** unixMutexHeld() must be true when creating or destroying +** this object or while reading or writing the following fields: +** +** nRef +** +** The following fields are read-only after the object is created: +** +** fid +** zFilename +** +** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and +** unixMutexHeld() is true when reading or writing any other field +** in this structure. +*/ +struct unixShmNode { + unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */ + sqlite3_mutex *mutex; /* Mutex to access this object */ + char *zFilename; /* Name of the mmapped file */ + int h; /* Open file descriptor */ + int szRegion; /* Size of shared-memory regions */ + u16 nRegion; /* Size of array apRegion */ + u8 isReadonly; /* True if read-only */ + char **apRegion; /* Array of mapped shared-memory regions */ + int nRef; /* Number of unixShm objects pointing to this */ + unixShm *pFirst; /* All unixShm objects pointing to this */ +#ifdef SQLITE_DEBUG + u8 exclMask; /* Mask of exclusive locks held */ + u8 sharedMask; /* Mask of shared locks held */ + u8 nextShmId; /* Next available unixShm.id value */ +#endif +}; + +/* +** Structure used internally by this VFS to record the state of an +** open shared memory connection. +** +** The following fields are initialized when this object is created and +** are read-only thereafter: +** +** unixShm.pFile +** unixShm.id +** +** All other fields are read/write. The unixShm.pFile->mutex must be held +** while accessing any read/write fields. +*/ +struct unixShm { + unixShmNode *pShmNode; /* The underlying unixShmNode object */ + unixShm *pNext; /* Next unixShm with the same unixShmNode */ + u8 hasMutex; /* True if holding the unixShmNode mutex */ + u8 id; /* Id of this connection within its unixShmNode */ + u16 sharedMask; /* Mask of shared locks held */ + u16 exclMask; /* Mask of exclusive locks held */ +}; + +/* +** Constants used for locking +*/ +#define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ +#define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ + +/* +** Apply posix advisory locks for all bytes from ofst through ofst+n-1. +** +** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking +** otherwise. +*/ +static int unixShmSystemLock( + unixFile *pFile, /* Open connection to the WAL file */ + int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ + int ofst, /* First byte of the locking range */ + int n /* Number of bytes to lock */ +){ + unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */ + struct flock f; /* The posix advisory locking structure */ + int rc = SQLITE_OK; /* Result code form fcntl() */ + + /* Access to the unixShmNode object is serialized by the caller */ + pShmNode = pFile->pInode->pShmNode; + assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 ); + + /* Shared locks never span more than one byte */ + assert( n==1 || lockType!=F_RDLCK ); + + /* Locks are within range */ + assert( n>=1 && n<SQLITE_SHM_NLOCK ); + + if( pShmNode->h>=0 ){ + int lkType; + /* Initialize the locking parameters */ + memset(&f, 0, sizeof(f)); + f.l_type = lockType; + f.l_whence = SEEK_SET; + f.l_start = ofst; + f.l_len = n; + + lkType = (pFile->ctrlFlags & UNIXFILE_BLOCK)!=0 ? F_SETLKW : F_SETLK; + rc = osFcntl(pShmNode->h, lkType, &f); + rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY; + pFile->ctrlFlags &= ~UNIXFILE_BLOCK; + } + + /* Update the global lock state and do debug tracing */ +#ifdef SQLITE_DEBUG + { u16 mask; + OSTRACE(("SHM-LOCK ")); + mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst); + if( rc==SQLITE_OK ){ + if( lockType==F_UNLCK ){ + OSTRACE(("unlock %d ok", ofst)); + pShmNode->exclMask &= ~mask; + pShmNode->sharedMask &= ~mask; + }else if( lockType==F_RDLCK ){ + OSTRACE(("read-lock %d ok", ofst)); + pShmNode->exclMask &= ~mask; + pShmNode->sharedMask |= mask; + }else{ + assert( lockType==F_WRLCK ); + OSTRACE(("write-lock %d ok", ofst)); + pShmNode->exclMask |= mask; + pShmNode->sharedMask &= ~mask; + } + }else{ + if( lockType==F_UNLCK ){ + OSTRACE(("unlock %d failed", ofst)); + }else if( lockType==F_RDLCK ){ + OSTRACE(("read-lock failed")); + }else{ + assert( lockType==F_WRLCK ); + OSTRACE(("write-lock %d failed", ofst)); + } + } + OSTRACE((" - afterwards %03x,%03x\n", + pShmNode->sharedMask, pShmNode->exclMask)); + } +#endif + + return rc; +} + +/* +** Return the minimum number of 32KB shm regions that should be mapped at +** a time, assuming that each mapping must be an integer multiple of the +** current system page-size. +** +** Usually, this is 1. The exception seems to be systems that are configured +** to use 64KB pages - in this case each mapping must cover at least two +** shm regions. +*/ +static int unixShmRegionPerMap(void){ + int shmsz = 32*1024; /* SHM region size */ + int pgsz = osGetpagesize(); /* System page size */ + assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */ + if( pgsz<shmsz ) return 1; + return pgsz/shmsz; +} + +/* +** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0. +** +** This is not a VFS shared-memory method; it is a utility function called +** by VFS shared-memory methods. +*/ +static void unixShmPurge(unixFile *pFd){ + unixShmNode *p = pFd->pInode->pShmNode; + assert( unixMutexHeld() ); + if( p && p->nRef==0 ){ + int nShmPerMap = unixShmRegionPerMap(); + int i; + assert( p->pInode==pFd->pInode ); + sqlite3_mutex_free(p->mutex); + for(i=0; i<p->nRegion; i+=nShmPerMap){ + if( p->h>=0 ){ + osMunmap(p->apRegion[i], p->szRegion); + }else{ + sqlite3_free(p->apRegion[i]); + } + } + sqlite3_free(p->apRegion); + if( p->h>=0 ){ + robust_close(pFd, p->h, __LINE__); + p->h = -1; + } + p->pInode->pShmNode = 0; + sqlite3_free(p); + } +} + +/* +** Open a shared-memory area associated with open database file pDbFd. +** This particular implementation uses mmapped files. +** +** The file used to implement shared-memory is in the same directory +** as the open database file and has the same name as the open database +** file with the "-shm" suffix added. For example, if the database file +** is "/home/user1/config.db" then the file that is created and mmapped +** for shared memory will be called "/home/user1/config.db-shm". +** +** Another approach to is to use files in /dev/shm or /dev/tmp or an +** some other tmpfs mount. But if a file in a different directory +** from the database file is used, then differing access permissions +** or a chroot() might cause two different processes on the same +** database to end up using different files for shared memory - +** meaning that their memory would not really be shared - resulting +** in database corruption. Nevertheless, this tmpfs file usage +** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm" +** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time +** option results in an incompatible build of SQLite; builds of SQLite +** that with differing SQLITE_SHM_DIRECTORY settings attempt to use the +** same database file at the same time, database corruption will likely +** result. The SQLITE_SHM_DIRECTORY compile-time option is considered +** "unsupported" and may go away in a future SQLite release. +** +** When opening a new shared-memory file, if no other instances of that +** file are currently open, in this process or in other processes, then +** the file must be truncated to zero length or have its header cleared. +** +** If the original database file (pDbFd) is using the "unix-excl" VFS +** that means that an exclusive lock is held on the database file and +** that no other processes are able to read or write the database. In +** that case, we do not really need shared memory. No shared memory +** file is created. The shared memory will be simulated with heap memory. +*/ +static int unixOpenSharedMemory(unixFile *pDbFd){ + struct unixShm *p = 0; /* The connection to be opened */ + struct unixShmNode *pShmNode; /* The underlying mmapped file */ + int rc; /* Result code */ + unixInodeInfo *pInode; /* The inode of fd */ + char *zShmFilename; /* Name of the file used for SHM */ + int nShmFilename; /* Size of the SHM filename in bytes */ + + /* Allocate space for the new unixShm object. */ + p = sqlite3_malloc64( sizeof(*p) ); + if( p==0 ) return SQLITE_NOMEM; + memset(p, 0, sizeof(*p)); + assert( pDbFd->pShm==0 ); + + /* Check to see if a unixShmNode object already exists. Reuse an existing + ** one if present. Create a new one if necessary. + */ + unixEnterMutex(); + pInode = pDbFd->pInode; + pShmNode = pInode->pShmNode; + if( pShmNode==0 ){ + struct stat sStat; /* fstat() info for database file */ +#ifndef SQLITE_SHM_DIRECTORY + const char *zBasePath = pDbFd->zPath; +#endif + + /* Call fstat() to figure out the permissions on the database file. If + ** a new *-shm file is created, an attempt will be made to create it + ** with the same permissions. + */ + if( osFstat(pDbFd->h, &sStat) && pInode->bProcessLock==0 ){ + rc = SQLITE_IOERR_FSTAT; + goto shm_open_err; + } + +#ifdef SQLITE_SHM_DIRECTORY + nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31; +#else + nShmFilename = 6 + (int)strlen(zBasePath); +#endif + pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename ); + if( pShmNode==0 ){ + rc = SQLITE_NOMEM; + goto shm_open_err; + } + memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename); + zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1]; +#ifdef SQLITE_SHM_DIRECTORY + sqlite3_snprintf(nShmFilename, zShmFilename, + SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x", + (u32)sStat.st_ino, (u32)sStat.st_dev); +#else + sqlite3_snprintf(nShmFilename, zShmFilename, "%s-shm", zBasePath); +#endif + pShmNode->h = -1; + pDbFd->pInode->pShmNode = pShmNode; + pShmNode->pInode = pDbFd->pInode; + pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); + if( pShmNode->mutex==0 ){ + rc = SQLITE_NOMEM; + goto shm_open_err; + } + + if( pInode->bProcessLock==0 ){ + int openFlags = O_RDWR | O_CREAT; + pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777)); + if( pShmNode->h<0 ){ + rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename); + goto shm_open_err; + } + + /* If this process is running as root, make sure that the SHM file + ** is owned by the same user that owns the original database. Otherwise, + ** the original owner will not be able to connect. + */ + osFchown(pShmNode->h, sStat.st_uid, sStat.st_gid); + + /* Check to see if another process is holding the dead-man switch. + ** If not, truncate the file to zero length. + */ + rc = SQLITE_OK; + if( unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ + if( robust_ftruncate(pShmNode->h, 0) ){ + rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename); + } + } + if( rc==SQLITE_OK ){ + rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1); + } + if( rc ) goto shm_open_err; + } + } + + /* Make the new connection a child of the unixShmNode */ + p->pShmNode = pShmNode; +#ifdef SQLITE_DEBUG + p->id = pShmNode->nextShmId++; +#endif + pShmNode->nRef++; + pDbFd->pShm = p; + unixLeaveMutex(); + + /* The reference count on pShmNode has already been incremented under + ** the cover of the unixEnterMutex() mutex and the pointer from the + ** new (struct unixShm) object to the pShmNode has been set. All that is + ** left to do is to link the new object into the linked list starting + ** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex + ** mutex. + */ + sqlite3_mutex_enter(pShmNode->mutex); + p->pNext = pShmNode->pFirst; + pShmNode->pFirst = p; + sqlite3_mutex_leave(pShmNode->mutex); + return SQLITE_OK; + + /* Jump here on any error */ +shm_open_err: + unixShmPurge(pDbFd); /* This call frees pShmNode if required */ + sqlite3_free(p); + unixLeaveMutex(); + return rc; +} + +/* +** This function is called to obtain a pointer to region iRegion of the +** shared-memory associated with the database file fd. Shared-memory regions +** are numbered starting from zero. Each shared-memory region is szRegion +** bytes in size. +** +** If an error occurs, an error code is returned and *pp is set to NULL. +** +** Otherwise, if the bExtend parameter is 0 and the requested shared-memory +** region has not been allocated (by any client, including one running in a +** separate process), then *pp is set to NULL and SQLITE_OK returned. If +** bExtend is non-zero and the requested shared-memory region has not yet +** been allocated, it is allocated by this function. +** +** If the shared-memory region has already been allocated or is allocated by +** this call as described above, then it is mapped into this processes +** address space (if it is not already), *pp is set to point to the mapped +** memory and SQLITE_OK returned. +*/ +static int unixShmMap( + sqlite3_file *fd, /* Handle open on database file */ + int iRegion, /* Region to retrieve */ + int szRegion, /* Size of regions */ + int bExtend, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ +){ + unixFile *pDbFd = (unixFile*)fd; + unixShm *p; + unixShmNode *pShmNode; + int rc = SQLITE_OK; + int nShmPerMap = unixShmRegionPerMap(); + int nReqRegion; + + /* If the shared-memory file has not yet been opened, open it now. */ + if( pDbFd->pShm==0 ){ + rc = unixOpenSharedMemory(pDbFd); + if( rc!=SQLITE_OK ) return rc; + } + + p = pDbFd->pShm; + pShmNode = p->pShmNode; + sqlite3_mutex_enter(pShmNode->mutex); + assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); + assert( pShmNode->pInode==pDbFd->pInode ); + assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); + assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); + + /* Minimum number of regions required to be mapped. */ + nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap; + + if( pShmNode->nRegion<nReqRegion ){ + char **apNew; /* New apRegion[] array */ + int nByte = nReqRegion*szRegion; /* Minimum required file size */ + struct stat sStat; /* Used by fstat() */ + + pShmNode->szRegion = szRegion; + + if( pShmNode->h>=0 ){ + /* The requested region is not mapped into this processes address space. + ** Check to see if it has been allocated (i.e. if the wal-index file is + ** large enough to contain the requested region). + */ + if( osFstat(pShmNode->h, &sStat) ){ + rc = SQLITE_IOERR_SHMSIZE; + goto shmpage_out; + } + + if( sStat.st_size<nByte ){ + /* The requested memory region does not exist. If bExtend is set to + ** false, exit early. *pp will be set to NULL and SQLITE_OK returned. + */ + if( !bExtend ){ + goto shmpage_out; + } + + /* Alternatively, if bExtend is true, extend the file. Do this by + ** writing a single byte to the end of each (OS) page being + ** allocated or extended. Technically, we need only write to the + ** last page in order to extend the file. But writing to all new + ** pages forces the OS to allocate them immediately, which reduces + ** the chances of SIGBUS while accessing the mapped region later on. + */ + else{ + static const int pgsz = 4096; + int iPg; + + /* Write to the last byte of each newly allocated or extended page */ + assert( (nByte % pgsz)==0 ); + for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){ + if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, 0)!=1 ){ + const char *zFile = pShmNode->zFilename; + rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile); + goto shmpage_out; + } + } + } + } + } + + /* Map the requested memory region into this processes address space. */ + apNew = (char **)sqlite3_realloc( + pShmNode->apRegion, nReqRegion*sizeof(char *) + ); + if( !apNew ){ + rc = SQLITE_IOERR_NOMEM; + goto shmpage_out; + } + pShmNode->apRegion = apNew; + while( pShmNode->nRegion<nReqRegion ){ + int nMap = szRegion*nShmPerMap; + int i; + void *pMem; + if( pShmNode->h>=0 ){ + pMem = osMmap(0, nMap, + pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, + MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion + ); + if( pMem==MAP_FAILED ){ + rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename); + goto shmpage_out; + } + }else{ + pMem = sqlite3_malloc64(szRegion); + if( pMem==0 ){ + rc = SQLITE_NOMEM; + goto shmpage_out; + } + memset(pMem, 0, szRegion); + } + + for(i=0; i<nShmPerMap; i++){ + pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i]; + } + pShmNode->nRegion += nShmPerMap; + } + } + +shmpage_out: + if( pShmNode->nRegion>iRegion ){ + *pp = pShmNode->apRegion[iRegion]; + }else{ + *pp = 0; + } + if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY; + sqlite3_mutex_leave(pShmNode->mutex); + return rc; +} + +/* +** Change the lock state for a shared-memory segment. +** +** Note that the relationship between SHAREd and EXCLUSIVE locks is a little +** different here than in posix. In xShmLock(), one can go from unlocked +** to shared and back or from unlocked to exclusive and back. But one may +** not go from shared to exclusive or from exclusive to shared. +*/ +static int unixShmLock( + sqlite3_file *fd, /* Database file holding the shared memory */ + int ofst, /* First lock to acquire or release */ + int n, /* Number of locks to acquire or release */ + int flags /* What to do with the lock */ +){ + unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */ + unixShm *p = pDbFd->pShm; /* The shared memory being locked */ + unixShm *pX; /* For looping over all siblings */ + unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */ + int rc = SQLITE_OK; /* Result code */ + u16 mask; /* Mask of locks to take or release */ + + assert( pShmNode==pDbFd->pInode->pShmNode ); + assert( pShmNode->pInode==pDbFd->pInode ); + assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); + assert( n>=1 ); + assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) + || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) + || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) + || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); + assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); + assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); + assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); + + mask = (1<<(ofst+n)) - (1<<ofst); + assert( n>1 || mask==(1<<ofst) ); + sqlite3_mutex_enter(pShmNode->mutex); + if( flags & SQLITE_SHM_UNLOCK ){ + u16 allMask = 0; /* Mask of locks held by siblings */ + + /* See if any siblings hold this same lock */ + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + if( pX==p ) continue; + assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); + allMask |= pX->sharedMask; + } + + /* Unlock the system-level locks */ + if( (mask & allMask)==0 ){ + rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n); + }else{ + rc = SQLITE_OK; + } + + /* Undo the local locks */ + if( rc==SQLITE_OK ){ + p->exclMask &= ~mask; + p->sharedMask &= ~mask; + } + }else if( flags & SQLITE_SHM_SHARED ){ + u16 allShared = 0; /* Union of locks held by connections other than "p" */ + + /* Find out which shared locks are already held by sibling connections. + ** If any sibling already holds an exclusive lock, go ahead and return + ** SQLITE_BUSY. + */ + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + if( (pX->exclMask & mask)!=0 ){ + rc = SQLITE_BUSY; + break; + } + allShared |= pX->sharedMask; + } + + /* Get shared locks at the system level, if necessary */ + if( rc==SQLITE_OK ){ + if( (allShared & mask)==0 ){ + rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n); + }else{ + rc = SQLITE_OK; + } + } + + /* Get the local shared locks */ + if( rc==SQLITE_OK ){ + p->sharedMask |= mask; + } + }else{ + /* Make sure no sibling connections hold locks that will block this + ** lock. If any do, return SQLITE_BUSY right away. + */ + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){ + rc = SQLITE_BUSY; + break; + } + } + + /* Get the exclusive locks at the system level. Then if successful + ** also mark the local connection as being locked. + */ + if( rc==SQLITE_OK ){ + rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n); + if( rc==SQLITE_OK ){ + assert( (p->sharedMask & mask)==0 ); + p->exclMask |= mask; + } + } + } + sqlite3_mutex_leave(pShmNode->mutex); + OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n", + p->id, osGetpid(0), p->sharedMask, p->exclMask)); + return rc; +} + +/* +** Implement a memory barrier or memory fence on shared memory. +** +** All loads and stores begun before the barrier must complete before +** any load or store begun after the barrier. +*/ +static void unixShmBarrier( + sqlite3_file *fd /* Database file holding the shared memory */ +){ + UNUSED_PARAMETER(fd); + sqlite3MemoryBarrier(); /* compiler-defined memory barrier */ + unixEnterMutex(); /* Also mutex, for redundancy */ + unixLeaveMutex(); +} + +/* +** Close a connection to shared-memory. Delete the underlying +** storage if deleteFlag is true. +** +** If there is no shared memory associated with the connection then this +** routine is a harmless no-op. +*/ +static int unixShmUnmap( + sqlite3_file *fd, /* The underlying database file */ + int deleteFlag /* Delete shared-memory if true */ +){ + unixShm *p; /* The connection to be closed */ + unixShmNode *pShmNode; /* The underlying shared-memory file */ + unixShm **pp; /* For looping over sibling connections */ + unixFile *pDbFd; /* The underlying database file */ + + pDbFd = (unixFile*)fd; + p = pDbFd->pShm; + if( p==0 ) return SQLITE_OK; + pShmNode = p->pShmNode; + + assert( pShmNode==pDbFd->pInode->pShmNode ); + assert( pShmNode->pInode==pDbFd->pInode ); + + /* Remove connection p from the set of connections associated + ** with pShmNode */ + sqlite3_mutex_enter(pShmNode->mutex); + for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} + *pp = p->pNext; + + /* Free the connection p */ + sqlite3_free(p); + pDbFd->pShm = 0; + sqlite3_mutex_leave(pShmNode->mutex); + + /* If pShmNode->nRef has reached 0, then close the underlying + ** shared-memory file, too */ + unixEnterMutex(); + assert( pShmNode->nRef>0 ); + pShmNode->nRef--; + if( pShmNode->nRef==0 ){ + if( deleteFlag && pShmNode->h>=0 ){ + osUnlink(pShmNode->zFilename); + } + unixShmPurge(pDbFd); + } + unixLeaveMutex(); + + return SQLITE_OK; +} + + +#else +# define unixShmMap 0 +# define unixShmLock 0 +# define unixShmBarrier 0 +# define unixShmUnmap 0 +#endif /* #ifndef SQLITE_OMIT_WAL */ + +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** If it is currently memory mapped, unmap file pFd. +*/ +static void unixUnmapfile(unixFile *pFd){ + assert( pFd->nFetchOut==0 ); + if( pFd->pMapRegion ){ + osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); + pFd->pMapRegion = 0; + pFd->mmapSize = 0; + pFd->mmapSizeActual = 0; + } +} + +/* +** Attempt to set the size of the memory mapping maintained by file +** descriptor pFd to nNew bytes. Any existing mapping is discarded. +** +** If successful, this function sets the following variables: +** +** unixFile.pMapRegion +** unixFile.mmapSize +** unixFile.mmapSizeActual +** +** If unsuccessful, an error message is logged via sqlite3_log() and +** the three variables above are zeroed. In this case SQLite should +** continue accessing the database using the xRead() and xWrite() +** methods. +*/ +static void unixRemapfile( + unixFile *pFd, /* File descriptor object */ + i64 nNew /* Required mapping size */ +){ + const char *zErr = "mmap"; + int h = pFd->h; /* File descriptor open on db file */ + u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */ + i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */ + u8 *pNew = 0; /* Location of new mapping */ + int flags = PROT_READ; /* Flags to pass to mmap() */ + + assert( pFd->nFetchOut==0 ); + assert( nNew>pFd->mmapSize ); + assert( nNew<=pFd->mmapSizeMax ); + assert( nNew>0 ); + assert( pFd->mmapSizeActual>=pFd->mmapSize ); + assert( MAP_FAILED!=0 ); + + if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; + + if( pOrig ){ +#if HAVE_MREMAP + i64 nReuse = pFd->mmapSize; +#else + const int szSyspage = osGetpagesize(); + i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); +#endif + u8 *pReq = &pOrig[nReuse]; + + /* Unmap any pages of the existing mapping that cannot be reused. */ + if( nReuse!=nOrig ){ + osMunmap(pReq, nOrig-nReuse); + } + +#if HAVE_MREMAP + pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); + zErr = "mremap"; +#else + pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse); + if( pNew!=MAP_FAILED ){ + if( pNew!=pReq ){ + osMunmap(pNew, nNew - nReuse); + pNew = 0; + }else{ + pNew = pOrig; + } + } +#endif + + /* The attempt to extend the existing mapping failed. Free it. */ + if( pNew==MAP_FAILED || pNew==0 ){ + osMunmap(pOrig, nReuse); + } + } + + /* If pNew is still NULL, try to create an entirely new mapping. */ + if( pNew==0 ){ + pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0); + } + + if( pNew==MAP_FAILED ){ + pNew = 0; + nNew = 0; + unixLogError(SQLITE_OK, zErr, pFd->zPath); + + /* If the mmap() above failed, assume that all subsequent mmap() calls + ** will probably fail too. Fall back to using xRead/xWrite exclusively + ** in this case. */ + pFd->mmapSizeMax = 0; + } + pFd->pMapRegion = (void *)pNew; + pFd->mmapSize = pFd->mmapSizeActual = nNew; +} + +/* +** Memory map or remap the file opened by file-descriptor pFd (if the file +** is already mapped, the existing mapping is replaced by the new). Or, if +** there already exists a mapping for this file, and there are still +** outstanding xFetch() references to it, this function is a no-op. +** +** If parameter nByte is non-negative, then it is the requested size of +** the mapping to create. Otherwise, if nByte is less than zero, then the +** requested size is the size of the file on disk. The actual size of the +** created mapping is either the requested size or the value configured +** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. +** +** SQLITE_OK is returned if no error occurs (even if the mapping is not +** recreated as a result of outstanding references) or an SQLite error +** code otherwise. +*/ +static int unixMapfile(unixFile *pFd, i64 nByte){ + i64 nMap = nByte; + int rc; + + assert( nMap>=0 || pFd->nFetchOut==0 ); + if( pFd->nFetchOut>0 ) return SQLITE_OK; + + if( nMap<0 ){ + struct stat statbuf; /* Low-level file information */ + rc = osFstat(pFd->h, &statbuf); + if( rc!=SQLITE_OK ){ + return SQLITE_IOERR_FSTAT; + } + nMap = statbuf.st_size; + } + if( nMap>pFd->mmapSizeMax ){ + nMap = pFd->mmapSizeMax; + } + + if( nMap!=pFd->mmapSize ){ + if( nMap>0 ){ + unixRemapfile(pFd, nMap); + }else{ + unixUnmapfile(pFd); + } + } + + return SQLITE_OK; +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +/* +** If possible, return a pointer to a mapping of file fd starting at offset +** iOff. The mapping must be valid for at least nAmt bytes. +** +** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. +** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. +** Finally, if an error does occur, return an SQLite error code. The final +** value of *pp is undefined in this case. +** +** If this function does return a pointer, the caller must eventually +** release the reference by calling unixUnfetch(). +*/ +static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ +#if SQLITE_MAX_MMAP_SIZE>0 + unixFile *pFd = (unixFile *)fd; /* The underlying database file */ +#endif + *pp = 0; + +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFd->mmapSizeMax>0 ){ + if( pFd->pMapRegion==0 ){ + int rc = unixMapfile(pFd, -1); + if( rc!=SQLITE_OK ) return rc; + } + if( pFd->mmapSize >= iOff+nAmt ){ + *pp = &((u8 *)pFd->pMapRegion)[iOff]; + pFd->nFetchOut++; + } + } +#endif + return SQLITE_OK; +} + +/* +** If the third argument is non-NULL, then this function releases a +** reference obtained by an earlier call to unixFetch(). The second +** argument passed to this function must be the same as the corresponding +** argument that was passed to the unixFetch() invocation. +** +** Or, if the third argument is NULL, then this function is being called +** to inform the VFS layer that, according to POSIX, any existing mapping +** may now be invalid and should be unmapped. +*/ +static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ +#if SQLITE_MAX_MMAP_SIZE>0 + unixFile *pFd = (unixFile *)fd; /* The underlying database file */ + UNUSED_PARAMETER(iOff); + + /* If p==0 (unmap the entire file) then there must be no outstanding + ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), + ** then there must be at least one outstanding. */ + assert( (p==0)==(pFd->nFetchOut==0) ); + + /* If p!=0, it must match the iOff value. */ + assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); + + if( p ){ + pFd->nFetchOut--; + }else{ + unixUnmapfile(pFd); + } + + assert( pFd->nFetchOut>=0 ); +#else + UNUSED_PARAMETER(fd); + UNUSED_PARAMETER(p); + UNUSED_PARAMETER(iOff); +#endif + return SQLITE_OK; +} + +/* +** Here ends the implementation of all sqlite3_file methods. +** +********************** End sqlite3_file Methods ******************************* +******************************************************************************/ + +/* +** This division contains definitions of sqlite3_io_methods objects that +** implement various file locking strategies. It also contains definitions +** of "finder" functions. A finder-function is used to locate the appropriate +** sqlite3_io_methods object for a particular database file. The pAppData +** field of the sqlite3_vfs VFS objects are initialized to be pointers to +** the correct finder-function for that VFS. +** +** Most finder functions return a pointer to a fixed sqlite3_io_methods +** object. The only interesting finder-function is autolockIoFinder, which +** looks at the filesystem type and tries to guess the best locking +** strategy from that. +** +** For finder-function F, two objects are created: +** +** (1) The real finder-function named "FImpt()". +** +** (2) A constant pointer to this function named just "F". +** +** +** A pointer to the F pointer is used as the pAppData value for VFS +** objects. We have to do this instead of letting pAppData point +** directly at the finder-function since C90 rules prevent a void* +** from be cast into a function pointer. +** +** +** Each instance of this macro generates two objects: +** +** * A constant sqlite3_io_methods object call METHOD that has locking +** methods CLOSE, LOCK, UNLOCK, CKRESLOCK. +** +** * An I/O method finder function called FINDER that returns a pointer +** to the METHOD object in the previous bullet. +*/ +#define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP) \ +static const sqlite3_io_methods METHOD = { \ + VERSION, /* iVersion */ \ + CLOSE, /* xClose */ \ + unixRead, /* xRead */ \ + unixWrite, /* xWrite */ \ + unixTruncate, /* xTruncate */ \ + unixSync, /* xSync */ \ + unixFileSize, /* xFileSize */ \ + LOCK, /* xLock */ \ + UNLOCK, /* xUnlock */ \ + CKLOCK, /* xCheckReservedLock */ \ + unixFileControl, /* xFileControl */ \ + unixSectorSize, /* xSectorSize */ \ + unixDeviceCharacteristics, /* xDeviceCapabilities */ \ + SHMMAP, /* xShmMap */ \ + unixShmLock, /* xShmLock */ \ + unixShmBarrier, /* xShmBarrier */ \ + unixShmUnmap, /* xShmUnmap */ \ + unixFetch, /* xFetch */ \ + unixUnfetch, /* xUnfetch */ \ +}; \ +static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ + UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ + return &METHOD; \ +} \ +static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \ + = FINDER##Impl; + +/* +** Here are all of the sqlite3_io_methods objects for each of the +** locking strategies. Functions that return pointers to these methods +** are also created. +*/ +IOMETHODS( + posixIoFinder, /* Finder function name */ + posixIoMethods, /* sqlite3_io_methods object name */ + 3, /* shared memory and mmap are enabled */ + unixClose, /* xClose method */ + unixLock, /* xLock method */ + unixUnlock, /* xUnlock method */ + unixCheckReservedLock, /* xCheckReservedLock method */ + unixShmMap /* xShmMap method */ +) +IOMETHODS( + nolockIoFinder, /* Finder function name */ + nolockIoMethods, /* sqlite3_io_methods object name */ + 3, /* shared memory is disabled */ + nolockClose, /* xClose method */ + nolockLock, /* xLock method */ + nolockUnlock, /* xUnlock method */ + nolockCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) +IOMETHODS( + dotlockIoFinder, /* Finder function name */ + dotlockIoMethods, /* sqlite3_io_methods object name */ + 1, /* shared memory is disabled */ + dotlockClose, /* xClose method */ + dotlockLock, /* xLock method */ + dotlockUnlock, /* xUnlock method */ + dotlockCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) + +#if SQLITE_ENABLE_LOCKING_STYLE +IOMETHODS( + flockIoFinder, /* Finder function name */ + flockIoMethods, /* sqlite3_io_methods object name */ + 1, /* shared memory is disabled */ + flockClose, /* xClose method */ + flockLock, /* xLock method */ + flockUnlock, /* xUnlock method */ + flockCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) +#endif + +#if OS_VXWORKS +IOMETHODS( + semIoFinder, /* Finder function name */ + semIoMethods, /* sqlite3_io_methods object name */ + 1, /* shared memory is disabled */ + semXClose, /* xClose method */ + semXLock, /* xLock method */ + semXUnlock, /* xUnlock method */ + semXCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) +#endif + +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +IOMETHODS( + afpIoFinder, /* Finder function name */ + afpIoMethods, /* sqlite3_io_methods object name */ + 1, /* shared memory is disabled */ + afpClose, /* xClose method */ + afpLock, /* xLock method */ + afpUnlock, /* xUnlock method */ + afpCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) +#endif + +/* +** The proxy locking method is a "super-method" in the sense that it +** opens secondary file descriptors for the conch and lock files and +** it uses proxy, dot-file, AFP, and flock() locking methods on those +** secondary files. For this reason, the division that implements +** proxy locking is located much further down in the file. But we need +** to go ahead and define the sqlite3_io_methods and finder function +** for proxy locking here. So we forward declare the I/O methods. +*/ +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +static int proxyClose(sqlite3_file*); +static int proxyLock(sqlite3_file*, int); +static int proxyUnlock(sqlite3_file*, int); +static int proxyCheckReservedLock(sqlite3_file*, int*); +IOMETHODS( + proxyIoFinder, /* Finder function name */ + proxyIoMethods, /* sqlite3_io_methods object name */ + 1, /* shared memory is disabled */ + proxyClose, /* xClose method */ + proxyLock, /* xLock method */ + proxyUnlock, /* xUnlock method */ + proxyCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) +#endif + +/* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */ +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +IOMETHODS( + nfsIoFinder, /* Finder function name */ + nfsIoMethods, /* sqlite3_io_methods object name */ + 1, /* shared memory is disabled */ + unixClose, /* xClose method */ + unixLock, /* xLock method */ + nfsUnlock, /* xUnlock method */ + unixCheckReservedLock, /* xCheckReservedLock method */ + 0 /* xShmMap method */ +) +#endif + +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +/* +** This "finder" function attempts to determine the best locking strategy +** for the database file "filePath". It then returns the sqlite3_io_methods +** object that implements that strategy. +** +** This is for MacOSX only. +*/ +static const sqlite3_io_methods *autolockIoFinderImpl( + const char *filePath, /* name of the database file */ + unixFile *pNew /* open file object for the database file */ +){ + static const struct Mapping { + const char *zFilesystem; /* Filesystem type name */ + const sqlite3_io_methods *pMethods; /* Appropriate locking method */ + } aMap[] = { + { "hfs", &posixIoMethods }, + { "ufs", &posixIoMethods }, + { "afpfs", &afpIoMethods }, + { "smbfs", &afpIoMethods }, + { "webdav", &nolockIoMethods }, + { 0, 0 } + }; + int i; + struct statfs fsInfo; + struct flock lockInfo; + + if( !filePath ){ + /* If filePath==NULL that means we are dealing with a transient file + ** that does not need to be locked. */ + return &nolockIoMethods; + } + if( statfs(filePath, &fsInfo) != -1 ){ + if( fsInfo.f_flags & MNT_RDONLY ){ + return &nolockIoMethods; + } + for(i=0; aMap[i].zFilesystem; i++){ + if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){ + return aMap[i].pMethods; + } + } + } + + /* Default case. Handles, amongst others, "nfs". + ** Test byte-range lock using fcntl(). If the call succeeds, + ** assume that the file-system supports POSIX style locks. + */ + lockInfo.l_len = 1; + lockInfo.l_start = 0; + lockInfo.l_whence = SEEK_SET; + lockInfo.l_type = F_RDLCK; + if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { + if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){ + return &nfsIoMethods; + } else { + return &posixIoMethods; + } + }else{ + return &dotlockIoMethods; + } +} +static const sqlite3_io_methods + *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl; + +#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ + +#if OS_VXWORKS +/* +** This "finder" function for VxWorks checks to see if posix advisory +** locking works. If it does, then that is what is used. If it does not +** work, then fallback to named semaphore locking. +*/ +static const sqlite3_io_methods *vxworksIoFinderImpl( + const char *filePath, /* name of the database file */ + unixFile *pNew /* the open file object */ +){ + struct flock lockInfo; + + if( !filePath ){ + /* If filePath==NULL that means we are dealing with a transient file + ** that does not need to be locked. */ + return &nolockIoMethods; + } + + /* Test if fcntl() is supported and use POSIX style locks. + ** Otherwise fall back to the named semaphore method. + */ + lockInfo.l_len = 1; + lockInfo.l_start = 0; + lockInfo.l_whence = SEEK_SET; + lockInfo.l_type = F_RDLCK; + if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { + return &posixIoMethods; + }else{ + return &semIoMethods; + } +} +static const sqlite3_io_methods + *(*const vxworksIoFinder)(const char*,unixFile*) = vxworksIoFinderImpl; + +#endif /* OS_VXWORKS */ + +/* +** An abstract type for a pointer to an IO method finder function: +*/ +typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); + + +/**************************************************************************** +**************************** sqlite3_vfs methods **************************** +** +** This division contains the implementation of methods on the +** sqlite3_vfs object. +*/ + +/* +** Initialize the contents of the unixFile structure pointed to by pId. +*/ +static int fillInUnixFile( + sqlite3_vfs *pVfs, /* Pointer to vfs object */ + int h, /* Open file descriptor of file being opened */ + sqlite3_file *pId, /* Write to the unixFile structure here */ + const char *zFilename, /* Name of the file being opened */ + int ctrlFlags /* Zero or more UNIXFILE_* values */ +){ + const sqlite3_io_methods *pLockingStyle; + unixFile *pNew = (unixFile *)pId; + int rc = SQLITE_OK; + + assert( pNew->pInode==NULL ); + + /* Usually the path zFilename should not be a relative pathname. The + ** exception is when opening the proxy "conch" file in builds that + ** include the special Apple locking styles. + */ +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE + assert( zFilename==0 || zFilename[0]=='/' + || pVfs->pAppData==(void*)&autolockIoFinder ); +#else + assert( zFilename==0 || zFilename[0]=='/' ); +#endif + + /* No locking occurs in temporary files */ + assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 ); + + OSTRACE(("OPEN %-3d %s\n", h, zFilename)); + pNew->h = h; + pNew->pVfs = pVfs; + pNew->zPath = zFilename; + pNew->ctrlFlags = (u8)ctrlFlags; +#if SQLITE_MAX_MMAP_SIZE>0 + pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; +#endif + if( SQLITE_POWERSAFE_OVERWRITE ){ + pNew->ctrlFlags |= UNIXFILE_PSOW; + } + if( strcmp(pVfs->zName,"unix-excl")==0 ){ + pNew->ctrlFlags |= UNIXFILE_EXCL; + } + +#if OS_VXWORKS + pNew->pId = vxworksFindFileId(zFilename); + if( pNew->pId==0 ){ + ctrlFlags |= UNIXFILE_NOLOCK; + rc = SQLITE_NOMEM; + } +#endif + + if( ctrlFlags & UNIXFILE_NOLOCK ){ + pLockingStyle = &nolockIoMethods; + }else{ + pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); +#if SQLITE_ENABLE_LOCKING_STYLE + /* Cache zFilename in the locking context (AFP and dotlock override) for + ** proxyLock activation is possible (remote proxy is based on db name) + ** zFilename remains valid until file is closed, to support */ + pNew->lockingContext = (void*)zFilename; +#endif + } + + if( pLockingStyle == &posixIoMethods +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE + || pLockingStyle == &nfsIoMethods +#endif + ){ + unixEnterMutex(); + rc = findInodeInfo(pNew, &pNew->pInode); + if( rc!=SQLITE_OK ){ + /* If an error occurred in findInodeInfo(), close the file descriptor + ** immediately, before releasing the mutex. findInodeInfo() may fail + ** in two scenarios: + ** + ** (a) A call to fstat() failed. + ** (b) A malloc failed. + ** + ** Scenario (b) may only occur if the process is holding no other + ** file descriptors open on the same file. If there were other file + ** descriptors on this file, then no malloc would be required by + ** findInodeInfo(). If this is the case, it is quite safe to close + ** handle h - as it is guaranteed that no posix locks will be released + ** by doing so. + ** + ** If scenario (a) caused the error then things are not so safe. The + ** implicit assumption here is that if fstat() fails, things are in + ** such bad shape that dropping a lock or two doesn't matter much. + */ + robust_close(pNew, h, __LINE__); + h = -1; + } + unixLeaveMutex(); + } + +#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) + else if( pLockingStyle == &afpIoMethods ){ + /* AFP locking uses the file path so it needs to be included in + ** the afpLockingContext. + */ + afpLockingContext *pCtx; + pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) ); + if( pCtx==0 ){ + rc = SQLITE_NOMEM; + }else{ + /* NB: zFilename exists and remains valid until the file is closed + ** according to requirement F11141. So we do not need to make a + ** copy of the filename. */ + pCtx->dbPath = zFilename; + pCtx->reserved = 0; + srandomdev(); + unixEnterMutex(); + rc = findInodeInfo(pNew, &pNew->pInode); + if( rc!=SQLITE_OK ){ + sqlite3_free(pNew->lockingContext); + robust_close(pNew, h, __LINE__); + h = -1; + } + unixLeaveMutex(); + } + } +#endif + + else if( pLockingStyle == &dotlockIoMethods ){ + /* Dotfile locking uses the file path so it needs to be included in + ** the dotlockLockingContext + */ + char *zLockFile; + int nFilename; + assert( zFilename!=0 ); + nFilename = (int)strlen(zFilename) + 6; + zLockFile = (char *)sqlite3_malloc64(nFilename); + if( zLockFile==0 ){ + rc = SQLITE_NOMEM; + }else{ + sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename); + } + pNew->lockingContext = zLockFile; + } + +#if OS_VXWORKS + else if( pLockingStyle == &semIoMethods ){ + /* Named semaphore locking uses the file path so it needs to be + ** included in the semLockingContext + */ + unixEnterMutex(); + rc = findInodeInfo(pNew, &pNew->pInode); + if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){ + char *zSemName = pNew->pInode->aSemName; + int n; + sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem", + pNew->pId->zCanonicalName); + for( n=1; zSemName[n]; n++ ) + if( zSemName[n]=='/' ) zSemName[n] = '_'; + pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1); + if( pNew->pInode->pSem == SEM_FAILED ){ + rc = SQLITE_NOMEM; + pNew->pInode->aSemName[0] = '\0'; + } + } + unixLeaveMutex(); + } +#endif + + storeLastErrno(pNew, 0); +#if OS_VXWORKS + if( rc!=SQLITE_OK ){ + if( h>=0 ) robust_close(pNew, h, __LINE__); + h = -1; + osUnlink(zFilename); + pNew->ctrlFlags |= UNIXFILE_DELETE; + } +#endif + if( rc!=SQLITE_OK ){ + if( h>=0 ) robust_close(pNew, h, __LINE__); + }else{ + pNew->pMethod = pLockingStyle; + OpenCounter(+1); + verifyDbFile(pNew); + } + return rc; +} + +/* +** Return the name of a directory in which to put temporary files. +** If no suitable temporary file directory can be found, return NULL. +*/ +static const char *unixTempFileDir(void){ + static const char *azDirs[] = { + 0, + 0, + 0, + "/var/tmp", + "/usr/tmp", + "/tmp", + 0 /* List terminator */ + }; + unsigned int i; + struct stat buf; + const char *zDir = 0; + + azDirs[0] = sqlite3_temp_directory; + if( !azDirs[1] ) azDirs[1] = getenv("SQLITE_TMPDIR"); + if( !azDirs[2] ) azDirs[2] = getenv("TMPDIR"); + for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); zDir=azDirs[i++]){ + if( zDir==0 ) continue; + if( osStat(zDir, &buf) ) continue; + if( !S_ISDIR(buf.st_mode) ) continue; + if( osAccess(zDir, 07) ) continue; + break; + } + return zDir; +} + +/* +** Create a temporary file name in zBuf. zBuf must be allocated +** by the calling process and must be big enough to hold at least +** pVfs->mxPathname bytes. +*/ +static int unixGetTempname(int nBuf, char *zBuf){ + static const unsigned char zChars[] = + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789"; + unsigned int i, j; + const char *zDir; + + /* It's odd to simulate an io-error here, but really this is just + ** using the io-error infrastructure to test that SQLite handles this + ** function failing. + */ + SimulateIOError( return SQLITE_IOERR ); + + zDir = unixTempFileDir(); + if( zDir==0 ) zDir = "."; + + /* Check that the output buffer is large enough for the temporary file + ** name. If it is not, return SQLITE_ERROR. + */ + if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 18) >= (size_t)nBuf ){ + return SQLITE_ERROR; + } + + do{ + sqlite3_snprintf(nBuf-18, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir); + j = (int)strlen(zBuf); + sqlite3_randomness(15, &zBuf[j]); + for(i=0; i<15; i++, j++){ + zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ]; + } + zBuf[j] = 0; + zBuf[j+1] = 0; + }while( osAccess(zBuf,0)==0 ); + return SQLITE_OK; +} + +#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) +/* +** Routine to transform a unixFile into a proxy-locking unixFile. +** Implementation in the proxy-lock division, but used by unixOpen() +** if SQLITE_PREFER_PROXY_LOCKING is defined. +*/ +static int proxyTransformUnixFile(unixFile*, const char*); +#endif + +/* +** Search for an unused file descriptor that was opened on the database +** file (not a journal or master-journal file) identified by pathname +** zPath with SQLITE_OPEN_XXX flags matching those passed as the second +** argument to this function. +** +** Such a file descriptor may exist if a database connection was closed +** but the associated file descriptor could not be closed because some +** other file descriptor open on the same file is holding a file-lock. +** Refer to comments in the unixClose() function and the lengthy comment +** describing "Posix Advisory Locking" at the start of this file for +** further details. Also, ticket #4018. +** +** If a suitable file descriptor is found, then it is returned. If no +** such file descriptor is located, -1 is returned. +*/ +static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ + UnixUnusedFd *pUnused = 0; + + /* Do not search for an unused file descriptor on vxworks. Not because + ** vxworks would not benefit from the change (it might, we're not sure), + ** but because no way to test it is currently available. It is better + ** not to risk breaking vxworks support for the sake of such an obscure + ** feature. */ +#if !OS_VXWORKS + struct stat sStat; /* Results of stat() call */ + + /* A stat() call may fail for various reasons. If this happens, it is + ** almost certain that an open() call on the same path will also fail. + ** For this reason, if an error occurs in the stat() call here, it is + ** ignored and -1 is returned. The caller will try to open a new file + ** descriptor on the same path, fail, and return an error to SQLite. + ** + ** Even if a subsequent open() call does succeed, the consequences of + ** not searching for a reusable file descriptor are not dire. */ + if( 0==osStat(zPath, &sStat) ){ + unixInodeInfo *pInode; + + unixEnterMutex(); + pInode = inodeList; + while( pInode && (pInode->fileId.dev!=sStat.st_dev + || pInode->fileId.ino!=sStat.st_ino) ){ + pInode = pInode->pNext; + } + if( pInode ){ + UnixUnusedFd **pp; + for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext)); + pUnused = *pp; + if( pUnused ){ + *pp = pUnused->pNext; + } + } + unixLeaveMutex(); + } +#endif /* if !OS_VXWORKS */ + return pUnused; +} + +/* +** This function is called by unixOpen() to determine the unix permissions +** to create new files with. If no error occurs, then SQLITE_OK is returned +** and a value suitable for passing as the third argument to open(2) is +** written to *pMode. If an IO error occurs, an SQLite error code is +** returned and the value of *pMode is not modified. +** +** In most cases, this routine sets *pMode to 0, which will become +** an indication to robust_open() to create the file using +** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. +** But if the file being opened is a WAL or regular journal file, then +** this function queries the file-system for the permissions on the +** corresponding database file and sets *pMode to this value. Whenever +** possible, WAL and journal files are created using the same permissions +** as the associated database file. +*/ +static int findCreateFileMode( + const char *zPath, /* Path of file (possibly) being created */ + int flags, /* Flags passed as 4th argument to xOpen() */ + mode_t *pMode, /* OUT: Permissions to open file with */ + uid_t *pUid, /* OUT: uid to set on the file */ + gid_t *pGid /* OUT: gid to set on the file */ +){ + int rc = SQLITE_OK; /* Return Code */ + *pMode = 0; + *pUid = 0; + *pGid = 0; + if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ + char zDb[MAX_PATHNAME+1]; /* Database file path */ + int nDb; /* Number of valid bytes in zDb */ + struct stat sStat; /* Output of stat() on database file */ + + /* zPath is a path to a WAL or journal file. The following block derives + ** the path to the associated database file from zPath. This block handles + ** the following naming conventions: + ** + ** "<path to db>-journal" + ** "<path to db>-wal" + ** "<path to db>-journalNN" + ** "<path to db>-walNN" + ** + ** where NN is a decimal number. The NN naming schemes are + ** used by the test_multiplex.c module. + */ + nDb = sqlite3Strlen30(zPath) - 1; + while( zPath[nDb]!='-' ){ + assert( nDb>0 ); + assert( zPath[nDb]!='\n' ); + nDb--; + } + memcpy(zDb, zPath, nDb); + zDb[nDb] = '\0'; + + if( 0==osStat(zDb, &sStat) ){ + *pMode = sStat.st_mode & 0777; + *pUid = sStat.st_uid; + *pGid = sStat.st_gid; + }else{ + rc = SQLITE_IOERR_FSTAT; + } + }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){ + *pMode = 0600; + } + return rc; +} + +/* +** Open the file zPath. +** +** Previously, the SQLite OS layer used three functions in place of this +** one: +** +** sqlite3OsOpenReadWrite(); +** sqlite3OsOpenReadOnly(); +** sqlite3OsOpenExclusive(); +** +** These calls correspond to the following combinations of flags: +** +** ReadWrite() -> (READWRITE | CREATE) +** ReadOnly() -> (READONLY) +** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) +** +** The old OpenExclusive() accepted a boolean argument - "delFlag". If +** true, the file was configured to be automatically deleted when the +** file handle closed. To achieve the same effect using this new +** interface, add the DELETEONCLOSE flag to those specified above for +** OpenExclusive(). +*/ +static int unixOpen( + sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */ + const char *zPath, /* Pathname of file to be opened */ + sqlite3_file *pFile, /* The file descriptor to be filled in */ + int flags, /* Input flags to control the opening */ + int *pOutFlags /* Output flags returned to SQLite core */ +){ + unixFile *p = (unixFile *)pFile; + int fd = -1; /* File descriptor returned by open() */ + int openFlags = 0; /* Flags to pass to open() */ + int eType = flags&0xFFFFFF00; /* Type of file to open */ + int noLock; /* True to omit locking primitives */ + int rc = SQLITE_OK; /* Function Return Code */ + int ctrlFlags = 0; /* UNIXFILE_* flags */ + + int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); + int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); + int isCreate = (flags & SQLITE_OPEN_CREATE); + int isReadonly = (flags & SQLITE_OPEN_READONLY); + int isReadWrite = (flags & SQLITE_OPEN_READWRITE); +#if SQLITE_ENABLE_LOCKING_STYLE + int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); +#endif +#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE + struct statfs fsInfo; +#endif + + /* If creating a master or main-file journal, this function will open + ** a file-descriptor on the directory too. The first time unixSync() + ** is called the directory file descriptor will be fsync()ed and close()d. + */ + int syncDir = (isCreate && ( + eType==SQLITE_OPEN_MASTER_JOURNAL + || eType==SQLITE_OPEN_MAIN_JOURNAL + || eType==SQLITE_OPEN_WAL + )); + + /* If argument zPath is a NULL pointer, this function is required to open + ** a temporary file. Use this buffer to store the file name in. + */ + char zTmpname[MAX_PATHNAME+2]; + const char *zName = zPath; + + /* Check the following statements are true: + ** + ** (a) Exactly one of the READWRITE and READONLY flags must be set, and + ** (b) if CREATE is set, then READWRITE must also be set, and + ** (c) if EXCLUSIVE is set, then CREATE must also be set. + ** (d) if DELETEONCLOSE is set, then CREATE must also be set. + */ + assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); + assert(isCreate==0 || isReadWrite); + assert(isExclusive==0 || isCreate); + assert(isDelete==0 || isCreate); + + /* The main DB, main journal, WAL file and master journal are never + ** automatically deleted. Nor are they ever temporary files. */ + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL ); + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); + + /* Assert that the upper layer has set one of the "file-type" flags. */ + assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB + || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL + || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL + || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL + ); + + /* Detect a pid change and reset the PRNG. There is a race condition + ** here such that two or more threads all trying to open databases at + ** the same instant might all reset the PRNG. But multiple resets + ** are harmless. + */ + if( randomnessPid!=osGetpid(0) ){ + randomnessPid = osGetpid(0); + sqlite3_randomness(0,0); + } + + memset(p, 0, sizeof(unixFile)); + + if( eType==SQLITE_OPEN_MAIN_DB ){ + UnixUnusedFd *pUnused; + pUnused = findReusableFd(zName, flags); + if( pUnused ){ + fd = pUnused->fd; + }else{ + pUnused = sqlite3_malloc64(sizeof(*pUnused)); + if( !pUnused ){ + return SQLITE_NOMEM; + } + } + p->pUnused = pUnused; + + /* Database filenames are double-zero terminated if they are not + ** URIs with parameters. Hence, they can always be passed into + ** sqlite3_uri_parameter(). */ + assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 ); + + }else if( !zName ){ + /* If zName is NULL, the upper layer is requesting a temp file. */ + assert(isDelete && !syncDir); + rc = unixGetTempname(MAX_PATHNAME+2, zTmpname); + if( rc!=SQLITE_OK ){ + return rc; + } + zName = zTmpname; + + /* Generated temporary filenames are always double-zero terminated + ** for use by sqlite3_uri_parameter(). */ + assert( zName[strlen(zName)+1]==0 ); + } + + /* Determine the value of the flags parameter passed to POSIX function + ** open(). These must be calculated even if open() is not called, as + ** they may be stored as part of the file handle and used by the + ** 'conch file' locking functions later on. */ + if( isReadonly ) openFlags |= O_RDONLY; + if( isReadWrite ) openFlags |= O_RDWR; + if( isCreate ) openFlags |= O_CREAT; + if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW); + openFlags |= (O_LARGEFILE|O_BINARY); + + if( fd<0 ){ + mode_t openMode; /* Permissions to create file with */ + uid_t uid; /* Userid for the file */ + gid_t gid; /* Groupid for the file */ + rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); + if( rc!=SQLITE_OK ){ + assert( !p->pUnused ); + assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL ); + return rc; + } + fd = robust_open(zName, openFlags, openMode); + OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags)); + if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){ + /* Failed to open the file for read/write access. Try read-only. */ + flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE); + openFlags &= ~(O_RDWR|O_CREAT); + flags |= SQLITE_OPEN_READONLY; + openFlags |= O_RDONLY; + isReadonly = 1; + fd = robust_open(zName, openFlags, openMode); + } + if( fd<0 ){ + rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); + goto open_finished; + } + + /* If this process is running as root and if creating a new rollback + ** journal or WAL file, set the ownership of the journal or WAL to be + ** the same as the original database. + */ + if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ + osFchown(fd, uid, gid); + } + } + assert( fd>=0 ); + if( pOutFlags ){ + *pOutFlags = flags; + } + + if( p->pUnused ){ + p->pUnused->fd = fd; + p->pUnused->flags = flags; + } + + if( isDelete ){ +#if OS_VXWORKS + zPath = zName; +#elif defined(SQLITE_UNLINK_AFTER_CLOSE) + zPath = sqlite3_mprintf("%s", zName); + if( zPath==0 ){ + robust_close(p, fd, __LINE__); + return SQLITE_NOMEM; + } +#else + osUnlink(zName); +#endif + } +#if SQLITE_ENABLE_LOCKING_STYLE + else{ + p->openFlags = openFlags; + } +#endif + + noLock = eType!=SQLITE_OPEN_MAIN_DB; + + +#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE + if( fstatfs(fd, &fsInfo) == -1 ){ + storeLastErrno(p, errno); + robust_close(p, fd, __LINE__); + return SQLITE_IOERR_ACCESS; + } + if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) { + ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; + } + if (0 == strncmp("exfat", fsInfo.f_fstypename, 5)) { + ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; + } +#endif + + /* Set up appropriate ctrlFlags */ + if( isDelete ) ctrlFlags |= UNIXFILE_DELETE; + if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY; + if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK; + if( syncDir ) ctrlFlags |= UNIXFILE_DIRSYNC; + if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI; + +#if SQLITE_ENABLE_LOCKING_STYLE +#if SQLITE_PREFER_PROXY_LOCKING + isAutoProxy = 1; +#endif + if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){ + char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING"); + int useProxy = 0; + + /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means + ** never use proxy, NULL means use proxy for non-local files only. */ + if( envforce!=NULL ){ + useProxy = atoi(envforce)>0; + }else{ + useProxy = !(fsInfo.f_flags&MNT_LOCAL); + } + if( useProxy ){ + rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); + if( rc==SQLITE_OK ){ + rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:"); + if( rc!=SQLITE_OK ){ + /* Use unixClose to clean up the resources added in fillInUnixFile + ** and clear all the structure's references. Specifically, + ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op + */ + unixClose(pFile); + return rc; + } + } + goto open_finished; + } + } +#endif + + rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); + +open_finished: + if( rc!=SQLITE_OK ){ + sqlite3_free(p->pUnused); + } + return rc; +} + + +/* +** Delete the file at zPath. If the dirSync argument is true, fsync() +** the directory after deleting the file. +*/ +static int unixDelete( + sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */ + const char *zPath, /* Name of file to be deleted */ + int dirSync /* If true, fsync() directory after deleting file */ +){ + int rc = SQLITE_OK; + UNUSED_PARAMETER(NotUsed); + SimulateIOError(return SQLITE_IOERR_DELETE); + if( osUnlink(zPath)==(-1) ){ + if( errno==ENOENT +#if OS_VXWORKS + || osAccess(zPath,0)!=0 +#endif + ){ + rc = SQLITE_IOERR_DELETE_NOENT; + }else{ + rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); + } + return rc; + } +#ifndef SQLITE_DISABLE_DIRSYNC + if( (dirSync & 1)!=0 ){ + int fd; + rc = osOpenDirectory(zPath, &fd); + if( rc==SQLITE_OK ){ +#if OS_VXWORKS + if( fsync(fd)==-1 ) +#else + if( fsync(fd) ) +#endif + { + rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath); + } + robust_close(0, fd, __LINE__); + }else if( rc==SQLITE_CANTOPEN ){ + rc = SQLITE_OK; + } + } +#endif + return rc; +} + +/* +** Test the existence of or access permissions of file zPath. The +** test performed depends on the value of flags: +** +** SQLITE_ACCESS_EXISTS: Return 1 if the file exists +** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. +** SQLITE_ACCESS_READONLY: Return 1 if the file is readable. +** +** Otherwise return 0. +*/ +static int unixAccess( + sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */ + const char *zPath, /* Path of the file to examine */ + int flags, /* What do we want to learn about the zPath file? */ + int *pResOut /* Write result boolean here */ +){ + int amode = 0; + UNUSED_PARAMETER(NotUsed); + SimulateIOError( return SQLITE_IOERR_ACCESS; ); + switch( flags ){ + case SQLITE_ACCESS_EXISTS: + amode = F_OK; + break; + case SQLITE_ACCESS_READWRITE: + amode = W_OK|R_OK; + break; + case SQLITE_ACCESS_READ: + amode = R_OK; + break; + + default: + assert(!"Invalid flags argument"); + } + *pResOut = (osAccess(zPath, amode)==0); + if( flags==SQLITE_ACCESS_EXISTS && *pResOut ){ + struct stat buf; + if( 0==osStat(zPath, &buf) && buf.st_size==0 ){ + *pResOut = 0; + } + } + return SQLITE_OK; +} + + +/* +** Turn a relative pathname into a full pathname. The relative path +** is stored as a nul-terminated string in the buffer pointed to by +** zPath. +** +** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes +** (in this case, MAX_PATHNAME bytes). The full-path is written to +** this buffer before returning. +*/ +static int unixFullPathname( + sqlite3_vfs *pVfs, /* Pointer to vfs object */ + const char *zPath, /* Possibly relative input path */ + int nOut, /* Size of output buffer in bytes */ + char *zOut /* Output buffer */ +){ + + /* It's odd to simulate an io-error here, but really this is just + ** using the io-error infrastructure to test that SQLite handles this + ** function failing. This function could fail if, for example, the + ** current working directory has been unlinked. + */ + SimulateIOError( return SQLITE_ERROR ); + + assert( pVfs->mxPathname==MAX_PATHNAME ); + UNUSED_PARAMETER(pVfs); + + zOut[nOut-1] = '\0'; + if( zPath[0]=='/' ){ + sqlite3_snprintf(nOut, zOut, "%s", zPath); + }else{ + int nCwd; + if( osGetcwd(zOut, nOut-1)==0 ){ + return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath); + } + nCwd = (int)strlen(zOut); + sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath); + } + return SQLITE_OK; +} + + #define unixDlOpen 0 + #define unixDlError 0 + #define unixDlSym 0 + #define unixDlClose 0 + +/* +** Write nBuf bytes of random data to the supplied buffer zBuf. +*/ +static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ + UNUSED_PARAMETER(NotUsed); + assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int))); + + /* We have to initialize zBuf to prevent valgrind from reporting + ** errors. The reports issued by valgrind are incorrect - we would + ** prefer that the randomness be increased by making use of the + ** uninitialized space in zBuf - but valgrind errors tend to worry + ** some users. Rather than argue, it seems easier just to initialize + ** the whole array and silence valgrind, even if that means less randomness + ** in the random seed. + ** + ** When testing, initializing zBuf[] to zero is all we do. That means + ** that we always use the same random number sequence. This makes the + ** tests repeatable. + */ + memset(zBuf, 0, nBuf); + randomnessPid = osGetpid(0); +#if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) + { + int fd, got; + fd = robust_open("/dev/urandom", O_RDONLY, 0); + if( fd<0 ){ + time_t t; + time(&t); + memcpy(zBuf, &t, sizeof(t)); + memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid)); + assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf ); + nBuf = sizeof(t) + sizeof(randomnessPid); + }else{ + do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR ); + robust_close(0, fd, __LINE__); + } + } +#endif + return nBuf; +} + + +/* +** Sleep for a little while. Return the amount of time slept. +** The argument is the number of microseconds we want to sleep. +** The return value is the number of microseconds of sleep actually +** requested from the underlying operating system, a number which +** might be greater than or equal to the argument, but not less +** than the argument. +*/ +static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){ +#if OS_VXWORKS + struct timespec sp; + + sp.tv_sec = microseconds / 1000000; + sp.tv_nsec = (microseconds % 1000000) * 1000; + nanosleep(&sp, NULL); + UNUSED_PARAMETER(NotUsed); + return microseconds; +#elif defined(HAVE_USLEEP) && HAVE_USLEEP + usleep(microseconds); + UNUSED_PARAMETER(NotUsed); + return microseconds; +#else + int seconds = (microseconds+999999)/1000000; + sleep(seconds); + UNUSED_PARAMETER(NotUsed); + return seconds*1000000; +#endif +} + +/* +** The following variable, if set to a non-zero value, is interpreted as +** the number of seconds since 1970 and is used to set the result of +** sqlite3OsCurrentTime() during testing. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ +#endif + +/* +** Find the current time (in Universal Coordinated Time). Write into *piNow +** the current time and date as a Julian Day number times 86_400_000. In +** other words, write into *piNow the number of milliseconds since the Julian +** epoch of noon in Greenwich on November 24, 4714 B.C according to the +** proleptic Gregorian calendar. +** +** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date +** cannot be found. +*/ +static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ + static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; + int rc = SQLITE_OK; +#if defined(NO_GETTOD) + time_t t; + time(&t); + *piNow = ((sqlite3_int64)t)*1000 + unixEpoch; +#elif OS_VXWORKS + struct timespec sNow; + clock_gettime(CLOCK_REALTIME, &sNow); + *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000; +#else + struct timeval sNow; + if( gettimeofday(&sNow, 0)==0 ){ + *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; + }else{ + rc = SQLITE_ERROR; + } +#endif + +#ifdef SQLITE_TEST + if( sqlite3_current_time ){ + *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; + } +#endif + UNUSED_PARAMETER(NotUsed); + return rc; +} + +/* +** Find the current time (in Universal Coordinated Time). Write the +** current time and date as a Julian Day number into *prNow and +** return 0. Return 1 if the time and date cannot be found. +*/ +static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){ + sqlite3_int64 i = 0; + int rc; + UNUSED_PARAMETER(NotUsed); + rc = unixCurrentTimeInt64(0, &i); + *prNow = i/86400000.0; + return rc; +} + +/* +** We added the xGetLastError() method with the intention of providing +** better low-level error messages when operating-system problems come up +** during SQLite operation. But so far, none of that has been implemented +** in the core. So this routine is never called. For now, it is merely +** a place-holder. +*/ +static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){ + UNUSED_PARAMETER(NotUsed); + UNUSED_PARAMETER(NotUsed2); + UNUSED_PARAMETER(NotUsed3); + return 0; +} + + +/* +************************ End of sqlite3_vfs methods *************************** +******************************************************************************/ + +/****************************************************************************** +************************** Begin Proxy Locking ******************************** +** +** Proxy locking is a "uber-locking-method" in this sense: It uses the +** other locking methods on secondary lock files. Proxy locking is a +** meta-layer over top of the primitive locking implemented above. For +** this reason, the division that implements of proxy locking is deferred +** until late in the file (here) after all of the other I/O methods have +** been defined - so that the primitive locking methods are available +** as services to help with the implementation of proxy locking. +** +**** +** +** The default locking schemes in SQLite use byte-range locks on the +** database file to coordinate safe, concurrent access by multiple readers +** and writers [http://sqlite.org/lockingv3.html]. The five file locking +** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented +** as POSIX read & write locks over fixed set of locations (via fsctl), +** on AFP and SMB only exclusive byte-range locks are available via fsctl +** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states. +** To simulate a F_RDLCK on the shared range, on AFP a randomly selected +** address in the shared range is taken for a SHARED lock, the entire +** shared range is taken for an EXCLUSIVE lock): +** +** PENDING_BYTE 0x40000000 +** RESERVED_BYTE 0x40000001 +** SHARED_RANGE 0x40000002 -> 0x40000200 +** +** This works well on the local file system, but shows a nearly 100x +** slowdown in read performance on AFP because the AFP client disables +** the read cache when byte-range locks are present. Enabling the read +** cache exposes a cache coherency problem that is present on all OS X +** supported network file systems. NFS and AFP both observe the +** close-to-open semantics for ensuring cache coherency +** [http://nfs.sourceforge.net/#faq_a8], which does not effectively +** address the requirements for concurrent database access by multiple +** readers and writers +** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html]. +** +** To address the performance and cache coherency issues, proxy file locking +** changes the way database access is controlled by limiting access to a +** single host at a time and moving file locks off of the database file +** and onto a proxy file on the local file system. +** +** +** Using proxy locks +** ----------------- +** +** C APIs +** +** sqlite3_file_control(db, dbname, SQLITE_FCNTL_SET_LOCKPROXYFILE, +** <proxy_path> | ":auto:"); +** sqlite3_file_control(db, dbname, SQLITE_FCNTL_GET_LOCKPROXYFILE, +** &<proxy_path>); +** +** +** SQL pragmas +** +** PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto: +** PRAGMA [database.]lock_proxy_file +** +** Specifying ":auto:" means that if there is a conch file with a matching +** host ID in it, the proxy path in the conch file will be used, otherwise +** a proxy path based on the user's temp dir +** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the +** actual proxy file name is generated from the name and path of the +** database file. For example: +** +** For database path "/Users/me/foo.db" +** The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:") +** +** Once a lock proxy is configured for a database connection, it can not +** be removed, however it may be switched to a different proxy path via +** the above APIs (assuming the conch file is not being held by another +** connection or process). +** +** +** How proxy locking works +** ----------------------- +** +** Proxy file locking relies primarily on two new supporting files: +** +** * conch file to limit access to the database file to a single host +** at a time +** +** * proxy file to act as a proxy for the advisory locks normally +** taken on the database +** +** The conch file - to use a proxy file, sqlite must first "hold the conch" +** by taking an sqlite-style shared lock on the conch file, reading the +** contents and comparing the host's unique host ID (see below) and lock +** proxy path against the values stored in the conch. The conch file is +** stored in the same directory as the database file and the file name +** is patterned after the database file name as ".<databasename>-conch". +** If the conch file does not exist, or its contents do not match the +** host ID and/or proxy path, then the lock is escalated to an exclusive +** lock and the conch file contents is updated with the host ID and proxy +** path and the lock is downgraded to a shared lock again. If the conch +** is held by another process (with a shared lock), the exclusive lock +** will fail and SQLITE_BUSY is returned. +** +** The proxy file - a single-byte file used for all advisory file locks +** normally taken on the database file. This allows for safe sharing +** of the database file for multiple readers and writers on the same +** host (the conch ensures that they all use the same local lock file). +** +** Requesting the lock proxy does not immediately take the conch, it is +** only taken when the first request to lock database file is made. +** This matches the semantics of the traditional locking behavior, where +** opening a connection to a database file does not take a lock on it. +** The shared lock and an open file descriptor are maintained until +** the connection to the database is closed. +** +** The proxy file and the lock file are never deleted so they only need +** to be created the first time they are used. +** +** Configuration options +** --------------------- +** +** SQLITE_PREFER_PROXY_LOCKING +** +** Database files accessed on non-local file systems are +** automatically configured for proxy locking, lock files are +** named automatically using the same logic as +** PRAGMA lock_proxy_file=":auto:" +** +** SQLITE_PROXY_DEBUG +** +** Enables the logging of error messages during host id file +** retrieval and creation +** +** LOCKPROXYDIR +** +** Overrides the default directory used for lock proxy files that +** are named automatically via the ":auto:" setting +** +** SQLITE_DEFAULT_PROXYDIR_PERMISSIONS +** +** Permissions to use when creating a directory for storing the +** lock proxy files, only used when LOCKPROXYDIR is not set. +** +** +** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING, +** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will +** force proxy locking to be used for every database file opened, and 0 +** will force automatic proxy locking to be disabled for all database +** files (explicitly calling the SQLITE_FCNTL_SET_LOCKPROXYFILE pragma or +** sqlite_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING). +*/ + +/* +** Proxy locking is only available on MacOSX +*/ +#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE + +/* +** The proxyLockingContext has the path and file structures for the remote +** and local proxy files in it +*/ +typedef struct proxyLockingContext proxyLockingContext; +struct proxyLockingContext { + unixFile *conchFile; /* Open conch file */ + char *conchFilePath; /* Name of the conch file */ + unixFile *lockProxy; /* Open proxy lock file */ + char *lockProxyPath; /* Name of the proxy lock file */ + char *dbPath; /* Name of the open file */ + int conchHeld; /* 1 if the conch is held, -1 if lockless */ + int nFails; /* Number of conch taking failures */ + void *oldLockingContext; /* Original lockingcontext to restore on close */ + sqlite3_io_methods const *pOldMethod; /* Original I/O methods for close */ +}; + +/* +** The proxy lock file path for the database at dbPath is written into lPath, +** which must point to valid, writable memory large enough for a maxLen length +** file path. +*/ +static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){ + int len; + int dbLen; + int i; + +#ifdef LOCKPROXYDIR + len = strlcpy(lPath, LOCKPROXYDIR, maxLen); +#else +# ifdef _CS_DARWIN_USER_TEMP_DIR + { + if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){ + OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n", + lPath, errno, osGetpid(0))); + return SQLITE_IOERR_LOCK; + } + len = strlcat(lPath, "sqliteplocks", maxLen); + } +# else + len = strlcpy(lPath, "/tmp/", maxLen); +# endif +#endif + + if( lPath[len-1]!='/' ){ + len = strlcat(lPath, "/", maxLen); + } + + /* transform the db path to a unique cache name */ + dbLen = (int)strlen(dbPath); + for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){ + char c = dbPath[i]; + lPath[i+len] = (c=='/')?'_':c; + } + lPath[i+len]='\0'; + strlcat(lPath, ":auto:", maxLen); + OSTRACE(("GETLOCKPATH proxy lock path=%s pid=%d\n", lPath, osGetpid(0))); + return SQLITE_OK; +} + +/* + ** Creates the lock file and any missing directories in lockPath + */ +static int proxyCreateLockPath(const char *lockPath){ + int i, len; + char buf[MAXPATHLEN]; + int start = 0; + + assert(lockPath!=NULL); + /* try to create all the intermediate directories */ + len = (int)strlen(lockPath); + buf[0] = lockPath[0]; + for( i=1; i<len; i++ ){ + if( lockPath[i] == '/' && (i - start > 0) ){ + /* only mkdir if leaf dir != "." or "/" or ".." */ + if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') + || (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){ + buf[i]='\0'; + if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ + int err=errno; + if( err!=EEXIST ) { + OSTRACE(("CREATELOCKPATH FAILED creating %s, " + "'%s' proxy lock path=%s pid=%d\n", + buf, strerror(err), lockPath, osGetpid(0))); + return err; + } + } + } + start=i+1; + } + buf[i] = lockPath[i]; + } + OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n", lockPath, osGetpid(0))); + return 0; +} + +/* +** Create a new VFS file descriptor (stored in memory obtained from +** sqlite3_malloc) and open the file named "path" in the file descriptor. +** +** The caller is responsible not only for closing the file descriptor +** but also for freeing the memory associated with the file descriptor. +*/ +static int proxyCreateUnixFile( + const char *path, /* path for the new unixFile */ + unixFile **ppFile, /* unixFile created and returned by ref */ + int islockfile /* if non zero missing dirs will be created */ +) { + int fd = -1; + unixFile *pNew; + int rc = SQLITE_OK; + int openFlags = O_RDWR | O_CREAT; + sqlite3_vfs dummyVfs; + int terrno = 0; + UnixUnusedFd *pUnused = NULL; + + /* 1. first try to open/create the file + ** 2. if that fails, and this is a lock file (not-conch), try creating + ** the parent directories and then try again. + ** 3. if that fails, try to open the file read-only + ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file + */ + pUnused = findReusableFd(path, openFlags); + if( pUnused ){ + fd = pUnused->fd; + }else{ + pUnused = sqlite3_malloc64(sizeof(*pUnused)); + if( !pUnused ){ + return SQLITE_NOMEM; + } + } + if( fd<0 ){ + fd = robust_open(path, openFlags, 0); + terrno = errno; + if( fd<0 && errno==ENOENT && islockfile ){ + if( proxyCreateLockPath(path) == SQLITE_OK ){ + fd = robust_open(path, openFlags, 0); + } + } + } + if( fd<0 ){ + openFlags = O_RDONLY; + fd = robust_open(path, openFlags, 0); + terrno = errno; + } + if( fd<0 ){ + if( islockfile ){ + return SQLITE_BUSY; + } + switch (terrno) { + case EACCES: + return SQLITE_PERM; + case EIO: + return SQLITE_IOERR_LOCK; /* even though it is the conch */ + default: + return SQLITE_CANTOPEN_BKPT; + } + } + + pNew = (unixFile *)sqlite3_malloc64(sizeof(*pNew)); + if( pNew==NULL ){ + rc = SQLITE_NOMEM; + goto end_create_proxy; + } + memset(pNew, 0, sizeof(unixFile)); + pNew->openFlags = openFlags; + memset(&dummyVfs, 0, sizeof(dummyVfs)); + dummyVfs.pAppData = (void*)&autolockIoFinder; + dummyVfs.zName = "dummy"; + pUnused->fd = fd; + pUnused->flags = openFlags; + pNew->pUnused = pUnused; + + rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0); + if( rc==SQLITE_OK ){ + *ppFile = pNew; + return SQLITE_OK; + } +end_create_proxy: + robust_close(pNew, fd, __LINE__); + sqlite3_free(pNew); + sqlite3_free(pUnused); + return rc; +} + +#ifdef SQLITE_TEST +/* simulate multiple hosts by creating unique hostid file paths */ +SQLITE_PRIVATE int sqlite3_hostid_num = 0; +#endif + +#define PROXY_HOSTIDLEN 16 /* conch file host id length */ + +#ifdef HAVE_GETHOSTUUID +/* Not always defined in the headers as it ought to be */ +extern int gethostuuid(uuid_t id, const struct timespec *wait); +#endif + +/* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN +** bytes of writable memory. +*/ +static int proxyGetHostID(unsigned char *pHostID, int *pError){ + assert(PROXY_HOSTIDLEN == sizeof(uuid_t)); + memset(pHostID, 0, PROXY_HOSTIDLEN); +#ifdef HAVE_GETHOSTUUID + { + struct timespec timeout = {1, 0}; /* 1 sec timeout */ + if( gethostuuid(pHostID, &timeout) ){ + int err = errno; + if( pError ){ + *pError = err; + } + return SQLITE_IOERR; + } + } +#else + UNUSED_PARAMETER(pError); +#endif +#ifdef SQLITE_TEST + /* simulate multiple hosts by creating unique hostid file paths */ + if( sqlite3_hostid_num != 0){ + pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF)); + } +#endif + + return SQLITE_OK; +} + +/* The conch file contains the header, host id and lock file path + */ +#define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */ +#define PROXY_HEADERLEN 1 /* conch file header length */ +#define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN) +#define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN) + +/* +** Takes an open conch file, copies the contents to a new path and then moves +** it back. The newly created file's file descriptor is assigned to the +** conch file structure and finally the original conch file descriptor is +** closed. Returns zero if successful. +*/ +static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + unixFile *conchFile = pCtx->conchFile; + char tPath[MAXPATHLEN]; + char buf[PROXY_MAXCONCHLEN]; + char *cPath = pCtx->conchFilePath; + size_t readLen = 0; + size_t pathLen = 0; + char errmsg[64] = ""; + int fd = -1; + int rc = -1; + UNUSED_PARAMETER(myHostID); + + /* create a new path by replace the trailing '-conch' with '-break' */ + pathLen = strlcpy(tPath, cPath, MAXPATHLEN); + if( pathLen>MAXPATHLEN || pathLen<6 || + (strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){ + sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen); + goto end_breaklock; + } + /* read the conch content */ + readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0); + if( readLen<PROXY_PATHINDEX ){ + sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen); + goto end_breaklock; + } + /* write it out to the temporary break file */ + fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), 0); + if( fd<0 ){ + sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno); + goto end_breaklock; + } + if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){ + sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno); + goto end_breaklock; + } + if( rename(tPath, cPath) ){ + sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno); + goto end_breaklock; + } + rc = 0; + fprintf(stderr, "broke stale lock on %s\n", cPath); + robust_close(pFile, conchFile->h, __LINE__); + conchFile->h = fd; + conchFile->openFlags = O_RDWR | O_CREAT; + +end_breaklock: + if( rc ){ + if( fd>=0 ){ + osUnlink(tPath); + robust_close(pFile, fd, __LINE__); + } + fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg); + } + return rc; +} + +/* Take the requested lock on the conch file and break a stale lock if the +** host id matches. +*/ +static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){ + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + unixFile *conchFile = pCtx->conchFile; + int rc = SQLITE_OK; + int nTries = 0; + struct timespec conchModTime; + + memset(&conchModTime, 0, sizeof(conchModTime)); + do { + rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); + nTries ++; + if( rc==SQLITE_BUSY ){ + /* If the lock failed (busy): + * 1st try: get the mod time of the conch, wait 0.5s and try again. + * 2nd try: fail if the mod time changed or host id is different, wait + * 10 sec and try again + * 3rd try: break the lock unless the mod time has changed. + */ + struct stat buf; + if( osFstat(conchFile->h, &buf) ){ + storeLastErrno(pFile, errno); + return SQLITE_IOERR_LOCK; + } + + if( nTries==1 ){ + conchModTime = buf.st_mtimespec; + usleep(500000); /* wait 0.5 sec and try the lock again*/ + continue; + } + + assert( nTries>1 ); + if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec || + conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){ + return SQLITE_BUSY; + } + + if( nTries==2 ){ + char tBuf[PROXY_MAXCONCHLEN]; + int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0); + if( len<0 ){ + storeLastErrno(pFile, errno); + return SQLITE_IOERR_LOCK; + } + if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){ + /* don't break the lock if the host id doesn't match */ + if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){ + return SQLITE_BUSY; + } + }else{ + /* don't break the lock on short read or a version mismatch */ + return SQLITE_BUSY; + } + usleep(10000000); /* wait 10 sec and try the lock again */ + continue; + } + + assert( nTries==3 ); + if( 0==proxyBreakConchLock(pFile, myHostID) ){ + rc = SQLITE_OK; + if( lockType==EXCLUSIVE_LOCK ){ + rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK); + } + if( !rc ){ + rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); + } + } + } + } while( rc==SQLITE_BUSY && nTries<3 ); + + return rc; +} + +/* Takes the conch by taking a shared lock and read the contents conch, if +** lockPath is non-NULL, the host ID and lock file path must match. A NULL +** lockPath means that the lockPath in the conch file will be used if the +** host IDs match, or a new lock path will be generated automatically +** and written to the conch file. +*/ +static int proxyTakeConch(unixFile *pFile){ + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + + if( pCtx->conchHeld!=0 ){ + return SQLITE_OK; + }else{ + unixFile *conchFile = pCtx->conchFile; + uuid_t myHostID; + int pError = 0; + char readBuf[PROXY_MAXCONCHLEN]; + char lockPath[MAXPATHLEN]; + char *tempLockPath = NULL; + int rc = SQLITE_OK; + int createConch = 0; + int hostIdMatch = 0; + int readLen = 0; + int tryOldLockPath = 0; + int forceNewLockPath = 0; + + OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h, + (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), + osGetpid(0))); + + rc = proxyGetHostID(myHostID, &pError); + if( (rc&0xff)==SQLITE_IOERR ){ + storeLastErrno(pFile, pError); + goto end_takeconch; + } + rc = proxyConchLock(pFile, myHostID, SHARED_LOCK); + if( rc!=SQLITE_OK ){ + goto end_takeconch; + } + /* read the existing conch file */ + readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN); + if( readLen<0 ){ + /* I/O error: lastErrno set by seekAndRead */ + storeLastErrno(pFile, conchFile->lastErrno); + rc = SQLITE_IOERR_READ; + goto end_takeconch; + }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) || + readBuf[0]!=(char)PROXY_CONCHVERSION ){ + /* a short read or version format mismatch means we need to create a new + ** conch file. + */ + createConch = 1; + } + /* if the host id matches and the lock path already exists in the conch + ** we'll try to use the path there, if we can't open that path, we'll + ** retry with a new auto-generated path + */ + do { /* in case we need to try again for an :auto: named lock file */ + + if( !createConch && !forceNewLockPath ){ + hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID, + PROXY_HOSTIDLEN); + /* if the conch has data compare the contents */ + if( !pCtx->lockProxyPath ){ + /* for auto-named local lock file, just check the host ID and we'll + ** use the local lock file path that's already in there + */ + if( hostIdMatch ){ + size_t pathLen = (readLen - PROXY_PATHINDEX); + + if( pathLen>=MAXPATHLEN ){ + pathLen=MAXPATHLEN-1; + } + memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen); + lockPath[pathLen] = 0; + tempLockPath = lockPath; + tryOldLockPath = 1; + /* create a copy of the lock path if the conch is taken */ + goto end_takeconch; + } + }else if( hostIdMatch + && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX], + readLen-PROXY_PATHINDEX) + ){ + /* conch host and lock path match */ + goto end_takeconch; + } + } + + /* if the conch isn't writable and doesn't match, we can't take it */ + if( (conchFile->openFlags&O_RDWR) == 0 ){ + rc = SQLITE_BUSY; + goto end_takeconch; + } + + /* either the conch didn't match or we need to create a new one */ + if( !pCtx->lockProxyPath ){ + proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN); + tempLockPath = lockPath; + /* create a copy of the lock path _only_ if the conch is taken */ + } + + /* update conch with host and path (this will fail if other process + ** has a shared lock already), if the host id matches, use the big + ** stick. + */ + futimes(conchFile->h, NULL); + if( hostIdMatch && !createConch ){ + if( conchFile->pInode && conchFile->pInode->nShared>1 ){ + /* We are trying for an exclusive lock but another thread in this + ** same process is still holding a shared lock. */ + rc = SQLITE_BUSY; + } else { + rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); + } + }else{ + rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); + } + if( rc==SQLITE_OK ){ + char writeBuffer[PROXY_MAXCONCHLEN]; + int writeSize = 0; + + writeBuffer[0] = (char)PROXY_CONCHVERSION; + memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN); + if( pCtx->lockProxyPath!=NULL ){ + strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath, + MAXPATHLEN); + }else{ + strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN); + } + writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]); + robust_ftruncate(conchFile->h, writeSize); + rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0); + fsync(conchFile->h); + /* If we created a new conch file (not just updated the contents of a + ** valid conch file), try to match the permissions of the database + */ + if( rc==SQLITE_OK && createConch ){ + struct stat buf; + int err = osFstat(pFile->h, &buf); + if( err==0 ){ + mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | + S_IROTH|S_IWOTH); + /* try to match the database file R/W permissions, ignore failure */ +#ifndef SQLITE_PROXY_DEBUG + osFchmod(conchFile->h, cmode); +#else + do{ + rc = osFchmod(conchFile->h, cmode); + }while( rc==(-1) && errno==EINTR ); + if( rc!=0 ){ + int code = errno; + fprintf(stderr, "fchmod %o FAILED with %d %s\n", + cmode, code, strerror(code)); + } else { + fprintf(stderr, "fchmod %o SUCCEDED\n",cmode); + } + }else{ + int code = errno; + fprintf(stderr, "STAT FAILED[%d] with %d %s\n", + err, code, strerror(code)); +#endif + } + } + } + conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK); + + end_takeconch: + OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h)); + if( rc==SQLITE_OK && pFile->openFlags ){ + int fd; + if( pFile->h>=0 ){ + robust_close(pFile, pFile->h, __LINE__); + } + pFile->h = -1; + fd = robust_open(pCtx->dbPath, pFile->openFlags, 0); + OSTRACE(("TRANSPROXY: OPEN %d\n", fd)); + if( fd>=0 ){ + pFile->h = fd; + }else{ + rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called + during locking */ + } + } + if( rc==SQLITE_OK && !pCtx->lockProxy ){ + char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath; + rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1); + if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){ + /* we couldn't create the proxy lock file with the old lock file path + ** so try again via auto-naming + */ + forceNewLockPath = 1; + tryOldLockPath = 0; + continue; /* go back to the do {} while start point, try again */ + } + } + if( rc==SQLITE_OK ){ + /* Need to make a copy of path if we extracted the value + ** from the conch file or the path was allocated on the stack + */ + if( tempLockPath ){ + pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath); + if( !pCtx->lockProxyPath ){ + rc = SQLITE_NOMEM; + } + } + } + if( rc==SQLITE_OK ){ + pCtx->conchHeld = 1; + + if( pCtx->lockProxy->pMethod == &afpIoMethods ){ + afpLockingContext *afpCtx; + afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext; + afpCtx->dbPath = pCtx->lockProxyPath; + } + } else { + conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); + } + OSTRACE(("TAKECONCH %d %s\n", conchFile->h, + rc==SQLITE_OK?"ok":"failed")); + return rc; + } while (1); /* in case we need to retry the :auto: lock file - + ** we should never get here except via the 'continue' call. */ + } +} + +/* +** If pFile holds a lock on a conch file, then release that lock. +*/ +static int proxyReleaseConch(unixFile *pFile){ + int rc = SQLITE_OK; /* Subroutine return code */ + proxyLockingContext *pCtx; /* The locking context for the proxy lock */ + unixFile *conchFile; /* Name of the conch file */ + + pCtx = (proxyLockingContext *)pFile->lockingContext; + conchFile = pCtx->conchFile; + OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h, + (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), + osGetpid(0))); + if( pCtx->conchHeld>0 ){ + rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); + } + pCtx->conchHeld = 0; + OSTRACE(("RELEASECONCH %d %s\n", conchFile->h, + (rc==SQLITE_OK ? "ok" : "failed"))); + return rc; +} + +/* +** Given the name of a database file, compute the name of its conch file. +** Store the conch filename in memory obtained from sqlite3_malloc64(). +** Make *pConchPath point to the new name. Return SQLITE_OK on success +** or SQLITE_NOMEM if unable to obtain memory. +** +** The caller is responsible for ensuring that the allocated memory +** space is eventually freed. +** +** *pConchPath is set to NULL if a memory allocation error occurs. +*/ +static int proxyCreateConchPathname(char *dbPath, char **pConchPath){ + int i; /* Loop counter */ + int len = (int)strlen(dbPath); /* Length of database filename - dbPath */ + char *conchPath; /* buffer in which to construct conch name */ + + /* Allocate space for the conch filename and initialize the name to + ** the name of the original database file. */ + *pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8); + if( conchPath==0 ){ + return SQLITE_NOMEM; + } + memcpy(conchPath, dbPath, len+1); + + /* now insert a "." before the last / character */ + for( i=(len-1); i>=0; i-- ){ + if( conchPath[i]=='/' ){ + i++; + break; + } + } + conchPath[i]='.'; + while ( i<len ){ + conchPath[i+1]=dbPath[i]; + i++; + } + + /* append the "-conch" suffix to the file */ + memcpy(&conchPath[i+1], "-conch", 7); + assert( (int)strlen(conchPath) == len+7 ); + + return SQLITE_OK; +} + + +/* Takes a fully configured proxy locking-style unix file and switches +** the local lock file path +*/ +static int switchLockProxyPath(unixFile *pFile, const char *path) { + proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; + char *oldPath = pCtx->lockProxyPath; + int rc = SQLITE_OK; + + if( pFile->eFileLock!=NO_LOCK ){ + return SQLITE_BUSY; + } + + /* nothing to do if the path is NULL, :auto: or matches the existing path */ + if( !path || path[0]=='\0' || !strcmp(path, ":auto:") || + (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){ + return SQLITE_OK; + }else{ + unixFile *lockProxy = pCtx->lockProxy; + pCtx->lockProxy=NULL; + pCtx->conchHeld = 0; + if( lockProxy!=NULL ){ + rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy); + if( rc ) return rc; + sqlite3_free(lockProxy); + } + sqlite3_free(oldPath); + pCtx->lockProxyPath = sqlite3DbStrDup(0, path); + } + + return rc; +} + +/* +** pFile is a file that has been opened by a prior xOpen call. dbPath +** is a string buffer at least MAXPATHLEN+1 characters in size. +** +** This routine find the filename associated with pFile and writes it +** int dbPath. +*/ +static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){ +#if defined(__APPLE__) + if( pFile->pMethod == &afpIoMethods ){ + /* afp style keeps a reference to the db path in the filePath field + ** of the struct */ + assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); + strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, + MAXPATHLEN); + } else +#endif + if( pFile->pMethod == &dotlockIoMethods ){ + /* dot lock style uses the locking context to store the dot lock + ** file path */ + int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX); + memcpy(dbPath, (char *)pFile->lockingContext, len + 1); + }else{ + /* all other styles use the locking context to store the db file path */ + assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); + strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN); + } + return SQLITE_OK; +} + +/* +** Takes an already filled in unix file and alters it so all file locking +** will be performed on the local proxy lock file. The following fields +** are preserved in the locking context so that they can be restored and +** the unix structure properly cleaned up at close time: +** ->lockingContext +** ->pMethod +*/ +static int proxyTransformUnixFile(unixFile *pFile, const char *path) { + proxyLockingContext *pCtx; + char dbPath[MAXPATHLEN+1]; /* Name of the database file */ + char *lockPath=NULL; + int rc = SQLITE_OK; + + if( pFile->eFileLock!=NO_LOCK ){ + return SQLITE_BUSY; + } + proxyGetDbPathForUnixFile(pFile, dbPath); + if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){ + lockPath=NULL; + }else{ + lockPath=(char *)path; + } + + OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h, + (lockPath ? lockPath : ":auto:"), osGetpid(0))); + + pCtx = sqlite3_malloc64( sizeof(*pCtx) ); + if( pCtx==0 ){ + return SQLITE_NOMEM; + } + memset(pCtx, 0, sizeof(*pCtx)); + + rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath); + if( rc==SQLITE_OK ){ + rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0); + if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){ + /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and + ** (c) the file system is read-only, then enable no-locking access. + ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts + ** that openFlags will have only one of O_RDONLY or O_RDWR. + */ + struct statfs fsInfo; + struct stat conchInfo; + int goLockless = 0; + + if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) { + int err = errno; + if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){ + goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY; + } + } + if( goLockless ){ + pCtx->conchHeld = -1; /* read only FS/ lockless */ + rc = SQLITE_OK; + } + } + } + if( rc==SQLITE_OK && lockPath ){ + pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath); + } + + if( rc==SQLITE_OK ){ + pCtx->dbPath = sqlite3DbStrDup(0, dbPath); + if( pCtx->dbPath==NULL ){ + rc = SQLITE_NOMEM; + } + } + if( rc==SQLITE_OK ){ + /* all memory is allocated, proxys are created and assigned, + ** switch the locking context and pMethod then return. + */ + pCtx->oldLockingContext = pFile->lockingContext; + pFile->lockingContext = pCtx; + pCtx->pOldMethod = pFile->pMethod; + pFile->pMethod = &proxyIoMethods; + }else{ + if( pCtx->conchFile ){ + pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile); + sqlite3_free(pCtx->conchFile); + } + sqlite3DbFree(0, pCtx->lockProxyPath); + sqlite3_free(pCtx->conchFilePath); + sqlite3_free(pCtx); + } + OSTRACE(("TRANSPROXY %d %s\n", pFile->h, + (rc==SQLITE_OK ? "ok" : "failed"))); + return rc; +} + + +/* +** This routine handles sqlite3_file_control() calls that are specific +** to proxy locking. +*/ +static int proxyFileControl(sqlite3_file *id, int op, void *pArg){ + switch( op ){ + case SQLITE_FCNTL_GET_LOCKPROXYFILE: { + unixFile *pFile = (unixFile*)id; + if( pFile->pMethod == &proxyIoMethods ){ + proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; + proxyTakeConch(pFile); + if( pCtx->lockProxyPath ){ + *(const char **)pArg = pCtx->lockProxyPath; + }else{ + *(const char **)pArg = ":auto: (not held)"; + } + } else { + *(const char **)pArg = NULL; + } + return SQLITE_OK; + } + case SQLITE_FCNTL_SET_LOCKPROXYFILE: { + unixFile *pFile = (unixFile*)id; + int rc = SQLITE_OK; + int isProxyStyle = (pFile->pMethod == &proxyIoMethods); + if( pArg==NULL || (const char *)pArg==0 ){ + if( isProxyStyle ){ + /* turn off proxy locking - not supported. If support is added for + ** switching proxy locking mode off then it will need to fail if + ** the journal mode is WAL mode. + */ + rc = SQLITE_ERROR /*SQLITE_PROTOCOL? SQLITE_MISUSE?*/; + }else{ + /* turn off proxy locking - already off - NOOP */ + rc = SQLITE_OK; + } + }else{ + const char *proxyPath = (const char *)pArg; + if( isProxyStyle ){ + proxyLockingContext *pCtx = + (proxyLockingContext*)pFile->lockingContext; + if( !strcmp(pArg, ":auto:") + || (pCtx->lockProxyPath && + !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN)) + ){ + rc = SQLITE_OK; + }else{ + rc = switchLockProxyPath(pFile, proxyPath); + } + }else{ + /* turn on proxy file locking */ + rc = proxyTransformUnixFile(pFile, proxyPath); + } + } + return rc; + } + default: { + assert( 0 ); /* The call assures that only valid opcodes are sent */ + } + } + /*NOTREACHED*/ + return SQLITE_ERROR; +} + +/* +** Within this division (the proxying locking implementation) the procedures +** above this point are all utilities. The lock-related methods of the +** proxy-locking sqlite3_io_method object follow. +*/ + + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, set *pResOut +** to a non-zero value otherwise *pResOut is set to zero. The return value +** is set to SQLITE_OK unless an I/O error occurs during lock checking. +*/ +static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) { + unixFile *pFile = (unixFile*)id; + int rc = proxyTakeConch(pFile); + if( rc==SQLITE_OK ){ + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + if( pCtx->conchHeld>0 ){ + unixFile *proxy = pCtx->lockProxy; + return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut); + }else{ /* conchHeld < 0 is lockless */ + pResOut=0; + } + } + return rc; +} + +/* +** Lock the file with the lock specified by parameter eFileLock - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. +*/ +static int proxyLock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + int rc = proxyTakeConch(pFile); + if( rc==SQLITE_OK ){ + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + if( pCtx->conchHeld>0 ){ + unixFile *proxy = pCtx->lockProxy; + rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock); + pFile->eFileLock = proxy->eFileLock; + }else{ + /* conchHeld < 0 is lockless */ + } + } + return rc; +} + + +/* +** Lower the locking level on file descriptor pFile to eFileLock. eFileLock +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +*/ +static int proxyUnlock(sqlite3_file *id, int eFileLock) { + unixFile *pFile = (unixFile*)id; + int rc = proxyTakeConch(pFile); + if( rc==SQLITE_OK ){ + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + if( pCtx->conchHeld>0 ){ + unixFile *proxy = pCtx->lockProxy; + rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock); + pFile->eFileLock = proxy->eFileLock; + }else{ + /* conchHeld < 0 is lockless */ + } + } + return rc; +} + +/* +** Close a file that uses proxy locks. +*/ +static int proxyClose(sqlite3_file *id) { + if( id ){ + unixFile *pFile = (unixFile*)id; + proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; + unixFile *lockProxy = pCtx->lockProxy; + unixFile *conchFile = pCtx->conchFile; + int rc = SQLITE_OK; + + if( lockProxy ){ + rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK); + if( rc ) return rc; + rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy); + if( rc ) return rc; + sqlite3_free(lockProxy); + pCtx->lockProxy = 0; + } + if( conchFile ){ + if( pCtx->conchHeld ){ + rc = proxyReleaseConch(pFile); + if( rc ) return rc; + } + rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile); + if( rc ) return rc; + sqlite3_free(conchFile); + } + sqlite3DbFree(0, pCtx->lockProxyPath); + sqlite3_free(pCtx->conchFilePath); + sqlite3DbFree(0, pCtx->dbPath); + /* restore the original locking context and pMethod then close it */ + pFile->lockingContext = pCtx->oldLockingContext; + pFile->pMethod = pCtx->pOldMethod; + sqlite3_free(pCtx); + return pFile->pMethod->xClose(id); + } + return SQLITE_OK; +} + + + +#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ +/* +** The proxy locking style is intended for use with AFP filesystems. +** And since AFP is only supported on MacOSX, the proxy locking is also +** restricted to MacOSX. +** +** +******************* End of the proxy lock implementation ********************** +******************************************************************************/ + +/* +** Initialize the operating system interface. +** +** This routine registers all VFS implementations for unix-like operating +** systems. This routine, and the sqlite3_os_end() routine that follows, +** should be the only routines in this file that are visible from other +** files. +** +** This routine is called once during SQLite initialization and by a +** single thread. The memory allocation and mutex subsystems have not +** necessarily been initialized when this routine is called, and so they +** should not be used. +*/ +SQLITE_PRIVATE int sqlite3_os_init(void){ + /* + ** The following macro defines an initializer for an sqlite3_vfs object. + ** The name of the VFS is NAME. The pAppData is a pointer to a pointer + ** to the "finder" function. (pAppData is a pointer to a pointer because + ** silly C90 rules prohibit a void* from being cast to a function pointer + ** and so we have to go through the intermediate pointer to avoid problems + ** when compiling with -pedantic-errors on GCC.) + ** + ** The FINDER parameter to this macro is the name of the pointer to the + ** finder-function. The finder-function returns a pointer to the + ** sqlite_io_methods object that implements the desired locking + ** behaviors. See the division above that contains the IOMETHODS + ** macro for addition information on finder-functions. + ** + ** Most finders simply return a pointer to a fixed sqlite3_io_methods + ** object. But the "autolockIoFinder" available on MacOSX does a little + ** more than that; it looks at the filesystem type that hosts the + ** database file and tries to choose an locking method appropriate for + ** that filesystem time. + */ + #define UNIXVFS(VFSNAME, FINDER) { \ + 3, /* iVersion */ \ + sizeof(unixFile), /* szOsFile */ \ + MAX_PATHNAME, /* mxPathname */ \ + 0, /* pNext */ \ + VFSNAME, /* zName */ \ + (void*)&FINDER, /* pAppData */ \ + unixOpen, /* xOpen */ \ + unixDelete, /* xDelete */ \ + unixAccess, /* xAccess */ \ + unixFullPathname, /* xFullPathname */ \ + unixDlOpen, /* xDlOpen */ \ + unixDlError, /* xDlError */ \ + unixDlSym, /* xDlSym */ \ + unixDlClose, /* xDlClose */ \ + unixRandomness, /* xRandomness */ \ + unixSleep, /* xSleep */ \ + unixCurrentTime, /* xCurrentTime */ \ + unixGetLastError, /* xGetLastError */ \ + unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \ + unixSetSystemCall, /* xSetSystemCall */ \ + unixGetSystemCall, /* xGetSystemCall */ \ + unixNextSystemCall, /* xNextSystemCall */ \ + } + + /* + ** All default VFSes for unix are contained in the following array. + ** + ** Note that the sqlite3_vfs.pNext field of the VFS object is modified + ** by the SQLite core when the VFS is registered. So the following + ** array cannot be const. + */ + static sqlite3_vfs aVfs[] = { +#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) + UNIXVFS("unix", autolockIoFinder ), +#elif OS_VXWORKS + UNIXVFS("unix", vxworksIoFinder ), +#else + UNIXVFS("unix", posixIoFinder ), +#endif + UNIXVFS("unix-none", nolockIoFinder ), + UNIXVFS("unix-dotfile", dotlockIoFinder ), + UNIXVFS("unix-excl", posixIoFinder ), +#if OS_VXWORKS + UNIXVFS("unix-namedsem", semIoFinder ), +#endif +#if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS + UNIXVFS("unix-posix", posixIoFinder ), +#endif +#if SQLITE_ENABLE_LOCKING_STYLE + UNIXVFS("unix-flock", flockIoFinder ), +#endif +#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) + UNIXVFS("unix-afp", afpIoFinder ), + UNIXVFS("unix-nfs", nfsIoFinder ), + UNIXVFS("unix-proxy", proxyIoFinder ), +#endif + }; + unsigned int i; /* Loop counter */ + + /* Double-check that the aSyscall[] array has been constructed + ** correctly. See ticket [bb3a86e890c8e96ab] */ + assert( ArraySize(aSyscall)==25 ); + + /* Register all VFSes defined in the aVfs[] array */ + for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ + sqlite3_vfs_register(&aVfs[i], i==0); + } + return SQLITE_OK; +} + +/* +** Shutdown the operating system interface. +** +** Some operating systems might need to do some cleanup in this routine, +** to release dynamically allocated objects. But not on unix. +** This routine is a no-op for unix. +*/ +SQLITE_PRIVATE int sqlite3_os_end(void){ + return SQLITE_OK; +} + +#endif /* SQLITE_OS_UNIX */ + +/************** End of os_unix.c *********************************************/ +/************** Begin file os_win.c ******************************************/ +/* +** 2004 May 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code that is specific to Windows. +*/ +/* #include "sqliteInt.h" */ +#if SQLITE_OS_WIN /* This file is used for Windows only */ + +/* +** Include code that is common to all os_*.c files +*/ +/************** Include os_common.h in the middle of os_win.c ****************/ +/************** Begin file os_common.h ***************************************/ +/* +** 2004 May 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains macros and a little bit of code that is common to +** all of the platform-specific files (os_*.c) and is #included into those +** files. +** +** This file should be #included by the os_*.c files only. It is not a +** general purpose header file. +*/ +#ifndef _OS_COMMON_H_ +#define _OS_COMMON_H_ + +/* +** At least two bugs have slipped in because we changed the MEMORY_DEBUG +** macro to SQLITE_DEBUG and some older makefiles have not yet made the +** switch. The following code should catch this problem at compile-time. +*/ +#ifdef MEMORY_DEBUG +# error "The MEMORY_DEBUG macro is obsolete. Use SQLITE_DEBUG instead." +#endif + +/* +** Macros for performance tracing. Normally turned off. Only works +** on i486 hardware. +*/ +#ifdef SQLITE_PERFORMANCE_TRACE + +/* +** hwtime.h contains inline assembler code for implementing +** high-performance timing routines. +*/ +/* #include "hwtime.h" */ + +static sqlite_uint64 g_start; +static sqlite_uint64 g_elapsed; +#define TIMER_START g_start=sqlite3Hwtime() +#define TIMER_END g_elapsed=sqlite3Hwtime()-g_start +#define TIMER_ELAPSED g_elapsed +#else +#define TIMER_START +#define TIMER_END +#define TIMER_ELAPSED ((sqlite_uint64)0) +#endif + +/* +** If we compile with the SQLITE_TEST macro set, then the following block +** of code will give us the ability to simulate a disk I/O error. This +** is used for testing the I/O recovery logic. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3_io_error_hit = 0; /* Total number of I/O Errors */ +SQLITE_PRIVATE int sqlite3_io_error_hardhit = 0; /* Number of non-benign errors */ +SQLITE_PRIVATE int sqlite3_io_error_pending = 0; /* Count down to first I/O error */ +SQLITE_PRIVATE int sqlite3_io_error_persist = 0; /* True if I/O errors persist */ +SQLITE_PRIVATE int sqlite3_io_error_benign = 0; /* True if errors are benign */ +SQLITE_PRIVATE int sqlite3_diskfull_pending = 0; +SQLITE_PRIVATE int sqlite3_diskfull = 0; +#define SimulateIOErrorBenign(X) sqlite3_io_error_benign=(X) +#define SimulateIOError(CODE) \ + if( (sqlite3_io_error_persist && sqlite3_io_error_hit) \ + || sqlite3_io_error_pending-- == 1 ) \ + { local_ioerr(); CODE; } +static void local_ioerr(){ + IOTRACE(("IOERR\n")); + sqlite3_io_error_hit++; + if( !sqlite3_io_error_benign ) sqlite3_io_error_hardhit++; +} +#define SimulateDiskfullError(CODE) \ + if( sqlite3_diskfull_pending ){ \ + if( sqlite3_diskfull_pending == 1 ){ \ + local_ioerr(); \ + sqlite3_diskfull = 1; \ + sqlite3_io_error_hit = 1; \ + CODE; \ + }else{ \ + sqlite3_diskfull_pending--; \ + } \ + } +#else +#define SimulateIOErrorBenign(X) +#define SimulateIOError(A) +#define SimulateDiskfullError(A) +#endif + +/* +** When testing, keep a count of the number of open files. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3_open_file_count = 0; +#define OpenCounter(X) sqlite3_open_file_count+=(X) +#else +#define OpenCounter(X) +#endif + +#endif /* !defined(_OS_COMMON_H_) */ + +/************** End of os_common.h *******************************************/ +/************** Continuing where we left off in os_win.c *********************/ + +/* +** Include the header file for the Windows VFS. +*/ +/* #include "os_win.h" */ + +/* +** Compiling and using WAL mode requires several APIs that are only +** available in Windows platforms based on the NT kernel. +*/ +#if !SQLITE_OS_WINNT && !defined(SQLITE_OMIT_WAL) +# error "WAL mode requires support from the Windows NT kernel, compile\ + with SQLITE_OMIT_WAL." +#endif + +#if !SQLITE_OS_WINNT && SQLITE_MAX_MMAP_SIZE>0 +# error "Memory mapped files require support from the Windows NT kernel,\ + compile with SQLITE_MAX_MMAP_SIZE=0." +#endif + +/* +** Are most of the Win32 ANSI APIs available (i.e. with certain exceptions +** based on the sub-platform)? +*/ +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && !defined(SQLITE_WIN32_NO_ANSI) +# define SQLITE_WIN32_HAS_ANSI +#endif + +/* +** Are most of the Win32 Unicode APIs available (i.e. with certain exceptions +** based on the sub-platform)? +*/ +#if (SQLITE_OS_WINCE || SQLITE_OS_WINNT || SQLITE_OS_WINRT) && \ + !defined(SQLITE_WIN32_NO_WIDE) +# define SQLITE_WIN32_HAS_WIDE +#endif + +/* +** Make sure at least one set of Win32 APIs is available. +*/ +#if !defined(SQLITE_WIN32_HAS_ANSI) && !defined(SQLITE_WIN32_HAS_WIDE) +# error "At least one of SQLITE_WIN32_HAS_ANSI and SQLITE_WIN32_HAS_WIDE\ + must be defined." +#endif + +/* +** Define the required Windows SDK version constants if they are not +** already available. +*/ +#ifndef NTDDI_WIN8 +# define NTDDI_WIN8 0x06020000 +#endif + +#ifndef NTDDI_WINBLUE +# define NTDDI_WINBLUE 0x06030000 +#endif + +/* +** Check to see if the GetVersionEx[AW] functions are deprecated on the +** target system. GetVersionEx was first deprecated in Win8.1. +*/ +#ifndef SQLITE_WIN32_GETVERSIONEX +# if defined(NTDDI_VERSION) && NTDDI_VERSION >= NTDDI_WINBLUE +# define SQLITE_WIN32_GETVERSIONEX 0 /* GetVersionEx() is deprecated */ +# else +# define SQLITE_WIN32_GETVERSIONEX 1 /* GetVersionEx() is current */ +# endif +#endif + +/* +** This constant should already be defined (in the "WinDef.h" SDK file). +*/ +#ifndef MAX_PATH +# define MAX_PATH (260) +#endif + +/* +** Maximum pathname length (in chars) for Win32. This should normally be +** MAX_PATH. +*/ +#ifndef SQLITE_WIN32_MAX_PATH_CHARS +# define SQLITE_WIN32_MAX_PATH_CHARS (MAX_PATH) +#endif + +/* +** This constant should already be defined (in the "WinNT.h" SDK file). +*/ +#ifndef UNICODE_STRING_MAX_CHARS +# define UNICODE_STRING_MAX_CHARS (32767) +#endif + +/* +** Maximum pathname length (in chars) for WinNT. This should normally be +** UNICODE_STRING_MAX_CHARS. +*/ +#ifndef SQLITE_WINNT_MAX_PATH_CHARS +# define SQLITE_WINNT_MAX_PATH_CHARS (UNICODE_STRING_MAX_CHARS) +#endif + +/* +** Maximum pathname length (in bytes) for Win32. The MAX_PATH macro is in +** characters, so we allocate 4 bytes per character assuming worst-case of +** 4-bytes-per-character for UTF8. +*/ +#ifndef SQLITE_WIN32_MAX_PATH_BYTES +# define SQLITE_WIN32_MAX_PATH_BYTES (SQLITE_WIN32_MAX_PATH_CHARS*4) +#endif + +/* +** Maximum pathname length (in bytes) for WinNT. This should normally be +** UNICODE_STRING_MAX_CHARS * sizeof(WCHAR). +*/ +#ifndef SQLITE_WINNT_MAX_PATH_BYTES +# define SQLITE_WINNT_MAX_PATH_BYTES \ + (sizeof(WCHAR) * SQLITE_WINNT_MAX_PATH_CHARS) +#endif + +/* +** Maximum error message length (in chars) for WinRT. +*/ +#ifndef SQLITE_WIN32_MAX_ERRMSG_CHARS +# define SQLITE_WIN32_MAX_ERRMSG_CHARS (1024) +#endif + +/* +** Returns non-zero if the character should be treated as a directory +** separator. +*/ +#ifndef winIsDirSep +# define winIsDirSep(a) (((a) == '/') || ((a) == '\\')) +#endif + +/* +** This macro is used when a local variable is set to a value that is +** [sometimes] not used by the code (e.g. via conditional compilation). +*/ +#ifndef UNUSED_VARIABLE_VALUE +# define UNUSED_VARIABLE_VALUE(x) (void)(x) +#endif + +/* +** Returns the character that should be used as the directory separator. +*/ +#ifndef winGetDirSep +# define winGetDirSep() '\\' +#endif + +/* +** Do we need to manually define the Win32 file mapping APIs for use with WAL +** mode or memory mapped files (e.g. these APIs are available in the Windows +** CE SDK; however, they are not present in the header file)? +*/ +#if SQLITE_WIN32_FILEMAPPING_API && \ + (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) +/* +** Two of the file mapping APIs are different under WinRT. Figure out which +** set we need. +*/ +#if SQLITE_OS_WINRT +WINBASEAPI HANDLE WINAPI CreateFileMappingFromApp(HANDLE, \ + LPSECURITY_ATTRIBUTES, ULONG, ULONG64, LPCWSTR); + +WINBASEAPI LPVOID WINAPI MapViewOfFileFromApp(HANDLE, ULONG, ULONG64, SIZE_T); +#else +#if defined(SQLITE_WIN32_HAS_ANSI) +WINBASEAPI HANDLE WINAPI CreateFileMappingA(HANDLE, LPSECURITY_ATTRIBUTES, \ + DWORD, DWORD, DWORD, LPCSTR); +#endif /* defined(SQLITE_WIN32_HAS_ANSI) */ + +#if defined(SQLITE_WIN32_HAS_WIDE) +WINBASEAPI HANDLE WINAPI CreateFileMappingW(HANDLE, LPSECURITY_ATTRIBUTES, \ + DWORD, DWORD, DWORD, LPCWSTR); +#endif /* defined(SQLITE_WIN32_HAS_WIDE) */ + +WINBASEAPI LPVOID WINAPI MapViewOfFile(HANDLE, DWORD, DWORD, DWORD, SIZE_T); +#endif /* SQLITE_OS_WINRT */ + +/* +** These file mapping APIs are common to both Win32 and WinRT. +*/ + +WINBASEAPI BOOL WINAPI FlushViewOfFile(LPCVOID, SIZE_T); +WINBASEAPI BOOL WINAPI UnmapViewOfFile(LPCVOID); +#endif /* SQLITE_WIN32_FILEMAPPING_API */ + +/* +** Some Microsoft compilers lack this definition. +*/ +#ifndef INVALID_FILE_ATTRIBUTES +# define INVALID_FILE_ATTRIBUTES ((DWORD)-1) +#endif + +#ifndef FILE_FLAG_MASK +# define FILE_FLAG_MASK (0xFF3C0000) +#endif + +#ifndef FILE_ATTRIBUTE_MASK +# define FILE_ATTRIBUTE_MASK (0x0003FFF7) +#endif + +#ifndef SQLITE_OMIT_WAL +/* Forward references to structures used for WAL */ +typedef struct winShm winShm; /* A connection to shared-memory */ +typedef struct winShmNode winShmNode; /* A region of shared-memory */ +#endif + +/* +** WinCE lacks native support for file locking so we have to fake it +** with some code of our own. +*/ +#if SQLITE_OS_WINCE +typedef struct winceLock { + int nReaders; /* Number of reader locks obtained */ + BOOL bPending; /* Indicates a pending lock has been obtained */ + BOOL bReserved; /* Indicates a reserved lock has been obtained */ + BOOL bExclusive; /* Indicates an exclusive lock has been obtained */ +} winceLock; +#endif + +/* +** The winFile structure is a subclass of sqlite3_file* specific to the win32 +** portability layer. +*/ +typedef struct winFile winFile; +struct winFile { + const sqlite3_io_methods *pMethod; /*** Must be first ***/ + sqlite3_vfs *pVfs; /* The VFS used to open this file */ + HANDLE h; /* Handle for accessing the file */ + u8 locktype; /* Type of lock currently held on this file */ + short sharedLockByte; /* Randomly chosen byte used as a shared lock */ + u8 ctrlFlags; /* Flags. See WINFILE_* below */ + DWORD lastErrno; /* The Windows errno from the last I/O error */ +#ifndef SQLITE_OMIT_WAL + winShm *pShm; /* Instance of shared memory on this file */ +#endif + const char *zPath; /* Full pathname of this file */ + int szChunk; /* Chunk size configured by FCNTL_CHUNK_SIZE */ +#if SQLITE_OS_WINCE + LPWSTR zDeleteOnClose; /* Name of file to delete when closing */ + HANDLE hMutex; /* Mutex used to control access to shared lock */ + HANDLE hShared; /* Shared memory segment used for locking */ + winceLock local; /* Locks obtained by this instance of winFile */ + winceLock *shared; /* Global shared lock memory for the file */ +#endif +#if SQLITE_MAX_MMAP_SIZE>0 + int nFetchOut; /* Number of outstanding xFetch references */ + HANDLE hMap; /* Handle for accessing memory mapping */ + void *pMapRegion; /* Area memory mapped */ + sqlite3_int64 mmapSize; /* Usable size of mapped region */ + sqlite3_int64 mmapSizeActual; /* Actual size of mapped region */ + sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ +#endif +}; + +/* +** Allowed values for winFile.ctrlFlags +*/ +#define WINFILE_RDONLY 0x02 /* Connection is read only */ +#define WINFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ +#define WINFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ + +/* + * The size of the buffer used by sqlite3_win32_write_debug(). + */ +#ifndef SQLITE_WIN32_DBG_BUF_SIZE +# define SQLITE_WIN32_DBG_BUF_SIZE ((int)(4096-sizeof(DWORD))) +#endif + +/* + * The value used with sqlite3_win32_set_directory() to specify that + * the temporary directory should be changed. + */ +#ifndef SQLITE_WIN32_TEMP_DIRECTORY_TYPE +# define SQLITE_WIN32_TEMP_DIRECTORY_TYPE (2) +#endif + +/* + * If compiled with SQLITE_WIN32_MALLOC on Windows, we will use the + * various Win32 API heap functions instead of our own. + */ +#ifdef SQLITE_WIN32_MALLOC + +/* + * If this is non-zero, an isolated heap will be created by the native Win32 + * allocator subsystem; otherwise, the default process heap will be used. This + * setting has no effect when compiling for WinRT. By default, this is enabled + * and an isolated heap will be created to store all allocated data. + * + ****************************************************************************** + * WARNING: It is important to note that when this setting is non-zero and the + * winMemShutdown function is called (e.g. by the sqlite3BtreeShutdown + * function), all data that was allocated using the isolated heap will + * be freed immediately and any attempt to access any of that freed + * data will almost certainly result in an immediate access violation. + ****************************************************************************** + */ +#ifndef SQLITE_WIN32_HEAP_CREATE +# define SQLITE_WIN32_HEAP_CREATE (TRUE) +#endif + +/* + * The initial size of the Win32-specific heap. This value may be zero. + */ +#ifndef SQLITE_WIN32_HEAP_INIT_SIZE +# define SQLITE_WIN32_HEAP_INIT_SIZE ((SQLITE_DEFAULT_CACHE_SIZE) * \ + (SQLITE_DEFAULT_PAGE_SIZE) + 4194304) +#endif + +/* + * The maximum size of the Win32-specific heap. This value may be zero. + */ +#ifndef SQLITE_WIN32_HEAP_MAX_SIZE +# define SQLITE_WIN32_HEAP_MAX_SIZE (0) +#endif + +/* + * The extra flags to use in calls to the Win32 heap APIs. This value may be + * zero for the default behavior. + */ +#ifndef SQLITE_WIN32_HEAP_FLAGS +# define SQLITE_WIN32_HEAP_FLAGS (0) +#endif + + +/* +** The winMemData structure stores information required by the Win32-specific +** sqlite3_mem_methods implementation. +*/ +typedef struct winMemData winMemData; +struct winMemData { +#ifndef NDEBUG + u32 magic1; /* Magic number to detect structure corruption. */ +#endif + HANDLE hHeap; /* The handle to our heap. */ + BOOL bOwned; /* Do we own the heap (i.e. destroy it on shutdown)? */ +#ifndef NDEBUG + u32 magic2; /* Magic number to detect structure corruption. */ +#endif +}; + +#ifndef NDEBUG +#define WINMEM_MAGIC1 0x42b2830b +#define WINMEM_MAGIC2 0xbd4d7cf4 +#endif + +static struct winMemData win_mem_data = { +#ifndef NDEBUG + WINMEM_MAGIC1, +#endif + NULL, FALSE +#ifndef NDEBUG + ,WINMEM_MAGIC2 +#endif +}; + +#ifndef NDEBUG +#define winMemAssertMagic1() assert( win_mem_data.magic1==WINMEM_MAGIC1 ) +#define winMemAssertMagic2() assert( win_mem_data.magic2==WINMEM_MAGIC2 ) +#define winMemAssertMagic() winMemAssertMagic1(); winMemAssertMagic2(); +#else +#define winMemAssertMagic() +#endif + +#define winMemGetDataPtr() &win_mem_data +#define winMemGetHeap() win_mem_data.hHeap +#define winMemGetOwned() win_mem_data.bOwned + +static void *winMemMalloc(int nBytes); +static void winMemFree(void *pPrior); +static void *winMemRealloc(void *pPrior, int nBytes); +static int winMemSize(void *p); +static int winMemRoundup(int n); +static int winMemInit(void *pAppData); +static void winMemShutdown(void *pAppData); + +SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetWin32(void); +#endif /* SQLITE_WIN32_MALLOC */ + +/* +** The following variable is (normally) set once and never changes +** thereafter. It records whether the operating system is Win9x +** or WinNT. +** +** 0: Operating system unknown. +** 1: Operating system is Win9x. +** 2: Operating system is WinNT. +** +** In order to facilitate testing on a WinNT system, the test fixture +** can manually set this value to 1 to emulate Win98 behavior. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE LONG SQLITE_WIN32_VOLATILE sqlite3_os_type = 0; +#else +static LONG SQLITE_WIN32_VOLATILE sqlite3_os_type = 0; +#endif + +#ifndef SYSCALL +# define SYSCALL sqlite3_syscall_ptr +#endif + +/* +** This function is not available on Windows CE or WinRT. + */ + +#if SQLITE_OS_WINCE || SQLITE_OS_WINRT +# define osAreFileApisANSI() 1 +#endif + +/* +** Many system calls are accessed through pointer-to-functions so that +** they may be overridden at runtime to facilitate fault injection during +** testing and sandboxing. The following array holds the names and pointers +** to all overrideable system calls. +*/ +static struct win_syscall { + const char *zName; /* Name of the system call */ + sqlite3_syscall_ptr pCurrent; /* Current value of the system call */ + sqlite3_syscall_ptr pDefault; /* Default value */ +} aSyscall[] = { +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT + { "AreFileApisANSI", (SYSCALL)AreFileApisANSI, 0 }, +#else + { "AreFileApisANSI", (SYSCALL)0, 0 }, +#endif + +#ifndef osAreFileApisANSI +#define osAreFileApisANSI ((BOOL(WINAPI*)(VOID))aSyscall[0].pCurrent) +#endif + +#if SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_WIDE) + { "CharLowerW", (SYSCALL)CharLowerW, 0 }, +#else + { "CharLowerW", (SYSCALL)0, 0 }, +#endif + +#define osCharLowerW ((LPWSTR(WINAPI*)(LPWSTR))aSyscall[1].pCurrent) + +#if SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_WIDE) + { "CharUpperW", (SYSCALL)CharUpperW, 0 }, +#else + { "CharUpperW", (SYSCALL)0, 0 }, +#endif + +#define osCharUpperW ((LPWSTR(WINAPI*)(LPWSTR))aSyscall[2].pCurrent) + + { "CloseHandle", (SYSCALL)CloseHandle, 0 }, + +#define osCloseHandle ((BOOL(WINAPI*)(HANDLE))aSyscall[3].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) + { "CreateFileA", (SYSCALL)CreateFileA, 0 }, +#else + { "CreateFileA", (SYSCALL)0, 0 }, +#endif + +#define osCreateFileA ((HANDLE(WINAPI*)(LPCSTR,DWORD,DWORD, \ + LPSECURITY_ATTRIBUTES,DWORD,DWORD,HANDLE))aSyscall[4].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "CreateFileW", (SYSCALL)CreateFileW, 0 }, +#else + { "CreateFileW", (SYSCALL)0, 0 }, +#endif + +#define osCreateFileW ((HANDLE(WINAPI*)(LPCWSTR,DWORD,DWORD, \ + LPSECURITY_ATTRIBUTES,DWORD,DWORD,HANDLE))aSyscall[5].pCurrent) + +#if (!SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_ANSI) && \ + (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)) + { "CreateFileMappingA", (SYSCALL)CreateFileMappingA, 0 }, +#else + { "CreateFileMappingA", (SYSCALL)0, 0 }, +#endif + +#define osCreateFileMappingA ((HANDLE(WINAPI*)(HANDLE,LPSECURITY_ATTRIBUTES, \ + DWORD,DWORD,DWORD,LPCSTR))aSyscall[6].pCurrent) + +#if SQLITE_OS_WINCE || (!SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) && \ + (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)) + { "CreateFileMappingW", (SYSCALL)CreateFileMappingW, 0 }, +#else + { "CreateFileMappingW", (SYSCALL)0, 0 }, +#endif + +#define osCreateFileMappingW ((HANDLE(WINAPI*)(HANDLE,LPSECURITY_ATTRIBUTES, \ + DWORD,DWORD,DWORD,LPCWSTR))aSyscall[7].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "CreateMutexW", (SYSCALL)CreateMutexW, 0 }, +#else + { "CreateMutexW", (SYSCALL)0, 0 }, +#endif + +#define osCreateMutexW ((HANDLE(WINAPI*)(LPSECURITY_ATTRIBUTES,BOOL, \ + LPCWSTR))aSyscall[8].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) + { "DeleteFileA", (SYSCALL)DeleteFileA, 0 }, +#else + { "DeleteFileA", (SYSCALL)0, 0 }, +#endif + +#define osDeleteFileA ((BOOL(WINAPI*)(LPCSTR))aSyscall[9].pCurrent) + +#if defined(SQLITE_WIN32_HAS_WIDE) + { "DeleteFileW", (SYSCALL)DeleteFileW, 0 }, +#else + { "DeleteFileW", (SYSCALL)0, 0 }, +#endif + +#define osDeleteFileW ((BOOL(WINAPI*)(LPCWSTR))aSyscall[10].pCurrent) + +#if SQLITE_OS_WINCE + { "FileTimeToLocalFileTime", (SYSCALL)FileTimeToLocalFileTime, 0 }, +#else + { "FileTimeToLocalFileTime", (SYSCALL)0, 0 }, +#endif + +#define osFileTimeToLocalFileTime ((BOOL(WINAPI*)(CONST FILETIME*, \ + LPFILETIME))aSyscall[11].pCurrent) + +#if SQLITE_OS_WINCE + { "FileTimeToSystemTime", (SYSCALL)FileTimeToSystemTime, 0 }, +#else + { "FileTimeToSystemTime", (SYSCALL)0, 0 }, +#endif + +#define osFileTimeToSystemTime ((BOOL(WINAPI*)(CONST FILETIME*, \ + LPSYSTEMTIME))aSyscall[12].pCurrent) + + { "FlushFileBuffers", (SYSCALL)FlushFileBuffers, 0 }, + +#define osFlushFileBuffers ((BOOL(WINAPI*)(HANDLE))aSyscall[13].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) + { "FormatMessageA", (SYSCALL)FormatMessageA, 0 }, +#else + { "FormatMessageA", (SYSCALL)0, 0 }, +#endif + +#define osFormatMessageA ((DWORD(WINAPI*)(DWORD,LPCVOID,DWORD,DWORD,LPSTR, \ + DWORD,va_list*))aSyscall[14].pCurrent) + +#if defined(SQLITE_WIN32_HAS_WIDE) + { "FormatMessageW", (SYSCALL)FormatMessageW, 0 }, +#else + { "FormatMessageW", (SYSCALL)0, 0 }, +#endif + +#define osFormatMessageW ((DWORD(WINAPI*)(DWORD,LPCVOID,DWORD,DWORD,LPWSTR, \ + DWORD,va_list*))aSyscall[15].pCurrent) + + { "FreeLibrary", (SYSCALL)0, 0 }, + +#define osFreeLibrary ((BOOL(WINAPI*)(HMODULE))aSyscall[16].pCurrent) + + { "GetCurrentProcessId", (SYSCALL)GetCurrentProcessId, 0 }, + +#define osGetCurrentProcessId ((DWORD(WINAPI*)(VOID))aSyscall[17].pCurrent) + +#if !SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_ANSI) + { "GetDiskFreeSpaceA", (SYSCALL)GetDiskFreeSpaceA, 0 }, +#else + { "GetDiskFreeSpaceA", (SYSCALL)0, 0 }, +#endif + +#define osGetDiskFreeSpaceA ((BOOL(WINAPI*)(LPCSTR,LPDWORD,LPDWORD,LPDWORD, \ + LPDWORD))aSyscall[18].pCurrent) + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "GetDiskFreeSpaceW", (SYSCALL)GetDiskFreeSpaceW, 0 }, +#else + { "GetDiskFreeSpaceW", (SYSCALL)0, 0 }, +#endif + +#define osGetDiskFreeSpaceW ((BOOL(WINAPI*)(LPCWSTR,LPDWORD,LPDWORD,LPDWORD, \ + LPDWORD))aSyscall[19].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) + { "GetFileAttributesA", (SYSCALL)GetFileAttributesA, 0 }, +#else + { "GetFileAttributesA", (SYSCALL)0, 0 }, +#endif + +#define osGetFileAttributesA ((DWORD(WINAPI*)(LPCSTR))aSyscall[20].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "GetFileAttributesW", (SYSCALL)GetFileAttributesW, 0 }, +#else + { "GetFileAttributesW", (SYSCALL)0, 0 }, +#endif + +#define osGetFileAttributesW ((DWORD(WINAPI*)(LPCWSTR))aSyscall[21].pCurrent) + +#if defined(SQLITE_WIN32_HAS_WIDE) + { "GetFileAttributesExW", (SYSCALL)GetFileAttributesExW, 0 }, +#else + { "GetFileAttributesExW", (SYSCALL)0, 0 }, +#endif + +#define osGetFileAttributesExW ((BOOL(WINAPI*)(LPCWSTR,GET_FILEEX_INFO_LEVELS, \ + LPVOID))aSyscall[22].pCurrent) + +#if !SQLITE_OS_WINRT + { "GetFileSize", (SYSCALL)GetFileSize, 0 }, +#else + { "GetFileSize", (SYSCALL)0, 0 }, +#endif + +#define osGetFileSize ((DWORD(WINAPI*)(HANDLE,LPDWORD))aSyscall[23].pCurrent) + +#if !SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_ANSI) + { "GetFullPathNameA", (SYSCALL)GetFullPathNameA, 0 }, +#else + { "GetFullPathNameA", (SYSCALL)0, 0 }, +#endif + +#define osGetFullPathNameA ((DWORD(WINAPI*)(LPCSTR,DWORD,LPSTR, \ + LPSTR*))aSyscall[24].pCurrent) + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "GetFullPathNameW", (SYSCALL)GetFullPathNameW, 0 }, +#else + { "GetFullPathNameW", (SYSCALL)0, 0 }, +#endif + +#define osGetFullPathNameW ((DWORD(WINAPI*)(LPCWSTR,DWORD,LPWSTR, \ + LPWSTR*))aSyscall[25].pCurrent) + + { "GetLastError", (SYSCALL)GetLastError, 0 }, + +#define osGetLastError ((DWORD(WINAPI*)(VOID))aSyscall[26].pCurrent) + + { "GetProcAddressA", (SYSCALL)0, 0 }, + +#define osGetProcAddressA ((FARPROC(WINAPI*)(HMODULE, \ + LPCSTR))aSyscall[27].pCurrent) + +#if !SQLITE_OS_WINRT + { "GetSystemInfo", (SYSCALL)GetSystemInfo, 0 }, +#else + { "GetSystemInfo", (SYSCALL)0, 0 }, +#endif + +#define osGetSystemInfo ((VOID(WINAPI*)(LPSYSTEM_INFO))aSyscall[28].pCurrent) + + { "GetSystemTime", (SYSCALL)GetSystemTime, 0 }, + +#define osGetSystemTime ((VOID(WINAPI*)(LPSYSTEMTIME))aSyscall[29].pCurrent) + +#if !SQLITE_OS_WINCE + { "GetSystemTimeAsFileTime", (SYSCALL)GetSystemTimeAsFileTime, 0 }, +#else + { "GetSystemTimeAsFileTime", (SYSCALL)0, 0 }, +#endif + +#define osGetSystemTimeAsFileTime ((VOID(WINAPI*)( \ + LPFILETIME))aSyscall[30].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) + { "GetTempPathA", (SYSCALL)GetTempPathA, 0 }, +#else + { "GetTempPathA", (SYSCALL)0, 0 }, +#endif + +#define osGetTempPathA ((DWORD(WINAPI*)(DWORD,LPSTR))aSyscall[31].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "GetTempPathW", (SYSCALL)GetTempPathW, 0 }, +#else + { "GetTempPathW", (SYSCALL)0, 0 }, +#endif + +#define osGetTempPathW ((DWORD(WINAPI*)(DWORD,LPWSTR))aSyscall[32].pCurrent) + +#if !SQLITE_OS_WINRT + { "GetTickCount", (SYSCALL)GetTickCount, 0 }, +#else + { "GetTickCount", (SYSCALL)0, 0 }, +#endif + +#define osGetTickCount ((DWORD(WINAPI*)(VOID))aSyscall[33].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) && defined(SQLITE_WIN32_GETVERSIONEX) && \ + SQLITE_WIN32_GETVERSIONEX + { "GetVersionExA", (SYSCALL)GetVersionExA, 0 }, +#else + { "GetVersionExA", (SYSCALL)0, 0 }, +#endif + +#define osGetVersionExA ((BOOL(WINAPI*)( \ + LPOSVERSIONINFOA))aSyscall[34].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) && \ + defined(SQLITE_WIN32_GETVERSIONEX) && SQLITE_WIN32_GETVERSIONEX + { "GetVersionExW", (SYSCALL)GetVersionExW, 0 }, +#else + { "GetVersionExW", (SYSCALL)0, 0 }, +#endif + +#define osGetVersionExW ((BOOL(WINAPI*)( \ + LPOSVERSIONINFOW))aSyscall[35].pCurrent) + + { "HeapAlloc", (SYSCALL)HeapAlloc, 0 }, + +#define osHeapAlloc ((LPVOID(WINAPI*)(HANDLE,DWORD, \ + SIZE_T))aSyscall[36].pCurrent) + +#if !SQLITE_OS_WINRT + { "HeapCreate", (SYSCALL)HeapCreate, 0 }, +#else + { "HeapCreate", (SYSCALL)0, 0 }, +#endif + +#define osHeapCreate ((HANDLE(WINAPI*)(DWORD,SIZE_T, \ + SIZE_T))aSyscall[37].pCurrent) + +#if !SQLITE_OS_WINRT + { "HeapDestroy", (SYSCALL)HeapDestroy, 0 }, +#else + { "HeapDestroy", (SYSCALL)0, 0 }, +#endif + +#define osHeapDestroy ((BOOL(WINAPI*)(HANDLE))aSyscall[38].pCurrent) + + { "HeapFree", (SYSCALL)HeapFree, 0 }, + +#define osHeapFree ((BOOL(WINAPI*)(HANDLE,DWORD,LPVOID))aSyscall[39].pCurrent) + + { "HeapReAlloc", (SYSCALL)HeapReAlloc, 0 }, + +#define osHeapReAlloc ((LPVOID(WINAPI*)(HANDLE,DWORD,LPVOID, \ + SIZE_T))aSyscall[40].pCurrent) + + { "HeapSize", (SYSCALL)HeapSize, 0 }, + +#define osHeapSize ((SIZE_T(WINAPI*)(HANDLE,DWORD, \ + LPCVOID))aSyscall[41].pCurrent) + +#if !SQLITE_OS_WINRT + { "HeapValidate", (SYSCALL)HeapValidate, 0 }, +#else + { "HeapValidate", (SYSCALL)0, 0 }, +#endif + +#define osHeapValidate ((BOOL(WINAPI*)(HANDLE,DWORD, \ + LPCVOID))aSyscall[42].pCurrent) + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT + { "HeapCompact", (SYSCALL)HeapCompact, 0 }, +#else + { "HeapCompact", (SYSCALL)0, 0 }, +#endif + +#define osHeapCompact ((UINT(WINAPI*)(HANDLE,DWORD))aSyscall[43].pCurrent) + + { "LoadLibraryA", (SYSCALL)0, 0 }, + +#define osLoadLibraryA ((HMODULE(WINAPI*)(LPCSTR))aSyscall[44].pCurrent) + +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) + { "LoadLibraryW", (SYSCALL)LoadLibraryW, 0 }, +#else + { "LoadLibraryW", (SYSCALL)0, 0 }, +#endif + +#define osLoadLibraryW ((HMODULE(WINAPI*)(LPCWSTR))aSyscall[45].pCurrent) + +#if !SQLITE_OS_WINRT + { "LocalFree", (SYSCALL)LocalFree, 0 }, +#else + { "LocalFree", (SYSCALL)0, 0 }, +#endif + +#define osLocalFree ((HLOCAL(WINAPI*)(HLOCAL))aSyscall[46].pCurrent) + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT + { "LockFile", (SYSCALL)LockFile, 0 }, +#else + { "LockFile", (SYSCALL)0, 0 }, +#endif + +#ifndef osLockFile +#define osLockFile ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \ + DWORD))aSyscall[47].pCurrent) +#endif + +#if !SQLITE_OS_WINCE + { "LockFileEx", (SYSCALL)LockFileEx, 0 }, +#else + { "LockFileEx", (SYSCALL)0, 0 }, +#endif + +#ifndef osLockFileEx +#define osLockFileEx ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD,DWORD, \ + LPOVERLAPPED))aSyscall[48].pCurrent) +#endif + +#if SQLITE_OS_WINCE || (!SQLITE_OS_WINRT && \ + (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)) + { "MapViewOfFile", (SYSCALL)MapViewOfFile, 0 }, +#else + { "MapViewOfFile", (SYSCALL)0, 0 }, +#endif + +#define osMapViewOfFile ((LPVOID(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \ + SIZE_T))aSyscall[49].pCurrent) + + { "MultiByteToWideChar", (SYSCALL)MultiByteToWideChar, 0 }, + +#define osMultiByteToWideChar ((int(WINAPI*)(UINT,DWORD,LPCSTR,int,LPWSTR, \ + int))aSyscall[50].pCurrent) + + { "QueryPerformanceCounter", (SYSCALL)QueryPerformanceCounter, 0 }, + +#define osQueryPerformanceCounter ((BOOL(WINAPI*)( \ + LARGE_INTEGER*))aSyscall[51].pCurrent) + + { "ReadFile", (SYSCALL)ReadFile, 0 }, + +#define osReadFile ((BOOL(WINAPI*)(HANDLE,LPVOID,DWORD,LPDWORD, \ + LPOVERLAPPED))aSyscall[52].pCurrent) + + { "SetEndOfFile", (SYSCALL)SetEndOfFile, 0 }, + +#define osSetEndOfFile ((BOOL(WINAPI*)(HANDLE))aSyscall[53].pCurrent) + +#if !SQLITE_OS_WINRT + { "SetFilePointer", (SYSCALL)SetFilePointer, 0 }, +#else + { "SetFilePointer", (SYSCALL)0, 0 }, +#endif + +#define osSetFilePointer ((DWORD(WINAPI*)(HANDLE,LONG,PLONG, \ + DWORD))aSyscall[54].pCurrent) + +#if !SQLITE_OS_WINRT + { "Sleep", (SYSCALL)Sleep, 0 }, +#else + { "Sleep", (SYSCALL)0, 0 }, +#endif + +#define osSleep ((VOID(WINAPI*)(DWORD))aSyscall[55].pCurrent) + + { "SystemTimeToFileTime", (SYSCALL)SystemTimeToFileTime, 0 }, + +#define osSystemTimeToFileTime ((BOOL(WINAPI*)(CONST SYSTEMTIME*, \ + LPFILETIME))aSyscall[56].pCurrent) + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT + { "UnlockFile", (SYSCALL)UnlockFile, 0 }, +#else + { "UnlockFile", (SYSCALL)0, 0 }, +#endif + +#ifndef osUnlockFile +#define osUnlockFile ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \ + DWORD))aSyscall[57].pCurrent) +#endif + +#if !SQLITE_OS_WINCE + { "UnlockFileEx", (SYSCALL)UnlockFileEx, 0 }, +#else + { "UnlockFileEx", (SYSCALL)0, 0 }, +#endif + +#define osUnlockFileEx ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \ + LPOVERLAPPED))aSyscall[58].pCurrent) + +#if SQLITE_OS_WINCE || !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 + { "UnmapViewOfFile", (SYSCALL)UnmapViewOfFile, 0 }, +#else + { "UnmapViewOfFile", (SYSCALL)0, 0 }, +#endif + +#define osUnmapViewOfFile ((BOOL(WINAPI*)(LPCVOID))aSyscall[59].pCurrent) + + { "WideCharToMultiByte", (SYSCALL)WideCharToMultiByte, 0 }, + +#define osWideCharToMultiByte ((int(WINAPI*)(UINT,DWORD,LPCWSTR,int,LPSTR,int, \ + LPCSTR,LPBOOL))aSyscall[60].pCurrent) + + { "WriteFile", (SYSCALL)WriteFile, 0 }, + +#define osWriteFile ((BOOL(WINAPI*)(HANDLE,LPCVOID,DWORD,LPDWORD, \ + LPOVERLAPPED))aSyscall[61].pCurrent) + +#if SQLITE_OS_WINRT + { "CreateEventExW", (SYSCALL)CreateEventExW, 0 }, +#else + { "CreateEventExW", (SYSCALL)0, 0 }, +#endif + +#define osCreateEventExW ((HANDLE(WINAPI*)(LPSECURITY_ATTRIBUTES,LPCWSTR, \ + DWORD,DWORD))aSyscall[62].pCurrent) + +#if !SQLITE_OS_WINRT + { "WaitForSingleObject", (SYSCALL)WaitForSingleObject, 0 }, +#else + { "WaitForSingleObject", (SYSCALL)0, 0 }, +#endif + +#define osWaitForSingleObject ((DWORD(WINAPI*)(HANDLE, \ + DWORD))aSyscall[63].pCurrent) + +#if !SQLITE_OS_WINCE + { "WaitForSingleObjectEx", (SYSCALL)WaitForSingleObjectEx, 0 }, +#else + { "WaitForSingleObjectEx", (SYSCALL)0, 0 }, +#endif + +#define osWaitForSingleObjectEx ((DWORD(WINAPI*)(HANDLE,DWORD, \ + BOOL))aSyscall[64].pCurrent) + +#if SQLITE_OS_WINRT + { "SetFilePointerEx", (SYSCALL)SetFilePointerEx, 0 }, +#else + { "SetFilePointerEx", (SYSCALL)0, 0 }, +#endif + +#define osSetFilePointerEx ((BOOL(WINAPI*)(HANDLE,LARGE_INTEGER, \ + PLARGE_INTEGER,DWORD))aSyscall[65].pCurrent) + +#if SQLITE_OS_WINRT + { "GetFileInformationByHandleEx", (SYSCALL)GetFileInformationByHandleEx, 0 }, +#else + { "GetFileInformationByHandleEx", (SYSCALL)0, 0 }, +#endif + +#define osGetFileInformationByHandleEx ((BOOL(WINAPI*)(HANDLE, \ + FILE_INFO_BY_HANDLE_CLASS,LPVOID,DWORD))aSyscall[66].pCurrent) + +#if SQLITE_OS_WINRT && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) + { "MapViewOfFileFromApp", (SYSCALL)MapViewOfFileFromApp, 0 }, +#else + { "MapViewOfFileFromApp", (SYSCALL)0, 0 }, +#endif + +#define osMapViewOfFileFromApp ((LPVOID(WINAPI*)(HANDLE,ULONG,ULONG64, \ + SIZE_T))aSyscall[67].pCurrent) + +#if SQLITE_OS_WINRT + { "CreateFile2", (SYSCALL)CreateFile2, 0 }, +#else + { "CreateFile2", (SYSCALL)0, 0 }, +#endif + +#define osCreateFile2 ((HANDLE(WINAPI*)(LPCWSTR,DWORD,DWORD,DWORD, \ + LPCREATEFILE2_EXTENDED_PARAMETERS))aSyscall[68].pCurrent) + + { "LoadPackagedLibrary", (SYSCALL)0, 0 }, + +#define osLoadPackagedLibrary ((HMODULE(WINAPI*)(LPCWSTR, \ + DWORD))aSyscall[69].pCurrent) + +#if SQLITE_OS_WINRT + { "GetTickCount64", (SYSCALL)GetTickCount64, 0 }, +#else + { "GetTickCount64", (SYSCALL)0, 0 }, +#endif + +#define osGetTickCount64 ((ULONGLONG(WINAPI*)(VOID))aSyscall[70].pCurrent) + +#if SQLITE_OS_WINRT + { "GetNativeSystemInfo", (SYSCALL)GetNativeSystemInfo, 0 }, +#else + { "GetNativeSystemInfo", (SYSCALL)0, 0 }, +#endif + +#define osGetNativeSystemInfo ((VOID(WINAPI*)( \ + LPSYSTEM_INFO))aSyscall[71].pCurrent) + +#if defined(SQLITE_WIN32_HAS_ANSI) + { "OutputDebugStringA", (SYSCALL)OutputDebugStringA, 0 }, +#else + { "OutputDebugStringA", (SYSCALL)0, 0 }, +#endif + +#define osOutputDebugStringA ((VOID(WINAPI*)(LPCSTR))aSyscall[72].pCurrent) + +#if defined(SQLITE_WIN32_HAS_WIDE) + { "OutputDebugStringW", (SYSCALL)OutputDebugStringW, 0 }, +#else + { "OutputDebugStringW", (SYSCALL)0, 0 }, +#endif + +#define osOutputDebugStringW ((VOID(WINAPI*)(LPCWSTR))aSyscall[73].pCurrent) + + { "GetProcessHeap", (SYSCALL)GetProcessHeap, 0 }, + +#define osGetProcessHeap ((HANDLE(WINAPI*)(VOID))aSyscall[74].pCurrent) + +#if SQLITE_OS_WINRT && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) + { "CreateFileMappingFromApp", (SYSCALL)CreateFileMappingFromApp, 0 }, +#else + { "CreateFileMappingFromApp", (SYSCALL)0, 0 }, +#endif + +#define osCreateFileMappingFromApp ((HANDLE(WINAPI*)(HANDLE, \ + LPSECURITY_ATTRIBUTES,ULONG,ULONG64,LPCWSTR))aSyscall[75].pCurrent) + +/* +** NOTE: On some sub-platforms, the InterlockedCompareExchange "function" +** is really just a macro that uses a compiler intrinsic (e.g. x64). +** So do not try to make this is into a redefinable interface. +*/ +#if defined(InterlockedCompareExchange) + { "InterlockedCompareExchange", (SYSCALL)0, 0 }, + +#define osInterlockedCompareExchange InterlockedCompareExchange +#else + { "InterlockedCompareExchange", (SYSCALL)InterlockedCompareExchange, 0 }, + +#define osInterlockedCompareExchange ((LONG(WINAPI*)(LONG \ + SQLITE_WIN32_VOLATILE*, LONG,LONG))aSyscall[76].pCurrent) +#endif /* defined(InterlockedCompareExchange) */ + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && SQLITE_WIN32_USE_UUID + { "UuidCreate", (SYSCALL)UuidCreate, 0 }, +#else + { "UuidCreate", (SYSCALL)0, 0 }, +#endif + +#define osUuidCreate ((RPC_STATUS(RPC_ENTRY*)(UUID*))aSyscall[77].pCurrent) + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && SQLITE_WIN32_USE_UUID + { "UuidCreateSequential", (SYSCALL)UuidCreateSequential, 0 }, +#else + { "UuidCreateSequential", (SYSCALL)0, 0 }, +#endif + +#define osUuidCreateSequential \ + ((RPC_STATUS(RPC_ENTRY*)(UUID*))aSyscall[78].pCurrent) + +#if !defined(SQLITE_NO_SYNC) && SQLITE_MAX_MMAP_SIZE>0 + { "FlushViewOfFile", (SYSCALL)FlushViewOfFile, 0 }, +#else + { "FlushViewOfFile", (SYSCALL)0, 0 }, +#endif + +#define osFlushViewOfFile \ + ((BOOL(WINAPI*)(LPCVOID,SIZE_T))aSyscall[79].pCurrent) + +}; /* End of the overrideable system calls */ + +/* +** This is the xSetSystemCall() method of sqlite3_vfs for all of the +** "win32" VFSes. Return SQLITE_OK opon successfully updating the +** system call pointer, or SQLITE_NOTFOUND if there is no configurable +** system call named zName. +*/ +static int winSetSystemCall( + sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */ + const char *zName, /* Name of system call to override */ + sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */ +){ + unsigned int i; + int rc = SQLITE_NOTFOUND; + + UNUSED_PARAMETER(pNotUsed); + if( zName==0 ){ + /* If no zName is given, restore all system calls to their default + ** settings and return NULL + */ + rc = SQLITE_OK; + for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ + if( aSyscall[i].pDefault ){ + aSyscall[i].pCurrent = aSyscall[i].pDefault; + } + } + }else{ + /* If zName is specified, operate on only the one system call + ** specified. + */ + for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ + if( strcmp(zName, aSyscall[i].zName)==0 ){ + if( aSyscall[i].pDefault==0 ){ + aSyscall[i].pDefault = aSyscall[i].pCurrent; + } + rc = SQLITE_OK; + if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault; + aSyscall[i].pCurrent = pNewFunc; + break; + } + } + } + return rc; +} + +/* +** Return the value of a system call. Return NULL if zName is not a +** recognized system call name. NULL is also returned if the system call +** is currently undefined. +*/ +static sqlite3_syscall_ptr winGetSystemCall( + sqlite3_vfs *pNotUsed, + const char *zName +){ + unsigned int i; + + UNUSED_PARAMETER(pNotUsed); + for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ + if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent; + } + return 0; +} + +/* +** Return the name of the first system call after zName. If zName==NULL +** then return the name of the first system call. Return NULL if zName +** is the last system call or if zName is not the name of a valid +** system call. +*/ +static const char *winNextSystemCall(sqlite3_vfs *p, const char *zName){ + int i = -1; + + UNUSED_PARAMETER(p); + if( zName ){ + for(i=0; i<ArraySize(aSyscall)-1; i++){ + if( strcmp(zName, aSyscall[i].zName)==0 ) break; + } + } + for(i++; i<ArraySize(aSyscall); i++){ + if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName; + } + return 0; +} + +#ifdef SQLITE_WIN32_MALLOC +/* +** If a Win32 native heap has been configured, this function will attempt to +** compact it. Upon success, SQLITE_OK will be returned. Upon failure, one +** of SQLITE_NOMEM, SQLITE_ERROR, or SQLITE_NOTFOUND will be returned. The +** "pnLargest" argument, if non-zero, will be used to return the size of the +** largest committed free block in the heap, in bytes. +*/ +SQLITE_PRIVATE int sqlite3_win32_compact_heap(LPUINT pnLargest){ + int rc = SQLITE_OK; + UINT nLargest = 0; + HANDLE hHeap; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); +#endif +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT + if( (nLargest=osHeapCompact(hHeap, SQLITE_WIN32_HEAP_FLAGS))==0 ){ + DWORD lastErrno = osGetLastError(); + if( lastErrno==NO_ERROR ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapCompact (no space), heap=%p", + (void*)hHeap); + rc = SQLITE_NOMEM; + }else{ + sqlite3_log(SQLITE_ERROR, "failed to HeapCompact (%lu), heap=%p", + osGetLastError(), (void*)hHeap); + rc = SQLITE_ERROR; + } + } +#else + sqlite3_log(SQLITE_NOTFOUND, "failed to HeapCompact, heap=%p", + (void*)hHeap); + rc = SQLITE_NOTFOUND; +#endif + if( pnLargest ) *pnLargest = nLargest; + return rc; +} + +/* +** If a Win32 native heap has been configured, this function will attempt to +** destroy and recreate it. If the Win32 native heap is not isolated and/or +** the sqlite3_memory_used() function does not return zero, SQLITE_BUSY will +** be returned and no changes will be made to the Win32 native heap. +*/ +SQLITE_PRIVATE int sqlite3_win32_reset_heap(){ + int rc; + MUTEX_LOGIC( sqlite3_mutex *pMaster; ) /* The main static mutex */ + MUTEX_LOGIC( sqlite3_mutex *pMem; ) /* The memsys static mutex */ + MUTEX_LOGIC( pMaster = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER); ) + MUTEX_LOGIC( pMem = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM); ) + sqlite3_mutex_enter(pMaster); + sqlite3_mutex_enter(pMem); + winMemAssertMagic(); + if( winMemGetHeap()!=NULL && winMemGetOwned() && sqlite3_memory_used()==0 ){ + /* + ** At this point, there should be no outstanding memory allocations on + ** the heap. Also, since both the master and memsys locks are currently + ** being held by us, no other function (i.e. from another thread) should + ** be able to even access the heap. Attempt to destroy and recreate our + ** isolated Win32 native heap now. + */ + assert( winMemGetHeap()!=NULL ); + assert( winMemGetOwned() ); + assert( sqlite3_memory_used()==0 ); + winMemShutdown(winMemGetDataPtr()); + assert( winMemGetHeap()==NULL ); + assert( !winMemGetOwned() ); + assert( sqlite3_memory_used()==0 ); + rc = winMemInit(winMemGetDataPtr()); + assert( rc!=SQLITE_OK || winMemGetHeap()!=NULL ); + assert( rc!=SQLITE_OK || winMemGetOwned() ); + assert( rc!=SQLITE_OK || sqlite3_memory_used()==0 ); + }else{ + /* + ** The Win32 native heap cannot be modified because it may be in use. + */ + rc = SQLITE_BUSY; + } + sqlite3_mutex_leave(pMem); + sqlite3_mutex_leave(pMaster); + return rc; +} +#endif /* SQLITE_WIN32_MALLOC */ + +/* +** This function outputs the specified (ANSI) string to the Win32 debugger +** (if available). +*/ + +SQLITE_PRIVATE void sqlite3_win32_write_debug(const char *zBuf, int nBuf){ + char zDbgBuf[SQLITE_WIN32_DBG_BUF_SIZE]; + int nMin = MIN(nBuf, (SQLITE_WIN32_DBG_BUF_SIZE - 1)); /* may be negative. */ + if( nMin<-1 ) nMin = -1; /* all negative values become -1. */ + assert( nMin==-1 || nMin==0 || nMin<SQLITE_WIN32_DBG_BUF_SIZE ); +#if defined(SQLITE_WIN32_HAS_ANSI) + if( nMin>0 ){ + memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE); + memcpy(zDbgBuf, zBuf, nMin); + osOutputDebugStringA(zDbgBuf); + }else{ + osOutputDebugStringA(zBuf); + } +#elif defined(SQLITE_WIN32_HAS_WIDE) + memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE); + if ( osMultiByteToWideChar( + osAreFileApisANSI() ? CP_ACP : CP_OEMCP, 0, zBuf, + nMin, (LPWSTR)zDbgBuf, SQLITE_WIN32_DBG_BUF_SIZE/sizeof(WCHAR))<=0 ){ + return; + } + osOutputDebugStringW((LPCWSTR)zDbgBuf); +#else + if( nMin>0 ){ + memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE); + memcpy(zDbgBuf, zBuf, nMin); + fprintf(stderr, "%s", zDbgBuf); + }else{ + fprintf(stderr, "%s", zBuf); + } +#endif +} + +/* +** The following routine suspends the current thread for at least ms +** milliseconds. This is equivalent to the Win32 Sleep() interface. +*/ +#if SQLITE_OS_WINRT +static HANDLE sleepObj = NULL; +#endif + +SQLITE_PRIVATE void sqlite3_win32_sleep(DWORD milliseconds){ +#if SQLITE_OS_WINRT + if ( sleepObj==NULL ){ + sleepObj = osCreateEventExW(NULL, NULL, CREATE_EVENT_MANUAL_RESET, + SYNCHRONIZE); + } + assert( sleepObj!=NULL ); + osWaitForSingleObjectEx(sleepObj, milliseconds, FALSE); +#else + osSleep(milliseconds); +#endif +} + +#if SQLITE_MAX_WORKER_THREADS>0 && !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && \ + SQLITE_THREADSAFE>0 +SQLITE_PRIVATE DWORD sqlite3Win32Wait(HANDLE hObject){ + DWORD rc; + while( (rc = osWaitForSingleObjectEx(hObject, INFINITE, + TRUE))==WAIT_IO_COMPLETION ){} + return rc; +} +#endif + +/* +** Return true (non-zero) if we are running under WinNT, Win2K, WinXP, +** or WinCE. Return false (zero) for Win95, Win98, or WinME. +** +** Here is an interesting observation: Win95, Win98, and WinME lack +** the LockFileEx() API. But we can still statically link against that +** API as long as we don't call it when running Win95/98/ME. A call to +** this routine is used to determine if the host is Win95/98/ME or +** WinNT/2K/XP so that we will know whether or not we can safely call +** the LockFileEx() API. +*/ + +#if !defined(SQLITE_WIN32_GETVERSIONEX) || !SQLITE_WIN32_GETVERSIONEX +# define osIsNT() (1) +#elif SQLITE_OS_WINCE || SQLITE_OS_WINRT || !defined(SQLITE_WIN32_HAS_ANSI) +# define osIsNT() (1) +#elif !defined(SQLITE_WIN32_HAS_WIDE) +# define osIsNT() (0) +#else +# define osIsNT() ((sqlite3_os_type==2) || sqlite3_win32_is_nt()) +#endif + +/* +** This function determines if the machine is running a version of Windows +** based on the NT kernel. +*/ +SQLITE_PRIVATE int sqlite3_win32_is_nt(void){ +#if SQLITE_OS_WINRT + /* + ** NOTE: The WinRT sub-platform is always assumed to be based on the NT + ** kernel. + */ + return 1; +#elif defined(SQLITE_WIN32_GETVERSIONEX) && SQLITE_WIN32_GETVERSIONEX + if( osInterlockedCompareExchange(&sqlite3_os_type, 0, 0)==0 ){ +#if defined(SQLITE_WIN32_HAS_ANSI) + OSVERSIONINFOA sInfo; + sInfo.dwOSVersionInfoSize = sizeof(sInfo); + osGetVersionExA(&sInfo); + osInterlockedCompareExchange(&sqlite3_os_type, + (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0); +#elif defined(SQLITE_WIN32_HAS_WIDE) + OSVERSIONINFOW sInfo; + sInfo.dwOSVersionInfoSize = sizeof(sInfo); + osGetVersionExW(&sInfo); + osInterlockedCompareExchange(&sqlite3_os_type, + (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0); +#endif + } + return osInterlockedCompareExchange(&sqlite3_os_type, 2, 2)==2; +#elif SQLITE_TEST + return osInterlockedCompareExchange(&sqlite3_os_type, 2, 2)==2; +#else + /* + ** NOTE: All sub-platforms where the GetVersionEx[AW] functions are + ** deprecated are always assumed to be based on the NT kernel. + */ + return 1; +#endif +} + +#ifdef SQLITE_WIN32_MALLOC +/* +** Allocate nBytes of memory. +*/ +static void *winMemMalloc(int nBytes){ + HANDLE hHeap; + void *p; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); +#endif + assert( nBytes>=0 ); + p = osHeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes); + if( !p ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapAlloc %u bytes (%lu), heap=%p", + nBytes, osGetLastError(), (void*)hHeap); + } + return p; +} + +/* +** Free memory. +*/ +static void winMemFree(void *pPrior){ + HANDLE hHeap; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ); +#endif + if( !pPrior ) return; /* Passing NULL to HeapFree is undefined. */ + if( !osHeapFree(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapFree block %p (%lu), heap=%p", + pPrior, osGetLastError(), (void*)hHeap); + } +} + +/* +** Change the size of an existing memory allocation +*/ +static void *winMemRealloc(void *pPrior, int nBytes){ + HANDLE hHeap; + void *p; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ); +#endif + assert( nBytes>=0 ); + if( !pPrior ){ + p = osHeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes); + }else{ + p = osHeapReAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior, (SIZE_T)nBytes); + } + if( !p ){ + sqlite3_log(SQLITE_NOMEM, "failed to %s %u bytes (%lu), heap=%p", + pPrior ? "HeapReAlloc" : "HeapAlloc", nBytes, osGetLastError(), + (void*)hHeap); + } + return p; +} + +/* +** Return the size of an outstanding allocation, in bytes. +*/ +static int winMemSize(void *p){ + HANDLE hHeap; + SIZE_T n; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, p) ); +#endif + if( !p ) return 0; + n = osHeapSize(hHeap, SQLITE_WIN32_HEAP_FLAGS, p); + if( n==(SIZE_T)-1 ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapSize block %p (%lu), heap=%p", + p, osGetLastError(), (void*)hHeap); + return 0; + } + return (int)n; +} + +/* +** Round up a request size to the next valid allocation size. +*/ +static int winMemRoundup(int n){ + return n; +} + +/* +** Initialize this module. +*/ +static int winMemInit(void *pAppData){ + winMemData *pWinMemData = (winMemData *)pAppData; + + if( !pWinMemData ) return SQLITE_ERROR; + assert( pWinMemData->magic1==WINMEM_MAGIC1 ); + assert( pWinMemData->magic2==WINMEM_MAGIC2 ); + +#if !SQLITE_OS_WINRT && SQLITE_WIN32_HEAP_CREATE + if( !pWinMemData->hHeap ){ + DWORD dwInitialSize = SQLITE_WIN32_HEAP_INIT_SIZE; + DWORD dwMaximumSize = (DWORD)sqlite3GlobalConfig.nHeap; + if( dwMaximumSize==0 ){ + dwMaximumSize = SQLITE_WIN32_HEAP_MAX_SIZE; + }else if( dwInitialSize>dwMaximumSize ){ + dwInitialSize = dwMaximumSize; + } + pWinMemData->hHeap = osHeapCreate(SQLITE_WIN32_HEAP_FLAGS, + dwInitialSize, dwMaximumSize); + if( !pWinMemData->hHeap ){ + sqlite3_log(SQLITE_NOMEM, + "failed to HeapCreate (%lu), flags=%u, initSize=%lu, maxSize=%lu", + osGetLastError(), SQLITE_WIN32_HEAP_FLAGS, dwInitialSize, + dwMaximumSize); + return SQLITE_NOMEM; + } + pWinMemData->bOwned = TRUE; + assert( pWinMemData->bOwned ); + } +#else + pWinMemData->hHeap = osGetProcessHeap(); + if( !pWinMemData->hHeap ){ + sqlite3_log(SQLITE_NOMEM, + "failed to GetProcessHeap (%lu)", osGetLastError()); + return SQLITE_NOMEM; + } + pWinMemData->bOwned = FALSE; + assert( !pWinMemData->bOwned ); +#endif + assert( pWinMemData->hHeap!=0 ); + assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); +#endif + return SQLITE_OK; +} + +/* +** Deinitialize this module. +*/ +static void winMemShutdown(void *pAppData){ + winMemData *pWinMemData = (winMemData *)pAppData; + + if( !pWinMemData ) return; + assert( pWinMemData->magic1==WINMEM_MAGIC1 ); + assert( pWinMemData->magic2==WINMEM_MAGIC2 ); + + if( pWinMemData->hHeap ){ + assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE ); +#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) + assert( osHeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); +#endif + if( pWinMemData->bOwned ){ + if( !osHeapDestroy(pWinMemData->hHeap) ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapDestroy (%lu), heap=%p", + osGetLastError(), (void*)pWinMemData->hHeap); + } + pWinMemData->bOwned = FALSE; + } + pWinMemData->hHeap = NULL; + } +} + +/* +** Populate the low-level memory allocation function pointers in +** sqlite3GlobalConfig.m with pointers to the routines in this file. The +** arguments specify the block of memory to manage. +** +** This routine is only called by sqlite3_config(), and therefore +** is not required to be threadsafe (it is not). +*/ +SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetWin32(void){ + static const sqlite3_mem_methods winMemMethods = { + winMemMalloc, + winMemFree, + winMemRealloc, + winMemSize, + winMemRoundup, + winMemInit, + winMemShutdown, + &win_mem_data + }; + return &winMemMethods; +} + +SQLITE_PRIVATE void sqlite3MemSetDefault(void){ + sqlite3_config(SQLITE_CONFIG_MALLOC, sqlite3MemGetWin32()); +} +#endif /* SQLITE_WIN32_MALLOC */ + +/* +** Convert a UTF-8 string to Microsoft Unicode (UTF-16?). +** +** Space to hold the returned string is obtained from malloc. +*/ +static LPWSTR winUtf8ToUnicode(const char *zFilename){ + int nChar; + LPWSTR zWideFilename; + + nChar = osMultiByteToWideChar(CP_UTF8, 0, zFilename, -1, NULL, 0); + if( nChar==0 ){ + return 0; + } + zWideFilename = sqlite3MallocZero( nChar*sizeof(zWideFilename[0]) ); + if( zWideFilename==0 ){ + return 0; + } + nChar = osMultiByteToWideChar(CP_UTF8, 0, zFilename, -1, zWideFilename, + nChar); + if( nChar==0 ){ + sqlite3_free(zWideFilename); + zWideFilename = 0; + } + return zWideFilename; +} + +/* +** Convert Microsoft Unicode to UTF-8. Space to hold the returned string is +** obtained from sqlite3_malloc(). +*/ +static char *winUnicodeToUtf8(LPCWSTR zWideFilename){ + int nByte; + char *zFilename; + + nByte = osWideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, 0, 0, 0, 0); + if( nByte == 0 ){ + return 0; + } + zFilename = sqlite3MallocZero( nByte ); + if( zFilename==0 ){ + return 0; + } + nByte = osWideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, zFilename, nByte, + 0, 0); + if( nByte == 0 ){ + sqlite3_free(zFilename); + zFilename = 0; + } + return zFilename; +} + +/* +** Convert an ANSI string to Microsoft Unicode, based on the +** current codepage settings for file apis. +** +** Space to hold the returned string is obtained +** from sqlite3_malloc. +*/ +static LPWSTR winMbcsToUnicode(const char *zFilename){ + int nByte; + LPWSTR zMbcsFilename; + int codepage = osAreFileApisANSI() ? CP_ACP : CP_OEMCP; + + nByte = osMultiByteToWideChar(codepage, 0, zFilename, -1, NULL, + 0)*sizeof(WCHAR); + if( nByte==0 ){ + return 0; + } + zMbcsFilename = sqlite3MallocZero( nByte*sizeof(zMbcsFilename[0]) ); + if( zMbcsFilename==0 ){ + return 0; + } + nByte = osMultiByteToWideChar(codepage, 0, zFilename, -1, zMbcsFilename, + nByte); + if( nByte==0 ){ + sqlite3_free(zMbcsFilename); + zMbcsFilename = 0; + } + return zMbcsFilename; +} + +/* +** Convert Microsoft Unicode to multi-byte character string, based on the +** user's ANSI codepage. +** +** Space to hold the returned string is obtained from +** sqlite3_malloc(). +*/ +static char *winUnicodeToMbcs(LPCWSTR zWideFilename){ + int nByte; + char *zFilename; + int codepage = osAreFileApisANSI() ? CP_ACP : CP_OEMCP; + + nByte = osWideCharToMultiByte(codepage, 0, zWideFilename, -1, 0, 0, 0, 0); + if( nByte == 0 ){ + return 0; + } + zFilename = sqlite3MallocZero( nByte ); + if( zFilename==0 ){ + return 0; + } + nByte = osWideCharToMultiByte(codepage, 0, zWideFilename, -1, zFilename, + nByte, 0, 0); + if( nByte == 0 ){ + sqlite3_free(zFilename); + zFilename = 0; + } + return zFilename; +} + +/* +** Convert multibyte character string to UTF-8. Space to hold the +** returned string is obtained from sqlite3_malloc(). +*/ +SQLITE_PRIVATE char *sqlite3_win32_mbcs_to_utf8(const char *zFilename){ + char *zFilenameUtf8; + LPWSTR zTmpWide; + + zTmpWide = winMbcsToUnicode(zFilename); + if( zTmpWide==0 ){ + return 0; + } + zFilenameUtf8 = winUnicodeToUtf8(zTmpWide); + sqlite3_free(zTmpWide); + return zFilenameUtf8; +} + +/* +** Convert UTF-8 to multibyte character string. Space to hold the +** returned string is obtained from sqlite3_malloc(). +*/ +SQLITE_PRIVATE char *sqlite3_win32_utf8_to_mbcs(const char *zFilename){ + char *zFilenameMbcs; + LPWSTR zTmpWide; + + zTmpWide = winUtf8ToUnicode(zFilename); + if( zTmpWide==0 ){ + return 0; + } + zFilenameMbcs = winUnicodeToMbcs(zTmpWide); + sqlite3_free(zTmpWide); + return zFilenameMbcs; +} + +/* +** This function sets the data directory or the temporary directory based on +** the provided arguments. The type argument must be 1 in order to set the +** data directory or 2 in order to set the temporary directory. The zValue +** argument is the name of the directory to use. The return value will be +** SQLITE_OK if successful. +*/ +SQLITE_PRIVATE int sqlite3_win32_set_directory(DWORD type, LPCWSTR zValue){ + char **ppDirectory = 0; +#ifndef SQLITE_OMIT_AUTOINIT + int rc = sqlite3BtreeInitialize(); + if( rc ) return rc; +#endif + if( type==SQLITE_WIN32_TEMP_DIRECTORY_TYPE ){ + ppDirectory = &sqlite3_temp_directory; + } + assert( !ppDirectory || type==SQLITE_WIN32_TEMP_DIRECTORY_TYPE + ); + assert( !ppDirectory || sqlite3MemdebugHasType(*ppDirectory, MEMTYPE_HEAP) ); + if( ppDirectory ){ + char *zValueUtf8 = 0; + if( zValue && zValue[0] ){ + zValueUtf8 = winUnicodeToUtf8(zValue); + if ( zValueUtf8==0 ){ + return SQLITE_NOMEM; + } + } + sqlite3_free(*ppDirectory); + *ppDirectory = zValueUtf8; + return SQLITE_OK; + } + return SQLITE_ERROR; +} + +/* +** The return value of winGetLastErrorMsg +** is zero if the error message fits in the buffer, or non-zero +** otherwise (if the message was truncated). +*/ +static int winGetLastErrorMsg(DWORD lastErrno, int nBuf, char *zBuf){ + /* FormatMessage returns 0 on failure. Otherwise it + ** returns the number of TCHARs written to the output + ** buffer, excluding the terminating null char. + */ + DWORD dwLen = 0; + char *zOut = 0; + + if( osIsNT() ){ +#if SQLITE_OS_WINRT + WCHAR zTempWide[SQLITE_WIN32_MAX_ERRMSG_CHARS+1]; + dwLen = osFormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + lastErrno, + 0, + zTempWide, + SQLITE_WIN32_MAX_ERRMSG_CHARS, + 0); +#else + LPWSTR zTempWide = NULL; + dwLen = osFormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + lastErrno, + 0, + (LPWSTR) &zTempWide, + 0, + 0); +#endif + if( dwLen > 0 ){ + /* allocate a buffer and convert to UTF8 */ + zOut = winUnicodeToUtf8(zTempWide); +#if !SQLITE_OS_WINRT + /* free the system buffer allocated by FormatMessage */ + osLocalFree(zTempWide); +#endif + } + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + char *zTemp = NULL; + dwLen = osFormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + lastErrno, + 0, + (LPSTR) &zTemp, + 0, + 0); + if( dwLen > 0 ){ + /* allocate a buffer and convert to UTF8 */ + zOut = sqlite3_win32_mbcs_to_utf8(zTemp); + /* free the system buffer allocated by FormatMessage */ + osLocalFree(zTemp); + } + } +#endif + if( 0 == dwLen ){ + sqlite3_snprintf(nBuf, zBuf, "OsError 0x%lx (%lu)", lastErrno, lastErrno); + }else{ + /* copy a maximum of nBuf chars to output buffer */ + sqlite3_snprintf(nBuf, zBuf, "%s", zOut); + /* free the UTF8 buffer */ + sqlite3_free(zOut); + } + return 0; +} + +/* +** +** This function - winLogErrorAtLine() - is only ever called via the macro +** winLogError(). +** +** This routine is invoked after an error occurs in an OS function. +** It logs a message using sqlite3_log() containing the current value of +** error code and, if possible, the human-readable equivalent from +** FormatMessage. +** +** The first argument passed to the macro should be the error code that +** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). +** The two subsequent arguments should be the name of the OS function that +** failed and the associated file-system path, if any. +*/ +#define winLogError(a,b,c,d) winLogErrorAtLine(a,b,c,d,__LINE__) +static int winLogErrorAtLine( + int errcode, /* SQLite error code */ + DWORD lastErrno, /* Win32 last error */ + const char *zFunc, /* Name of OS function that failed */ + const char *zPath, /* File path associated with error */ + int iLine /* Source line number where error occurred */ +){ + char zMsg[500]; /* Human readable error text */ + int i; /* Loop counter */ + + zMsg[0] = 0; + winGetLastErrorMsg(lastErrno, sizeof(zMsg), zMsg); + assert( errcode!=SQLITE_OK ); + if( zPath==0 ) zPath = ""; + for(i=0; zMsg[i] && zMsg[i]!='\r' && zMsg[i]!='\n'; i++){} + zMsg[i] = 0; + sqlite3_log(errcode, + "os_win.c:%d: (%lu) %s(%s) - %s", + iLine, lastErrno, zFunc, zPath, zMsg + ); + + return errcode; +} + +/* +** The number of times that a ReadFile(), WriteFile(), and DeleteFile() +** will be retried following a locking error - probably caused by +** antivirus software. Also the initial delay before the first retry. +** The delay increases linearly with each retry. +*/ +#ifndef SQLITE_WIN32_IOERR_RETRY +# define SQLITE_WIN32_IOERR_RETRY 10 +#endif +#ifndef SQLITE_WIN32_IOERR_RETRY_DELAY +# define SQLITE_WIN32_IOERR_RETRY_DELAY 25 +#endif +static int winIoerrRetry = SQLITE_WIN32_IOERR_RETRY; +static int winIoerrRetryDelay = SQLITE_WIN32_IOERR_RETRY_DELAY; + +/* +** The "winIoerrCanRetry1" macro is used to determine if a particular I/O +** error code obtained via GetLastError() is eligible to be retried. It +** must accept the error code DWORD as its only argument and should return +** non-zero if the error code is transient in nature and the operation +** responsible for generating the original error might succeed upon being +** retried. The argument to this macro should be a variable. +** +** Additionally, a macro named "winIoerrCanRetry2" may be defined. If it +** is defined, it will be consulted only when the macro "winIoerrCanRetry1" +** returns zero. The "winIoerrCanRetry2" macro is completely optional and +** may be used to include additional error codes in the set that should +** result in the failing I/O operation being retried by the caller. If +** defined, the "winIoerrCanRetry2" macro must exhibit external semantics +** identical to those of the "winIoerrCanRetry1" macro. +*/ +#if !defined(winIoerrCanRetry1) +#define winIoerrCanRetry1(a) (((a)==ERROR_ACCESS_DENIED) || \ + ((a)==ERROR_SHARING_VIOLATION) || \ + ((a)==ERROR_LOCK_VIOLATION) || \ + ((a)==ERROR_DEV_NOT_EXIST) || \ + ((a)==ERROR_NETNAME_DELETED) || \ + ((a)==ERROR_SEM_TIMEOUT) || \ + ((a)==ERROR_NETWORK_UNREACHABLE)) +#endif + +/* +** If a ReadFile() or WriteFile() error occurs, invoke this routine +** to see if it should be retried. Return TRUE to retry. Return FALSE +** to give up with an error. +*/ +static int winRetryIoerr(int *pnRetry, DWORD *pError){ + DWORD e = osGetLastError(); + if( *pnRetry>=winIoerrRetry ){ + if( pError ){ + *pError = e; + } + return 0; + } + if( winIoerrCanRetry1(e) ){ + sqlite3_win32_sleep(winIoerrRetryDelay*(1+*pnRetry)); + ++*pnRetry; + return 1; + } +#if defined(winIoerrCanRetry2) + else if( winIoerrCanRetry2(e) ){ + sqlite3_win32_sleep(winIoerrRetryDelay*(1+*pnRetry)); + ++*pnRetry; + return 1; + } +#endif + if( pError ){ + *pError = e; + } + return 0; +} + +/* +** Log a I/O error retry episode. +*/ +static void winLogIoerr(int nRetry, int lineno){ + if( nRetry ){ + sqlite3_log(SQLITE_NOTICE, + "delayed %dms for lock/sharing conflict at line %d", + winIoerrRetryDelay*nRetry*(nRetry+1)/2, lineno + ); + } +} + +#if SQLITE_OS_WINCE +/************************************************************************* +** This section contains code for WinCE only. +*/ +#if !defined(SQLITE_MSVC_LOCALTIME_API) || !SQLITE_MSVC_LOCALTIME_API +/* +** The MSVC CRT on Windows CE may not have a localtime() function. So +** create a substitute. +*/ +/* #include <time.h> */ +struct tm *__cdecl localtime(const time_t *t) +{ + static struct tm y; + FILETIME uTm, lTm; + SYSTEMTIME pTm; + sqlite3_int64 t64; + t64 = *t; + t64 = (t64 + 11644473600)*10000000; + uTm.dwLowDateTime = (DWORD)(t64 & 0xFFFFFFFF); + uTm.dwHighDateTime= (DWORD)(t64 >> 32); + osFileTimeToLocalFileTime(&uTm,&lTm); + osFileTimeToSystemTime(&lTm,&pTm); + y.tm_year = pTm.wYear - 1900; + y.tm_mon = pTm.wMonth - 1; + y.tm_wday = pTm.wDayOfWeek; + y.tm_mday = pTm.wDay; + y.tm_hour = pTm.wHour; + y.tm_min = pTm.wMinute; + y.tm_sec = pTm.wSecond; + return &y; +} +#endif + +#define HANDLE_TO_WINFILE(a) (winFile*)&((char*)a)[-(int)offsetof(winFile,h)] + +/* +** Acquire a lock on the handle h +*/ +static void winceMutexAcquire(HANDLE h){ + DWORD dwErr; + do { + dwErr = osWaitForSingleObject(h, INFINITE); + } while (dwErr != WAIT_OBJECT_0 && dwErr != WAIT_ABANDONED); +} +/* +** Release a lock acquired by winceMutexAcquire() +*/ +#define winceMutexRelease(h) ReleaseMutex(h) + +/* +** Create the mutex and shared memory used for locking in the file +** descriptor pFile +*/ +static int winceCreateLock(const char *zFilename, winFile *pFile){ + LPWSTR zTok; + LPWSTR zName; + DWORD lastErrno; + BOOL bLogged = FALSE; + BOOL bInit = TRUE; + + zName = winUtf8ToUnicode(zFilename); + if( zName==0 ){ + /* out of memory */ + return SQLITE_IOERR_NOMEM; + } + + /* Initialize the local lockdata */ + memset(&pFile->local, 0, sizeof(pFile->local)); + + /* Replace the backslashes from the filename and lowercase it + ** to derive a mutex name. */ + zTok = osCharLowerW(zName); + for (;*zTok;zTok++){ + if (*zTok == '\\') *zTok = '_'; + } + + /* Create/open the named mutex */ + pFile->hMutex = osCreateMutexW(NULL, FALSE, zName); + if (!pFile->hMutex){ + pFile->lastErrno = osGetLastError(); + sqlite3_free(zName); + return winLogError(SQLITE_IOERR, pFile->lastErrno, + "winceCreateLock1", zFilename); + } + + /* Acquire the mutex before continuing */ + winceMutexAcquire(pFile->hMutex); + + /* Since the names of named mutexes, semaphores, file mappings etc are + ** case-sensitive, take advantage of that by uppercasing the mutex name + ** and using that as the shared filemapping name. + */ + osCharUpperW(zName); + pFile->hShared = osCreateFileMappingW(INVALID_HANDLE_VALUE, NULL, + PAGE_READWRITE, 0, sizeof(winceLock), + zName); + + /* Set a flag that indicates we're the first to create the memory so it + ** must be zero-initialized */ + lastErrno = osGetLastError(); + if (lastErrno == ERROR_ALREADY_EXISTS){ + bInit = FALSE; + } + + sqlite3_free(zName); + + /* If we succeeded in making the shared memory handle, map it. */ + if( pFile->hShared ){ + pFile->shared = (winceLock*)osMapViewOfFile(pFile->hShared, + FILE_MAP_READ|FILE_MAP_WRITE, 0, 0, sizeof(winceLock)); + /* If mapping failed, close the shared memory handle and erase it */ + if( !pFile->shared ){ + pFile->lastErrno = osGetLastError(); + winLogError(SQLITE_IOERR, pFile->lastErrno, + "winceCreateLock2", zFilename); + bLogged = TRUE; + osCloseHandle(pFile->hShared); + pFile->hShared = NULL; + } + } + + /* If shared memory could not be created, then close the mutex and fail */ + if( pFile->hShared==NULL ){ + if( !bLogged ){ + pFile->lastErrno = lastErrno; + winLogError(SQLITE_IOERR, pFile->lastErrno, + "winceCreateLock3", zFilename); + bLogged = TRUE; + } + winceMutexRelease(pFile->hMutex); + osCloseHandle(pFile->hMutex); + pFile->hMutex = NULL; + return SQLITE_IOERR; + } + + /* Initialize the shared memory if we're supposed to */ + if( bInit ){ + memset(pFile->shared, 0, sizeof(winceLock)); + } + + winceMutexRelease(pFile->hMutex); + return SQLITE_OK; +} + +/* +** Destroy the part of winFile that deals with wince locks +*/ +static void winceDestroyLock(winFile *pFile){ + if (pFile->hMutex){ + /* Acquire the mutex */ + winceMutexAcquire(pFile->hMutex); + + /* The following blocks should probably assert in debug mode, but they + are to cleanup in case any locks remained open */ + if (pFile->local.nReaders){ + pFile->shared->nReaders --; + } + if (pFile->local.bReserved){ + pFile->shared->bReserved = FALSE; + } + if (pFile->local.bPending){ + pFile->shared->bPending = FALSE; + } + if (pFile->local.bExclusive){ + pFile->shared->bExclusive = FALSE; + } + + /* De-reference and close our copy of the shared memory handle */ + osUnmapViewOfFile(pFile->shared); + osCloseHandle(pFile->hShared); + + /* Done with the mutex */ + winceMutexRelease(pFile->hMutex); + osCloseHandle(pFile->hMutex); + pFile->hMutex = NULL; + } +} + +/* +** An implementation of the LockFile() API of Windows for CE +*/ +static BOOL winceLockFile( + LPHANDLE phFile, + DWORD dwFileOffsetLow, + DWORD dwFileOffsetHigh, + DWORD nNumberOfBytesToLockLow, + DWORD nNumberOfBytesToLockHigh +){ + winFile *pFile = HANDLE_TO_WINFILE(phFile); + BOOL bReturn = FALSE; + + UNUSED_PARAMETER(dwFileOffsetHigh); + UNUSED_PARAMETER(nNumberOfBytesToLockHigh); + + if (!pFile->hMutex) return TRUE; + winceMutexAcquire(pFile->hMutex); + + /* Wanting an exclusive lock? */ + if (dwFileOffsetLow == (DWORD)SHARED_FIRST + && nNumberOfBytesToLockLow == (DWORD)SHARED_SIZE){ + if (pFile->shared->nReaders == 0 && pFile->shared->bExclusive == 0){ + pFile->shared->bExclusive = TRUE; + pFile->local.bExclusive = TRUE; + bReturn = TRUE; + } + } + + /* Want a read-only lock? */ + else if (dwFileOffsetLow == (DWORD)SHARED_FIRST && + nNumberOfBytesToLockLow == 1){ + if (pFile->shared->bExclusive == 0){ + pFile->local.nReaders ++; + if (pFile->local.nReaders == 1){ + pFile->shared->nReaders ++; + } + bReturn = TRUE; + } + } + + /* Want a pending lock? */ + else if (dwFileOffsetLow == (DWORD)PENDING_BYTE + && nNumberOfBytesToLockLow == 1){ + /* If no pending lock has been acquired, then acquire it */ + if (pFile->shared->bPending == 0) { + pFile->shared->bPending = TRUE; + pFile->local.bPending = TRUE; + bReturn = TRUE; + } + } + + /* Want a reserved lock? */ + else if (dwFileOffsetLow == (DWORD)RESERVED_BYTE + && nNumberOfBytesToLockLow == 1){ + if (pFile->shared->bReserved == 0) { + pFile->shared->bReserved = TRUE; + pFile->local.bReserved = TRUE; + bReturn = TRUE; + } + } + + winceMutexRelease(pFile->hMutex); + return bReturn; +} + +/* +** An implementation of the UnlockFile API of Windows for CE +*/ +static BOOL winceUnlockFile( + LPHANDLE phFile, + DWORD dwFileOffsetLow, + DWORD dwFileOffsetHigh, + DWORD nNumberOfBytesToUnlockLow, + DWORD nNumberOfBytesToUnlockHigh +){ + winFile *pFile = HANDLE_TO_WINFILE(phFile); + BOOL bReturn = FALSE; + + UNUSED_PARAMETER(dwFileOffsetHigh); + UNUSED_PARAMETER(nNumberOfBytesToUnlockHigh); + + if (!pFile->hMutex) return TRUE; + winceMutexAcquire(pFile->hMutex); + + /* Releasing a reader lock or an exclusive lock */ + if (dwFileOffsetLow == (DWORD)SHARED_FIRST){ + /* Did we have an exclusive lock? */ + if (pFile->local.bExclusive){ + assert(nNumberOfBytesToUnlockLow == (DWORD)SHARED_SIZE); + pFile->local.bExclusive = FALSE; + pFile->shared->bExclusive = FALSE; + bReturn = TRUE; + } + + /* Did we just have a reader lock? */ + else if (pFile->local.nReaders){ + assert(nNumberOfBytesToUnlockLow == (DWORD)SHARED_SIZE + || nNumberOfBytesToUnlockLow == 1); + pFile->local.nReaders --; + if (pFile->local.nReaders == 0) + { + pFile->shared->nReaders --; + } + bReturn = TRUE; + } + } + + /* Releasing a pending lock */ + else if (dwFileOffsetLow == (DWORD)PENDING_BYTE + && nNumberOfBytesToUnlockLow == 1){ + if (pFile->local.bPending){ + pFile->local.bPending = FALSE; + pFile->shared->bPending = FALSE; + bReturn = TRUE; + } + } + /* Releasing a reserved lock */ + else if (dwFileOffsetLow == (DWORD)RESERVED_BYTE + && nNumberOfBytesToUnlockLow == 1){ + if (pFile->local.bReserved) { + pFile->local.bReserved = FALSE; + pFile->shared->bReserved = FALSE; + bReturn = TRUE; + } + } + + winceMutexRelease(pFile->hMutex); + return bReturn; +} +/* +** End of the special code for wince +*****************************************************************************/ +#endif /* SQLITE_OS_WINCE */ + +/* +** Lock a file region. +*/ +static BOOL winLockFile( + LPHANDLE phFile, + DWORD flags, + DWORD offsetLow, + DWORD offsetHigh, + DWORD numBytesLow, + DWORD numBytesHigh +){ +#if SQLITE_OS_WINCE + /* + ** NOTE: Windows CE is handled differently here due its lack of the Win32 + ** API LockFile. + */ + return winceLockFile(phFile, offsetLow, offsetHigh, + numBytesLow, numBytesHigh); +#else + if( osIsNT() ){ + OVERLAPPED ovlp; + memset(&ovlp, 0, sizeof(OVERLAPPED)); + ovlp.Offset = offsetLow; + ovlp.OffsetHigh = offsetHigh; + return osLockFileEx(*phFile, flags, 0, numBytesLow, numBytesHigh, &ovlp); + }else{ + return osLockFile(*phFile, offsetLow, offsetHigh, numBytesLow, + numBytesHigh); + } +#endif +} + +/* +** Unlock a file region. + */ +static BOOL winUnlockFile( + LPHANDLE phFile, + DWORD offsetLow, + DWORD offsetHigh, + DWORD numBytesLow, + DWORD numBytesHigh +){ +#if SQLITE_OS_WINCE + /* + ** NOTE: Windows CE is handled differently here due its lack of the Win32 + ** API UnlockFile. + */ + return winceUnlockFile(phFile, offsetLow, offsetHigh, + numBytesLow, numBytesHigh); +#else + if( osIsNT() ){ + OVERLAPPED ovlp; + memset(&ovlp, 0, sizeof(OVERLAPPED)); + ovlp.Offset = offsetLow; + ovlp.OffsetHigh = offsetHigh; + return osUnlockFileEx(*phFile, 0, numBytesLow, numBytesHigh, &ovlp); + }else{ + return osUnlockFile(*phFile, offsetLow, offsetHigh, numBytesLow, + numBytesHigh); + } +#endif +} + +/***************************************************************************** +** The next group of routines implement the I/O methods specified +** by the sqlite3_io_methods object. +******************************************************************************/ + +/* +** Some Microsoft compilers lack this definition. +*/ +#ifndef INVALID_SET_FILE_POINTER +# define INVALID_SET_FILE_POINTER ((DWORD)-1) +#endif + +/* +** Move the current position of the file handle passed as the first +** argument to offset iOffset within the file. If successful, return 0. +** Otherwise, set pFile->lastErrno and return non-zero. +*/ +static int winSeekFile(winFile *pFile, sqlite3_int64 iOffset){ +#if !SQLITE_OS_WINRT + LONG upperBits; /* Most sig. 32 bits of new offset */ + LONG lowerBits; /* Least sig. 32 bits of new offset */ + DWORD dwRet; /* Value returned by SetFilePointer() */ + DWORD lastErrno; /* Value returned by GetLastError() */ + + OSTRACE(("SEEK file=%p, offset=%lld\n", pFile->h, iOffset)); + + upperBits = (LONG)((iOffset>>32) & 0x7fffffff); + lowerBits = (LONG)(iOffset & 0xffffffff); + + /* API oddity: If successful, SetFilePointer() returns a dword + ** containing the lower 32-bits of the new file-offset. Or, if it fails, + ** it returns INVALID_SET_FILE_POINTER. However according to MSDN, + ** INVALID_SET_FILE_POINTER may also be a valid new offset. So to determine + ** whether an error has actually occurred, it is also necessary to call + ** GetLastError(). + */ + dwRet = osSetFilePointer(pFile->h, lowerBits, &upperBits, FILE_BEGIN); + + if( (dwRet==INVALID_SET_FILE_POINTER + && ((lastErrno = osGetLastError())!=NO_ERROR)) ){ + pFile->lastErrno = lastErrno; + winLogError(SQLITE_IOERR_SEEK, pFile->lastErrno, + "winSeekFile", pFile->zPath); + OSTRACE(("SEEK file=%p, rc=SQLITE_IOERR_SEEK\n", pFile->h)); + return 1; + } + + OSTRACE(("SEEK file=%p, rc=SQLITE_OK\n", pFile->h)); + return 0; +#else + /* + ** Same as above, except that this implementation works for WinRT. + */ + + LARGE_INTEGER x; /* The new offset */ + BOOL bRet; /* Value returned by SetFilePointerEx() */ + + x.QuadPart = iOffset; + bRet = osSetFilePointerEx(pFile->h, x, 0, FILE_BEGIN); + + if(!bRet){ + pFile->lastErrno = osGetLastError(); + winLogError(SQLITE_IOERR_SEEK, pFile->lastErrno, + "winSeekFile", pFile->zPath); + OSTRACE(("SEEK file=%p, rc=SQLITE_IOERR_SEEK\n", pFile->h)); + return 1; + } + + OSTRACE(("SEEK file=%p, rc=SQLITE_OK\n", pFile->h)); + return 0; +#endif +} + +#if SQLITE_MAX_MMAP_SIZE>0 +/* Forward references to VFS helper methods used for memory mapped files */ +static int winMapfile(winFile*, sqlite3_int64); +static int winUnmapfile(winFile*); +#endif + +/* +** Close a file. +** +** It is reported that an attempt to close a handle might sometimes +** fail. This is a very unreasonable result, but Windows is notorious +** for being unreasonable so I do not doubt that it might happen. If +** the close fails, we pause for 100 milliseconds and try again. As +** many as MX_CLOSE_ATTEMPT attempts to close the handle are made before +** giving up and returning an error. +*/ +#define MX_CLOSE_ATTEMPT 3 +static int winClose(sqlite3_file *id){ + int rc, cnt = 0; + winFile *pFile = (winFile*)id; + + assert( id!=0 ); +#ifndef SQLITE_OMIT_WAL + assert( pFile->pShm==0 ); +#endif + assert( pFile->h!=NULL && pFile->h!=INVALID_HANDLE_VALUE ); + OSTRACE(("CLOSE pid=%lu, pFile=%p, file=%p\n", + osGetCurrentProcessId(), pFile, pFile->h)); + +#if SQLITE_MAX_MMAP_SIZE>0 + winUnmapfile(pFile); +#endif + + do{ + rc = osCloseHandle(pFile->h); + /* SimulateIOError( rc=0; cnt=MX_CLOSE_ATTEMPT; ); */ + }while( rc==0 && ++cnt < MX_CLOSE_ATTEMPT && (sqlite3_win32_sleep(100), 1) ); +#if SQLITE_OS_WINCE +#define WINCE_DELETION_ATTEMPTS 3 + winceDestroyLock(pFile); + if( pFile->zDeleteOnClose ){ + int cnt = 0; + while( + osDeleteFileW(pFile->zDeleteOnClose)==0 + && osGetFileAttributesW(pFile->zDeleteOnClose)!=0xffffffff + && cnt++ < WINCE_DELETION_ATTEMPTS + ){ + sqlite3_win32_sleep(100); /* Wait a little before trying again */ + } + sqlite3_free(pFile->zDeleteOnClose); + } +#endif + if( rc ){ + pFile->h = NULL; + } + OpenCounter(-1); + OSTRACE(("CLOSE pid=%lu, pFile=%p, file=%p, rc=%s\n", + osGetCurrentProcessId(), pFile, pFile->h, rc ? "ok" : "failed")); + return rc ? SQLITE_OK + : winLogError(SQLITE_IOERR_CLOSE, osGetLastError(), + "winClose", pFile->zPath); +} + +/* +** Read data from a file into a buffer. Return SQLITE_OK if all +** bytes were read successfully and SQLITE_IOERR if anything goes +** wrong. +*/ +static int winRead( + sqlite3_file *id, /* File to read from */ + void *pBuf, /* Write content into this buffer */ + int amt, /* Number of bytes to read */ + sqlite3_int64 offset /* Begin reading at this offset */ +){ +#if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED) + OVERLAPPED overlapped; /* The offset for ReadFile. */ +#endif + winFile *pFile = (winFile*)id; /* file handle */ + DWORD nRead; /* Number of bytes actually read from file */ + int nRetry = 0; /* Number of retrys */ + + assert( id!=0 ); + assert( amt>0 ); + assert( offset>=0 ); + SimulateIOError(return SQLITE_IOERR_READ); + OSTRACE(("READ pid=%lu, pFile=%p, file=%p, buffer=%p, amount=%d, " + "offset=%lld, lock=%d\n", osGetCurrentProcessId(), pFile, + pFile->h, pBuf, amt, offset, pFile->locktype)); + +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this read request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offset<pFile->mmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); + OSTRACE(("READ-MMAP pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_OK; + }else{ + int nCopy = (int)(pFile->mmapSize - offset); + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + +#if SQLITE_OS_WINCE || defined(SQLITE_WIN32_NO_OVERLAPPED) + if( winSeekFile(pFile, offset) ){ + OSTRACE(("READ pid=%lu, pFile=%p, file=%p, rc=SQLITE_FULL\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_FULL; + } + while( !osReadFile(pFile->h, pBuf, amt, &nRead, 0) ){ +#else + memset(&overlapped, 0, sizeof(OVERLAPPED)); + overlapped.Offset = (LONG)(offset & 0xffffffff); + overlapped.OffsetHigh = (LONG)((offset>>32) & 0x7fffffff); + while( !osReadFile(pFile->h, pBuf, amt, &nRead, &overlapped) && + osGetLastError()!=ERROR_HANDLE_EOF ){ +#endif + DWORD lastErrno; + if( winRetryIoerr(&nRetry, &lastErrno) ) continue; + pFile->lastErrno = lastErrno; + OSTRACE(("READ pid=%lu, pFile=%p, file=%p, rc=SQLITE_IOERR_READ\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return winLogError(SQLITE_IOERR_READ, pFile->lastErrno, + "winRead", pFile->zPath); + } + winLogIoerr(nRetry, __LINE__); + if( nRead<(DWORD)amt ){ + /* Unread parts of the buffer must be zero-filled */ + memset(&((char*)pBuf)[nRead], 0, amt-nRead); + OSTRACE(("READ pid=%lu, pFile=%p, file=%p, rc=SQLITE_IOERR_SHORT_READ\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_IOERR_SHORT_READ; + } + + OSTRACE(("READ pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_OK; +} + +/* +** Write data from a buffer into a file. Return SQLITE_OK on success +** or some other error code on failure. +*/ +static int winWrite( + sqlite3_file *id, /* File to write into */ + const void *pBuf, /* The bytes to be written */ + int amt, /* Number of bytes to write */ + sqlite3_int64 offset /* Offset into the file to begin writing at */ +){ + int rc = 0; /* True if error has occurred, else false */ + winFile *pFile = (winFile*)id; /* File handle */ + int nRetry = 0; /* Number of retries */ + + assert( amt>0 ); + assert( pFile ); + SimulateIOError(return SQLITE_IOERR_WRITE); + SimulateDiskfullError(return SQLITE_FULL); + + OSTRACE(("WRITE pid=%lu, pFile=%p, file=%p, buffer=%p, amount=%d, " + "offset=%lld, lock=%d\n", osGetCurrentProcessId(), pFile, + pFile->h, pBuf, amt, offset, pFile->locktype)); + +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this write request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offset<pFile->mmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); + OSTRACE(("WRITE-MMAP pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_OK; + }else{ + int nCopy = (int)(pFile->mmapSize - offset); + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + +#if SQLITE_OS_WINCE || defined(SQLITE_WIN32_NO_OVERLAPPED) + rc = winSeekFile(pFile, offset); + if( rc==0 ){ +#else + { +#endif +#if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED) + OVERLAPPED overlapped; /* The offset for WriteFile. */ +#endif + u8 *aRem = (u8 *)pBuf; /* Data yet to be written */ + int nRem = amt; /* Number of bytes yet to be written */ + DWORD nWrite; /* Bytes written by each WriteFile() call */ + DWORD lastErrno = NO_ERROR; /* Value returned by GetLastError() */ + +#if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED) + memset(&overlapped, 0, sizeof(OVERLAPPED)); + overlapped.Offset = (LONG)(offset & 0xffffffff); + overlapped.OffsetHigh = (LONG)((offset>>32) & 0x7fffffff); +#endif + + while( nRem>0 ){ +#if SQLITE_OS_WINCE || defined(SQLITE_WIN32_NO_OVERLAPPED) + if( !osWriteFile(pFile->h, aRem, nRem, &nWrite, 0) ){ +#else + if( !osWriteFile(pFile->h, aRem, nRem, &nWrite, &overlapped) ){ +#endif + if( winRetryIoerr(&nRetry, &lastErrno) ) continue; + break; + } + assert( nWrite==0 || nWrite<=(DWORD)nRem ); + if( nWrite==0 || nWrite>(DWORD)nRem ){ + lastErrno = osGetLastError(); + break; + } +#if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED) + offset += nWrite; + overlapped.Offset = (LONG)(offset & 0xffffffff); + overlapped.OffsetHigh = (LONG)((offset>>32) & 0x7fffffff); +#endif + aRem += nWrite; + nRem -= nWrite; + } + if( nRem>0 ){ + pFile->lastErrno = lastErrno; + rc = 1; + } + } + + if( rc ){ + if( ( pFile->lastErrno==ERROR_HANDLE_DISK_FULL ) + || ( pFile->lastErrno==ERROR_DISK_FULL )){ + OSTRACE(("WRITE pid=%lu, pFile=%p, file=%p, rc=SQLITE_FULL\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return winLogError(SQLITE_FULL, pFile->lastErrno, + "winWrite1", pFile->zPath); + } + OSTRACE(("WRITE pid=%lu, pFile=%p, file=%p, rc=SQLITE_IOERR_WRITE\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return winLogError(SQLITE_IOERR_WRITE, pFile->lastErrno, + "winWrite2", pFile->zPath); + }else{ + winLogIoerr(nRetry, __LINE__); + } + OSTRACE(("WRITE pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_OK; +} + +/* +** Truncate an open file to a specified size +*/ +static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){ + winFile *pFile = (winFile*)id; /* File handle object */ + int rc = SQLITE_OK; /* Return code for this function */ + DWORD lastErrno; + + assert( pFile ); + SimulateIOError(return SQLITE_IOERR_TRUNCATE); + OSTRACE(("TRUNCATE pid=%lu, pFile=%p, file=%p, size=%lld, lock=%d\n", + osGetCurrentProcessId(), pFile, pFile->h, nByte, pFile->locktype)); + + /* If the user has configured a chunk-size for this file, truncate the + ** file so that it consists of an integer number of chunks (i.e. the + ** actual file size after the operation may be larger than the requested + ** size). + */ + if( pFile->szChunk>0 ){ + nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; + } + + /* SetEndOfFile() returns non-zero when successful, or zero when it fails. */ + if( winSeekFile(pFile, nByte) ){ + rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno, + "winTruncate1", pFile->zPath); + }else if( 0==osSetEndOfFile(pFile->h) && + ((lastErrno = osGetLastError())!=ERROR_USER_MAPPED_FILE) ){ + pFile->lastErrno = lastErrno; + rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno, + "winTruncate2", pFile->zPath); + } + +#if SQLITE_MAX_MMAP_SIZE>0 + /* If the file was truncated to a size smaller than the currently + ** mapped region, reduce the effective mapping size as well. SQLite will + ** use read() and write() to access data beyond this point from now on. + */ + if( pFile->pMapRegion && nByte<pFile->mmapSize ){ + pFile->mmapSize = nByte; + } +#endif + + OSTRACE(("TRUNCATE pid=%lu, pFile=%p, file=%p, rc=%s\n", + osGetCurrentProcessId(), pFile, pFile->h, sqlite3ErrName(rc))); + return rc; +} + +#ifdef SQLITE_TEST +/* +** Count the number of fullsyncs and normal syncs. This is used to test +** that syncs and fullsyncs are occuring at the right times. +*/ +SQLITE_PRIVATE int sqlite3_sync_count = 0; +SQLITE_PRIVATE int sqlite3_fullsync_count = 0; +#endif + +/* +** Make sure all writes to a particular file are committed to disk. +*/ +static int winSync(sqlite3_file *id, int flags){ +#ifndef SQLITE_NO_SYNC + /* + ** Used only when SQLITE_NO_SYNC is not defined. + */ + BOOL rc; +#endif +#if !defined(NDEBUG) || !defined(SQLITE_NO_SYNC) || \ + defined(SQLITE_HAVE_OS_TRACE) + /* + ** Used when SQLITE_NO_SYNC is not defined and by the assert() and/or + ** OSTRACE() macros. + */ + winFile *pFile = (winFile*)id; +#else + UNUSED_PARAMETER(id); +#endif + + assert( pFile ); + /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ + assert((flags&0x0F)==SQLITE_SYNC_NORMAL + || (flags&0x0F)==SQLITE_SYNC_FULL + ); + + /* Unix cannot, but some systems may return SQLITE_FULL from here. This + ** line is to test that doing so does not cause any problems. + */ + SimulateDiskfullError( return SQLITE_FULL ); + + OSTRACE(("SYNC pid=%lu, pFile=%p, file=%p, flags=%x, lock=%d\n", + osGetCurrentProcessId(), pFile, pFile->h, flags, + pFile->locktype)); + +#ifndef SQLITE_TEST + UNUSED_PARAMETER(flags); +#else + if( (flags&0x0F)==SQLITE_SYNC_FULL ){ + sqlite3_fullsync_count++; + } + sqlite3_sync_count++; +#endif + + /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a + ** no-op + */ +#ifdef SQLITE_NO_SYNC + OSTRACE(("SYNC-NOP pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_OK; +#else +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFile->pMapRegion ){ + if( osFlushViewOfFile(pFile->pMapRegion, 0) ){ + OSTRACE(("SYNC-MMAP pid=%lu, pFile=%p, pMapRegion=%p, " + "rc=SQLITE_OK\n", osGetCurrentProcessId(), + pFile, pFile->pMapRegion)); + }else{ + pFile->lastErrno = osGetLastError(); + OSTRACE(("SYNC-MMAP pid=%lu, pFile=%p, pMapRegion=%p, " + "rc=SQLITE_IOERR_MMAP\n", osGetCurrentProcessId(), + pFile, pFile->pMapRegion)); + return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, + "winSync1", pFile->zPath); + } + } +#endif + rc = osFlushFileBuffers(pFile->h); + SimulateIOError( rc=FALSE ); + if( rc ){ + OSTRACE(("SYNC pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return SQLITE_OK; + }else{ + pFile->lastErrno = osGetLastError(); + OSTRACE(("SYNC pid=%lu, pFile=%p, file=%p, rc=SQLITE_IOERR_FSYNC\n", + osGetCurrentProcessId(), pFile, pFile->h)); + return winLogError(SQLITE_IOERR_FSYNC, pFile->lastErrno, + "winSync2", pFile->zPath); + } +#endif +} + +/* +** Determine the current size of a file in bytes +*/ +static int winFileSize(sqlite3_file *id, sqlite3_int64 *pSize){ + winFile *pFile = (winFile*)id; + int rc = SQLITE_OK; + + assert( id!=0 ); + assert( pSize!=0 ); + SimulateIOError(return SQLITE_IOERR_FSTAT); + OSTRACE(("SIZE file=%p, pSize=%p\n", pFile->h, pSize)); + +#if SQLITE_OS_WINRT + { + FILE_STANDARD_INFO info; + if( osGetFileInformationByHandleEx(pFile->h, FileStandardInfo, + &info, sizeof(info)) ){ + *pSize = info.EndOfFile.QuadPart; + }else{ + pFile->lastErrno = osGetLastError(); + rc = winLogError(SQLITE_IOERR_FSTAT, pFile->lastErrno, + "winFileSize", pFile->zPath); + } + } +#else + { + DWORD upperBits; + DWORD lowerBits; + DWORD lastErrno; + + lowerBits = osGetFileSize(pFile->h, &upperBits); + *pSize = (((sqlite3_int64)upperBits)<<32) + lowerBits; + if( (lowerBits == INVALID_FILE_SIZE) + && ((lastErrno = osGetLastError())!=NO_ERROR) ){ + pFile->lastErrno = lastErrno; + rc = winLogError(SQLITE_IOERR_FSTAT, pFile->lastErrno, + "winFileSize", pFile->zPath); + } + } +#endif + OSTRACE(("SIZE file=%p, pSize=%p, *pSize=%lld, rc=%s\n", + pFile->h, pSize, *pSize, sqlite3ErrName(rc))); + return rc; +} + +/* +** LOCKFILE_FAIL_IMMEDIATELY is undefined on some Windows systems. +*/ +#ifndef LOCKFILE_FAIL_IMMEDIATELY +# define LOCKFILE_FAIL_IMMEDIATELY 1 +#endif + +#ifndef LOCKFILE_EXCLUSIVE_LOCK +# define LOCKFILE_EXCLUSIVE_LOCK 2 +#endif + +/* +** Historically, SQLite has used both the LockFile and LockFileEx functions. +** When the LockFile function was used, it was always expected to fail +** immediately if the lock could not be obtained. Also, it always expected to +** obtain an exclusive lock. These flags are used with the LockFileEx function +** and reflect those expectations; therefore, they should not be changed. +*/ +#ifndef SQLITE_LOCKFILE_FLAGS +# define SQLITE_LOCKFILE_FLAGS (LOCKFILE_FAIL_IMMEDIATELY | \ + LOCKFILE_EXCLUSIVE_LOCK) +#endif + +/* +** Currently, SQLite never calls the LockFileEx function without wanting the +** call to fail immediately if the lock cannot be obtained. +*/ +#ifndef SQLITE_LOCKFILEEX_FLAGS +# define SQLITE_LOCKFILEEX_FLAGS (LOCKFILE_FAIL_IMMEDIATELY) +#endif + +/* +** Acquire a reader lock. +** Different API routines are called depending on whether or not this +** is Win9x or WinNT. +*/ +static int winGetReadLock(winFile *pFile){ + int res; + OSTRACE(("READ-LOCK file=%p, lock=%d\n", pFile->h, pFile->locktype)); + if( osIsNT() ){ +#if SQLITE_OS_WINCE + /* + ** NOTE: Windows CE is handled differently here due its lack of the Win32 + ** API LockFileEx. + */ + res = winceLockFile(&pFile->h, SHARED_FIRST, 0, 1, 0); +#else + res = winLockFile(&pFile->h, SQLITE_LOCKFILEEX_FLAGS, SHARED_FIRST, 0, + SHARED_SIZE, 0); +#endif + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + int lk; + sqlite3_randomness(sizeof(lk), &lk); + pFile->sharedLockByte = (short)((lk & 0x7fffffff)%(SHARED_SIZE - 1)); + res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, + SHARED_FIRST+pFile->sharedLockByte, 0, 1, 0); + } +#endif + if( res == 0 ){ + pFile->lastErrno = osGetLastError(); + /* No need to log a failure to lock */ + } + OSTRACE(("READ-LOCK file=%p, result=%d\n", pFile->h, res)); + return res; +} + +/* +** Undo a readlock +*/ +static int winUnlockReadLock(winFile *pFile){ + int res; + DWORD lastErrno; + OSTRACE(("READ-UNLOCK file=%p, lock=%d\n", pFile->h, pFile->locktype)); + if( osIsNT() ){ + res = winUnlockFile(&pFile->h, SHARED_FIRST, 0, SHARED_SIZE, 0); + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + res = winUnlockFile(&pFile->h, SHARED_FIRST+pFile->sharedLockByte, 0, 1, 0); + } +#endif + if( res==0 && ((lastErrno = osGetLastError())!=ERROR_NOT_LOCKED) ){ + pFile->lastErrno = lastErrno; + winLogError(SQLITE_IOERR_UNLOCK, pFile->lastErrno, + "winUnlockReadLock", pFile->zPath); + } + OSTRACE(("READ-UNLOCK file=%p, result=%d\n", pFile->h, res)); + return res; +} + +/* +** Lock the file with the lock specified by parameter locktype - one +** of the following: +** +** (1) SHARED_LOCK +** (2) RESERVED_LOCK +** (3) PENDING_LOCK +** (4) EXCLUSIVE_LOCK +** +** Sometimes when requesting one lock state, additional lock states +** are inserted in between. The locking might fail on one of the later +** transitions leaving the lock state different from what it started but +** still short of its goal. The following chart shows the allowed +** transitions and the inserted intermediate states: +** +** UNLOCKED -> SHARED +** SHARED -> RESERVED +** SHARED -> (PENDING) -> EXCLUSIVE +** RESERVED -> (PENDING) -> EXCLUSIVE +** PENDING -> EXCLUSIVE +** +** This routine will only increase a lock. The winUnlock() routine +** erases all locks at once and returns us immediately to locking level 0. +** It is not possible to lower the locking level one step at a time. You +** must go straight to locking level 0. +*/ +static int winLock(sqlite3_file *id, int locktype){ + int rc = SQLITE_OK; /* Return code from subroutines */ + int res = 1; /* Result of a Windows lock call */ + int newLocktype; /* Set pFile->locktype to this value before exiting */ + int gotPendingLock = 0;/* True if we acquired a PENDING lock this time */ + winFile *pFile = (winFile*)id; + DWORD lastErrno = NO_ERROR; + + assert( id!=0 ); + OSTRACE(("LOCK file=%p, oldLock=%d(%d), newLock=%d\n", + pFile->h, pFile->locktype, pFile->sharedLockByte, locktype)); + + /* If there is already a lock of this type or more restrictive on the + ** OsFile, do nothing. Don't use the end_lock: exit path, as + ** sqlite3OsEnterMutex() hasn't been called yet. + */ + if( pFile->locktype>=locktype ){ + OSTRACE(("LOCK-HELD file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + + /* Do not allow any kind of write-lock on a read-only database + */ + if( (pFile->ctrlFlags & WINFILE_RDONLY)!=0 && locktype>=RESERVED_LOCK ){ + return SQLITE_IOERR_LOCK; + } + + /* Make sure the locking sequence is correct + */ + assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK ); + assert( locktype!=PENDING_LOCK ); + assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK ); + + /* Lock the PENDING_LOCK byte if we need to acquire a PENDING lock or + ** a SHARED lock. If we are acquiring a SHARED lock, the acquisition of + ** the PENDING_LOCK byte is temporary. + */ + newLocktype = pFile->locktype; + if( (pFile->locktype==NO_LOCK) + || ( (locktype==EXCLUSIVE_LOCK) + && (pFile->locktype==RESERVED_LOCK)) + ){ + int cnt = 3; + while( cnt-->0 && (res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, + PENDING_BYTE, 0, 1, 0))==0 ){ + /* Try 3 times to get the pending lock. This is needed to work + ** around problems caused by indexing and/or anti-virus software on + ** Windows systems. + ** If you are using this code as a model for alternative VFSes, do not + ** copy this retry logic. It is a hack intended for Windows only. + */ + lastErrno = osGetLastError(); + OSTRACE(("LOCK-PENDING-FAIL file=%p, count=%d, result=%d\n", + pFile->h, cnt, res)); + if( lastErrno==ERROR_INVALID_HANDLE ){ + pFile->lastErrno = lastErrno; + rc = SQLITE_IOERR_LOCK; + OSTRACE(("LOCK-FAIL file=%p, count=%d, rc=%s\n", + pFile->h, cnt, sqlite3ErrName(rc))); + return rc; + } + if( cnt ) sqlite3_win32_sleep(1); + } + gotPendingLock = res; + if( !res ){ + lastErrno = osGetLastError(); + } + } + + /* Acquire a shared lock + */ + if( locktype==SHARED_LOCK && res ){ + assert( pFile->locktype==NO_LOCK ); + res = winGetReadLock(pFile); + if( res ){ + newLocktype = SHARED_LOCK; + }else{ + lastErrno = osGetLastError(); + } + } + + /* Acquire a RESERVED lock + */ + if( locktype==RESERVED_LOCK && res ){ + assert( pFile->locktype==SHARED_LOCK ); + res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, RESERVED_BYTE, 0, 1, 0); + if( res ){ + newLocktype = RESERVED_LOCK; + }else{ + lastErrno = osGetLastError(); + } + } + + /* Acquire a PENDING lock + */ + if( locktype==EXCLUSIVE_LOCK && res ){ + newLocktype = PENDING_LOCK; + gotPendingLock = 0; + } + + /* Acquire an EXCLUSIVE lock + */ + if( locktype==EXCLUSIVE_LOCK && res ){ + assert( pFile->locktype>=SHARED_LOCK ); + res = winUnlockReadLock(pFile); + res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, SHARED_FIRST, 0, + SHARED_SIZE, 0); + if( res ){ + newLocktype = EXCLUSIVE_LOCK; + }else{ + lastErrno = osGetLastError(); + winGetReadLock(pFile); + } + } + + /* If we are holding a PENDING lock that ought to be released, then + ** release it now. + */ + if( gotPendingLock && locktype==SHARED_LOCK ){ + winUnlockFile(&pFile->h, PENDING_BYTE, 0, 1, 0); + } + + /* Update the state of the lock has held in the file descriptor then + ** return the appropriate result code. + */ + if( res ){ + rc = SQLITE_OK; + }else{ + pFile->lastErrno = lastErrno; + rc = SQLITE_BUSY; + OSTRACE(("LOCK-FAIL file=%p, wanted=%d, got=%d\n", + pFile->h, locktype, newLocktype)); + } + pFile->locktype = (u8)newLocktype; + OSTRACE(("LOCK file=%p, lock=%d, rc=%s\n", + pFile->h, pFile->locktype, sqlite3ErrName(rc))); + return rc; +} + +/* +** This routine checks if there is a RESERVED lock held on the specified +** file by this or any other process. If such a lock is held, return +** non-zero, otherwise zero. +*/ +static int winCheckReservedLock(sqlite3_file *id, int *pResOut){ + int res; + winFile *pFile = (winFile*)id; + + SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + OSTRACE(("TEST-WR-LOCK file=%p, pResOut=%p\n", pFile->h, pResOut)); + + assert( id!=0 ); + if( pFile->locktype>=RESERVED_LOCK ){ + res = 1; + OSTRACE(("TEST-WR-LOCK file=%p, result=%d (local)\n", pFile->h, res)); + }else{ + res = winLockFile(&pFile->h, SQLITE_LOCKFILEEX_FLAGS,RESERVED_BYTE, 0, 1, 0); + if( res ){ + winUnlockFile(&pFile->h, RESERVED_BYTE, 0, 1, 0); + } + res = !res; + OSTRACE(("TEST-WR-LOCK file=%p, result=%d (remote)\n", pFile->h, res)); + } + *pResOut = res; + OSTRACE(("TEST-WR-LOCK file=%p, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n", + pFile->h, pResOut, *pResOut)); + return SQLITE_OK; +} + +/* +** Lower the locking level on file descriptor id to locktype. locktype +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +** +** It is not possible for this routine to fail if the second argument +** is NO_LOCK. If the second argument is SHARED_LOCK then this routine +** might return SQLITE_IOERR; +*/ +static int winUnlock(sqlite3_file *id, int locktype){ + int type; + winFile *pFile = (winFile*)id; + int rc = SQLITE_OK; + assert( pFile!=0 ); + assert( locktype<=SHARED_LOCK ); + OSTRACE(("UNLOCK file=%p, oldLock=%d(%d), newLock=%d\n", + pFile->h, pFile->locktype, pFile->sharedLockByte, locktype)); + type = pFile->locktype; + if( type>=EXCLUSIVE_LOCK ){ + winUnlockFile(&pFile->h, SHARED_FIRST, 0, SHARED_SIZE, 0); + if( locktype==SHARED_LOCK && !winGetReadLock(pFile) ){ + /* This should never happen. We should always be able to + ** reacquire the read lock */ + rc = winLogError(SQLITE_IOERR_UNLOCK, osGetLastError(), + "winUnlock", pFile->zPath); + } + } + if( type>=RESERVED_LOCK ){ + winUnlockFile(&pFile->h, RESERVED_BYTE, 0, 1, 0); + } + if( locktype==NO_LOCK && type>=SHARED_LOCK ){ + winUnlockReadLock(pFile); + } + if( type>=PENDING_LOCK ){ + winUnlockFile(&pFile->h, PENDING_BYTE, 0, 1, 0); + } + pFile->locktype = (u8)locktype; + OSTRACE(("UNLOCK file=%p, lock=%d, rc=%s\n", + pFile->h, pFile->locktype, sqlite3ErrName(rc))); + return rc; +} + +/* +** If *pArg is initially negative then this is a query. Set *pArg to +** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. +** +** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. +*/ +static void winModeBit(winFile *pFile, unsigned char mask, int *pArg){ + if( *pArg<0 ){ + *pArg = (pFile->ctrlFlags & mask)!=0; + }else if( (*pArg)==0 ){ + pFile->ctrlFlags &= ~mask; + }else{ + pFile->ctrlFlags |= mask; + } +} + +/* Forward references to VFS helper methods used for temporary files */ +static int winGetTempname(sqlite3_vfs *, char **); +static int winIsDir(const void *); +static BOOL winIsDriveLetterAndColon(const char *); + +/* +** Control and query of the open file handle. +*/ +static int winFileControl(sqlite3_file *id, int op, void *pArg){ + winFile *pFile = (winFile*)id; + OSTRACE(("FCNTL file=%p, op=%d, pArg=%p\n", pFile->h, op, pArg)); + switch( op ){ + case SQLITE_FCNTL_LOCKSTATE: { + *(int*)pArg = pFile->locktype; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_LAST_ERRNO: { + *(int*)pArg = (int)pFile->lastErrno; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_CHUNK_SIZE: { + pFile->szChunk = *(int *)pArg; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_SIZE_HINT: { + if( pFile->szChunk>0 ){ + sqlite3_int64 oldSz; + int rc = winFileSize(id, &oldSz); + if( rc==SQLITE_OK ){ + sqlite3_int64 newSz = *(sqlite3_int64*)pArg; + if( newSz>oldSz ){ + SimulateIOErrorBenign(1); + rc = winTruncate(id, newSz); + SimulateIOErrorBenign(0); + } + } + OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc))); + return rc; + } + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_PERSIST_WAL: { + winModeBit(pFile, WINFILE_PERSIST_WAL, (int*)pArg); + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { + winModeBit(pFile, WINFILE_PSOW, (int*)pArg); + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_VFSNAME: { + *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } + case SQLITE_FCNTL_WIN32_AV_RETRY: { + int *a = (int*)pArg; + if( a[0]>0 ){ + winIoerrRetry = a[0]; + }else{ + a[0] = winIoerrRetry; + } + if( a[1]>0 ){ + winIoerrRetryDelay = a[1]; + }else{ + a[1] = winIoerrRetryDelay; + } + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } +#ifdef SQLITE_TEST + case SQLITE_FCNTL_WIN32_SET_HANDLE: { + LPHANDLE phFile = (LPHANDLE)pArg; + HANDLE hOldFile = pFile->h; + pFile->h = *phFile; + *phFile = hOldFile; + OSTRACE(("FCNTL oldFile=%p, newFile=%p, rc=SQLITE_OK\n", + hOldFile, pFile->h)); + return SQLITE_OK; + } +#endif + case SQLITE_FCNTL_TEMPFILENAME: { + char *zTFile = 0; + int rc = winGetTempname(pFile->pVfs, &zTFile); + if( rc==SQLITE_OK ){ + *(char**)pArg = zTFile; + } + OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc))); + return rc; + } +#if SQLITE_MAX_MMAP_SIZE>0 + case SQLITE_FCNTL_MMAP_SIZE: { + i64 newLimit = *(i64*)pArg; + int rc = SQLITE_OK; + if( newLimit>sqlite3GlobalConfig.mxMmap ){ + newLimit = sqlite3GlobalConfig.mxMmap; + } + *(i64*)pArg = pFile->mmapSizeMax; + if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ + pFile->mmapSizeMax = newLimit; + if( pFile->mmapSize>0 ){ + winUnmapfile(pFile); + rc = winMapfile(pFile, -1); + } + } + OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc))); + return rc; + } +#endif + } + OSTRACE(("FCNTL file=%p, rc=SQLITE_NOTFOUND\n", pFile->h)); + return SQLITE_NOTFOUND; +} + +/* +** Return the sector size in bytes of the underlying block device for +** the specified file. This is almost always 512 bytes, but may be +** larger for some devices. +** +** SQLite code assumes this function cannot fail. It also assumes that +** if two files are created in the same file-system directory (i.e. +** a database and its journal file) that the sector size will be the +** same for both. +*/ +static int winSectorSize(sqlite3_file *id){ + (void)id; + return SQLITE_DEFAULT_SECTOR_SIZE; +} + +/* +** Return a vector of device characteristics. +*/ +static int winDeviceCharacteristics(sqlite3_file *id){ + winFile *p = (winFile*)id; + return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN | + ((p->ctrlFlags & WINFILE_PSOW)?SQLITE_IOCAP_POWERSAFE_OVERWRITE:0); +} + +/* +** Windows will only let you create file view mappings +** on allocation size granularity boundaries. +** During sqlite3_os_init() we do a GetSystemInfo() +** to get the granularity size. +*/ +static SYSTEM_INFO winSysInfo; + +#ifndef SQLITE_OMIT_WAL + +/* +** Helper functions to obtain and relinquish the global mutex. The +** global mutex is used to protect the winLockInfo objects used by +** this file, all of which may be shared by multiple threads. +** +** Function winShmMutexHeld() is used to assert() that the global mutex +** is held when required. This function is only used as part of assert() +** statements. e.g. +** +** winShmEnterMutex() +** assert( winShmMutexHeld() ); +** winShmLeaveMutex() +*/ +static void winShmEnterMutex(void){ + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); +} +static void winShmLeaveMutex(void){ + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); +} +#ifndef NDEBUG +static int winShmMutexHeld(void) { + return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); +} +#endif + +/* +** Object used to represent a single file opened and mmapped to provide +** shared memory. When multiple threads all reference the same +** log-summary, each thread has its own winFile object, but they all +** point to a single instance of this object. In other words, each +** log-summary is opened only once per process. +** +** winShmMutexHeld() must be true when creating or destroying +** this object or while reading or writing the following fields: +** +** nRef +** pNext +** +** The following fields are read-only after the object is created: +** +** fid +** zFilename +** +** Either winShmNode.mutex must be held or winShmNode.nRef==0 and +** winShmMutexHeld() is true when reading or writing any other field +** in this structure. +** +*/ +struct winShmNode { + sqlite3_mutex *mutex; /* Mutex to access this object */ + char *zFilename; /* Name of the file */ + winFile hFile; /* File handle from winOpen */ + + int szRegion; /* Size of shared-memory regions */ + int nRegion; /* Size of array apRegion */ + struct ShmRegion { + HANDLE hMap; /* File handle from CreateFileMapping */ + void *pMap; + } *aRegion; + DWORD lastErrno; /* The Windows errno from the last I/O error */ + + int nRef; /* Number of winShm objects pointing to this */ + winShm *pFirst; /* All winShm objects pointing to this */ + winShmNode *pNext; /* Next in list of all winShmNode objects */ +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) + u8 nextShmId; /* Next available winShm.id value */ +#endif +}; + +/* +** A global array of all winShmNode objects. +** +** The winShmMutexHeld() must be true while reading or writing this list. +*/ +static winShmNode *winShmNodeList = 0; + +/* +** Structure used internally by this VFS to record the state of an +** open shared memory connection. +** +** The following fields are initialized when this object is created and +** are read-only thereafter: +** +** winShm.pShmNode +** winShm.id +** +** All other fields are read/write. The winShm.pShmNode->mutex must be held +** while accessing any read/write fields. +*/ +struct winShm { + winShmNode *pShmNode; /* The underlying winShmNode object */ + winShm *pNext; /* Next winShm with the same winShmNode */ + u8 hasMutex; /* True if holding the winShmNode mutex */ + u16 sharedMask; /* Mask of shared locks held */ + u16 exclMask; /* Mask of exclusive locks held */ +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) + u8 id; /* Id of this connection with its winShmNode */ +#endif +}; + +/* +** Constants used for locking +*/ +#define WIN_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ +#define WIN_SHM_DMS (WIN_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ + +/* +** Apply advisory locks for all n bytes beginning at ofst. +*/ +#define _SHM_UNLCK 1 +#define _SHM_RDLCK 2 +#define _SHM_WRLCK 3 +static int winShmSystemLock( + winShmNode *pFile, /* Apply locks to this open shared-memory segment */ + int lockType, /* _SHM_UNLCK, _SHM_RDLCK, or _SHM_WRLCK */ + int ofst, /* Offset to first byte to be locked/unlocked */ + int nByte /* Number of bytes to lock or unlock */ +){ + int rc = 0; /* Result code form Lock/UnlockFileEx() */ + + /* Access to the winShmNode object is serialized by the caller */ + assert( sqlite3_mutex_held(pFile->mutex) || pFile->nRef==0 ); + + OSTRACE(("SHM-LOCK file=%p, lock=%d, offset=%d, size=%d\n", + pFile->hFile.h, lockType, ofst, nByte)); + + /* Release/Acquire the system-level lock */ + if( lockType==_SHM_UNLCK ){ + rc = winUnlockFile(&pFile->hFile.h, ofst, 0, nByte, 0); + }else{ + /* Initialize the locking parameters */ + DWORD dwFlags = LOCKFILE_FAIL_IMMEDIATELY; + if( lockType == _SHM_WRLCK ) dwFlags |= LOCKFILE_EXCLUSIVE_LOCK; + rc = winLockFile(&pFile->hFile.h, dwFlags, ofst, 0, nByte, 0); + } + + if( rc!= 0 ){ + rc = SQLITE_OK; + }else{ + pFile->lastErrno = osGetLastError(); + rc = SQLITE_BUSY; + } + + OSTRACE(("SHM-LOCK file=%p, func=%s, errno=%lu, rc=%s\n", + pFile->hFile.h, (lockType == _SHM_UNLCK) ? "winUnlockFile" : + "winLockFile", pFile->lastErrno, sqlite3ErrName(rc))); + + return rc; +} + +/* Forward references to VFS methods */ +static int winOpen(sqlite3_vfs*,const char*,sqlite3_file*,int,int*); +static int winDelete(sqlite3_vfs *,const char*,int); + +/* +** Purge the winShmNodeList list of all entries with winShmNode.nRef==0. +** +** This is not a VFS shared-memory method; it is a utility function called +** by VFS shared-memory methods. +*/ +static void winShmPurge(sqlite3_vfs *pVfs, int deleteFlag){ + winShmNode **pp; + winShmNode *p; + assert( winShmMutexHeld() ); + OSTRACE(("SHM-PURGE pid=%lu, deleteFlag=%d\n", + osGetCurrentProcessId(), deleteFlag)); + pp = &winShmNodeList; + while( (p = *pp)!=0 ){ + if( p->nRef==0 ){ + int i; + if( p->mutex ){ sqlite3_mutex_free(p->mutex); } + for(i=0; i<p->nRegion; i++){ + BOOL bRc = osUnmapViewOfFile(p->aRegion[i].pMap); + OSTRACE(("SHM-PURGE-UNMAP pid=%lu, region=%d, rc=%s\n", + osGetCurrentProcessId(), i, bRc ? "ok" : "failed")); + UNUSED_VARIABLE_VALUE(bRc); + bRc = osCloseHandle(p->aRegion[i].hMap); + OSTRACE(("SHM-PURGE-CLOSE pid=%lu, region=%d, rc=%s\n", + osGetCurrentProcessId(), i, bRc ? "ok" : "failed")); + UNUSED_VARIABLE_VALUE(bRc); + } + if( p->hFile.h!=NULL && p->hFile.h!=INVALID_HANDLE_VALUE ){ + SimulateIOErrorBenign(1); + winClose((sqlite3_file *)&p->hFile); + SimulateIOErrorBenign(0); + } + if( deleteFlag ){ + SimulateIOErrorBenign(1); + winDelete(pVfs, p->zFilename, 0); + SimulateIOErrorBenign(0); + } + *pp = p->pNext; + sqlite3_free(p->aRegion); + sqlite3_free(p); + }else{ + pp = &p->pNext; + } + } +} + +/* +** Open the shared-memory area associated with database file pDbFd. +** +** When opening a new shared-memory file, if no other instances of that +** file are currently open, in this process or in other processes, then +** the file must be truncated to zero length or have its header cleared. +*/ +static int winOpenSharedMemory(winFile *pDbFd){ + struct winShm *p; /* The connection to be opened */ + struct winShmNode *pShmNode = 0; /* The underlying mmapped file */ + int rc; /* Result code */ + struct winShmNode *pNew; /* Newly allocated winShmNode */ + int nName; /* Size of zName in bytes */ + + assert( pDbFd->pShm==0 ); /* Not previously opened */ + + /* Allocate space for the new sqlite3_shm object. Also speculatively + ** allocate space for a new winShmNode and filename. + */ + p = sqlite3MallocZero( sizeof(*p) ); + if( p==0 ) return SQLITE_IOERR_NOMEM; + nName = sqlite3Strlen30(pDbFd->zPath); + pNew = sqlite3MallocZero( sizeof(*pShmNode) + nName + 17 ); + if( pNew==0 ){ + sqlite3_free(p); + return SQLITE_IOERR_NOMEM; + } + pNew->zFilename = (char*)&pNew[1]; + sqlite3_snprintf(nName+15, pNew->zFilename, "%s-shm", pDbFd->zPath); + + /* Look to see if there is an existing winShmNode that can be used. + ** If no matching winShmNode currently exists, create a new one. + */ + winShmEnterMutex(); + for(pShmNode = winShmNodeList; pShmNode; pShmNode=pShmNode->pNext){ + /* TBD need to come up with better match here. Perhaps + ** use FILE_ID_BOTH_DIR_INFO Structure. + */ + if( sqlite3StrICmp(pShmNode->zFilename, pNew->zFilename)==0 ) break; + } + if( pShmNode ){ + sqlite3_free(pNew); + }else{ + pShmNode = pNew; + pNew = 0; + ((winFile*)(&pShmNode->hFile))->h = INVALID_HANDLE_VALUE; + pShmNode->pNext = winShmNodeList; + winShmNodeList = pShmNode; + + pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); + if( pShmNode->mutex==0 ){ + rc = SQLITE_IOERR_NOMEM; + goto shm_open_err; + } + + rc = winOpen(pDbFd->pVfs, + pShmNode->zFilename, /* Name of the file (UTF-8) */ + (sqlite3_file*)&pShmNode->hFile, /* File handle here */ + SQLITE_OPEN_WAL | SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, + 0); + if( SQLITE_OK!=rc ){ + goto shm_open_err; + } + + /* Check to see if another process is holding the dead-man switch. + ** If not, truncate the file to zero length. + */ + if( winShmSystemLock(pShmNode, _SHM_WRLCK, WIN_SHM_DMS, 1)==SQLITE_OK ){ + rc = winTruncate((sqlite3_file *)&pShmNode->hFile, 0); + if( rc!=SQLITE_OK ){ + rc = winLogError(SQLITE_IOERR_SHMOPEN, osGetLastError(), + "winOpenShm", pDbFd->zPath); + } + } + if( rc==SQLITE_OK ){ + winShmSystemLock(pShmNode, _SHM_UNLCK, WIN_SHM_DMS, 1); + rc = winShmSystemLock(pShmNode, _SHM_RDLCK, WIN_SHM_DMS, 1); + } + if( rc ) goto shm_open_err; + } + + /* Make the new connection a child of the winShmNode */ + p->pShmNode = pShmNode; +#if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) + p->id = pShmNode->nextShmId++; +#endif + pShmNode->nRef++; + pDbFd->pShm = p; + winShmLeaveMutex(); + + /* The reference count on pShmNode has already been incremented under + ** the cover of the winShmEnterMutex() mutex and the pointer from the + ** new (struct winShm) object to the pShmNode has been set. All that is + ** left to do is to link the new object into the linked list starting + ** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex + ** mutex. + */ + sqlite3_mutex_enter(pShmNode->mutex); + p->pNext = pShmNode->pFirst; + pShmNode->pFirst = p; + sqlite3_mutex_leave(pShmNode->mutex); + return SQLITE_OK; + + /* Jump here on any error */ +shm_open_err: + winShmSystemLock(pShmNode, _SHM_UNLCK, WIN_SHM_DMS, 1); + winShmPurge(pDbFd->pVfs, 0); /* This call frees pShmNode if required */ + sqlite3_free(p); + sqlite3_free(pNew); + winShmLeaveMutex(); + return rc; +} + +/* +** Close a connection to shared-memory. Delete the underlying +** storage if deleteFlag is true. +*/ +static int winShmUnmap( + sqlite3_file *fd, /* Database holding shared memory */ + int deleteFlag /* Delete after closing if true */ +){ + winFile *pDbFd; /* Database holding shared-memory */ + winShm *p; /* The connection to be closed */ + winShmNode *pShmNode; /* The underlying shared-memory file */ + winShm **pp; /* For looping over sibling connections */ + + pDbFd = (winFile*)fd; + p = pDbFd->pShm; + if( p==0 ) return SQLITE_OK; + pShmNode = p->pShmNode; + + /* Remove connection p from the set of connections associated + ** with pShmNode */ + sqlite3_mutex_enter(pShmNode->mutex); + for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} + *pp = p->pNext; + + /* Free the connection p */ + sqlite3_free(p); + pDbFd->pShm = 0; + sqlite3_mutex_leave(pShmNode->mutex); + + /* If pShmNode->nRef has reached 0, then close the underlying + ** shared-memory file, too */ + winShmEnterMutex(); + assert( pShmNode->nRef>0 ); + pShmNode->nRef--; + if( pShmNode->nRef==0 ){ + winShmPurge(pDbFd->pVfs, deleteFlag); + } + winShmLeaveMutex(); + + return SQLITE_OK; +} + +/* +** Change the lock state for a shared-memory segment. +*/ +static int winShmLock( + sqlite3_file *fd, /* Database file holding the shared memory */ + int ofst, /* First lock to acquire or release */ + int n, /* Number of locks to acquire or release */ + int flags /* What to do with the lock */ +){ + winFile *pDbFd = (winFile*)fd; /* Connection holding shared memory */ + winShm *p = pDbFd->pShm; /* The shared memory being locked */ + winShm *pX; /* For looping over all siblings */ + winShmNode *pShmNode = p->pShmNode; + int rc = SQLITE_OK; /* Result code */ + u16 mask; /* Mask of locks to take or release */ + + assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); + assert( n>=1 ); + assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) + || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) + || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) + || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); + assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); + + mask = (u16)((1U<<(ofst+n)) - (1U<<ofst)); + assert( n>1 || mask==(1<<ofst) ); + sqlite3_mutex_enter(pShmNode->mutex); + if( flags & SQLITE_SHM_UNLOCK ){ + u16 allMask = 0; /* Mask of locks held by siblings */ + + /* See if any siblings hold this same lock */ + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + if( pX==p ) continue; + assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); + allMask |= pX->sharedMask; + } + + /* Unlock the system-level locks */ + if( (mask & allMask)==0 ){ + rc = winShmSystemLock(pShmNode, _SHM_UNLCK, ofst+WIN_SHM_BASE, n); + }else{ + rc = SQLITE_OK; + } + + /* Undo the local locks */ + if( rc==SQLITE_OK ){ + p->exclMask &= ~mask; + p->sharedMask &= ~mask; + } + }else if( flags & SQLITE_SHM_SHARED ){ + u16 allShared = 0; /* Union of locks held by connections other than "p" */ + + /* Find out which shared locks are already held by sibling connections. + ** If any sibling already holds an exclusive lock, go ahead and return + ** SQLITE_BUSY. + */ + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + if( (pX->exclMask & mask)!=0 ){ + rc = SQLITE_BUSY; + break; + } + allShared |= pX->sharedMask; + } + + /* Get shared locks at the system level, if necessary */ + if( rc==SQLITE_OK ){ + if( (allShared & mask)==0 ){ + rc = winShmSystemLock(pShmNode, _SHM_RDLCK, ofst+WIN_SHM_BASE, n); + }else{ + rc = SQLITE_OK; + } + } + + /* Get the local shared locks */ + if( rc==SQLITE_OK ){ + p->sharedMask |= mask; + } + }else{ + /* Make sure no sibling connections hold locks that will block this + ** lock. If any do, return SQLITE_BUSY right away. + */ + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){ + rc = SQLITE_BUSY; + break; + } + } + + /* Get the exclusive locks at the system level. Then if successful + ** also mark the local connection as being locked. + */ + if( rc==SQLITE_OK ){ + rc = winShmSystemLock(pShmNode, _SHM_WRLCK, ofst+WIN_SHM_BASE, n); + if( rc==SQLITE_OK ){ + assert( (p->sharedMask & mask)==0 ); + p->exclMask |= mask; + } + } + } + sqlite3_mutex_leave(pShmNode->mutex); + OSTRACE(("SHM-LOCK pid=%lu, id=%d, sharedMask=%03x, exclMask=%03x, rc=%s\n", + osGetCurrentProcessId(), p->id, p->sharedMask, p->exclMask, + sqlite3ErrName(rc))); + return rc; +} + +/* +** Implement a memory barrier or memory fence on shared memory. +** +** All loads and stores begun before the barrier must complete before +** any load or store begun after the barrier. +*/ +static void winShmBarrier( + sqlite3_file *fd /* Database holding the shared memory */ +){ + UNUSED_PARAMETER(fd); + sqlite3MemoryBarrier(); /* compiler-defined memory barrier */ + winShmEnterMutex(); /* Also mutex, for redundancy */ + winShmLeaveMutex(); +} + +/* +** This function is called to obtain a pointer to region iRegion of the +** shared-memory associated with the database file fd. Shared-memory regions +** are numbered starting from zero. Each shared-memory region is szRegion +** bytes in size. +** +** If an error occurs, an error code is returned and *pp is set to NULL. +** +** Otherwise, if the isWrite parameter is 0 and the requested shared-memory +** region has not been allocated (by any client, including one running in a +** separate process), then *pp is set to NULL and SQLITE_OK returned. If +** isWrite is non-zero and the requested shared-memory region has not yet +** been allocated, it is allocated by this function. +** +** If the shared-memory region has already been allocated or is allocated by +** this call as described above, then it is mapped into this processes +** address space (if it is not already), *pp is set to point to the mapped +** memory and SQLITE_OK returned. +*/ +static int winShmMap( + sqlite3_file *fd, /* Handle open on database file */ + int iRegion, /* Region to retrieve */ + int szRegion, /* Size of regions */ + int isWrite, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ +){ + winFile *pDbFd = (winFile*)fd; + winShm *pShm = pDbFd->pShm; + winShmNode *pShmNode; + int rc = SQLITE_OK; + + if( !pShm ){ + rc = winOpenSharedMemory(pDbFd); + if( rc!=SQLITE_OK ) return rc; + pShm = pDbFd->pShm; + } + pShmNode = pShm->pShmNode; + + sqlite3_mutex_enter(pShmNode->mutex); + assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); + + if( pShmNode->nRegion<=iRegion ){ + struct ShmRegion *apNew; /* New aRegion[] array */ + int nByte = (iRegion+1)*szRegion; /* Minimum required file size */ + sqlite3_int64 sz; /* Current size of wal-index file */ + + pShmNode->szRegion = szRegion; + + /* The requested region is not mapped into this processes address space. + ** Check to see if it has been allocated (i.e. if the wal-index file is + ** large enough to contain the requested region). + */ + rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz); + if( rc!=SQLITE_OK ){ + rc = winLogError(SQLITE_IOERR_SHMSIZE, osGetLastError(), + "winShmMap1", pDbFd->zPath); + goto shmpage_out; + } + + if( sz<nByte ){ + /* The requested memory region does not exist. If isWrite is set to + ** zero, exit early. *pp will be set to NULL and SQLITE_OK returned. + ** + ** Alternatively, if isWrite is non-zero, use ftruncate() to allocate + ** the requested memory region. + */ + if( !isWrite ) goto shmpage_out; + rc = winTruncate((sqlite3_file *)&pShmNode->hFile, nByte); + if( rc!=SQLITE_OK ){ + rc = winLogError(SQLITE_IOERR_SHMSIZE, osGetLastError(), + "winShmMap2", pDbFd->zPath); + goto shmpage_out; + } + } + + /* Map the requested memory region into this processes address space. */ + apNew = (struct ShmRegion *)sqlite3_realloc64( + pShmNode->aRegion, (iRegion+1)*sizeof(apNew[0]) + ); + if( !apNew ){ + rc = SQLITE_IOERR_NOMEM; + goto shmpage_out; + } + pShmNode->aRegion = apNew; + + while( pShmNode->nRegion<=iRegion ){ + HANDLE hMap = NULL; /* file-mapping handle */ + void *pMap = 0; /* Mapped memory region */ + +#if SQLITE_OS_WINRT + hMap = osCreateFileMappingFromApp(pShmNode->hFile.h, + NULL, PAGE_READWRITE, nByte, NULL + ); +#elif defined(SQLITE_WIN32_HAS_WIDE) + hMap = osCreateFileMappingW(pShmNode->hFile.h, + NULL, PAGE_READWRITE, 0, nByte, NULL + ); +#elif defined(SQLITE_WIN32_HAS_ANSI) + hMap = osCreateFileMappingA(pShmNode->hFile.h, + NULL, PAGE_READWRITE, 0, nByte, NULL + ); +#endif + OSTRACE(("SHM-MAP-CREATE pid=%lu, region=%d, size=%d, rc=%s\n", + osGetCurrentProcessId(), pShmNode->nRegion, nByte, + hMap ? "ok" : "failed")); + if( hMap ){ + int iOffset = pShmNode->nRegion*szRegion; + int iOffsetShift = iOffset % winSysInfo.dwAllocationGranularity; +#if SQLITE_OS_WINRT + pMap = osMapViewOfFileFromApp(hMap, FILE_MAP_WRITE | FILE_MAP_READ, + iOffset - iOffsetShift, szRegion + iOffsetShift + ); +#else + pMap = osMapViewOfFile(hMap, FILE_MAP_WRITE | FILE_MAP_READ, + 0, iOffset - iOffsetShift, szRegion + iOffsetShift + ); +#endif + OSTRACE(("SHM-MAP-MAP pid=%lu, region=%d, offset=%d, size=%d, rc=%s\n", + osGetCurrentProcessId(), pShmNode->nRegion, iOffset, + szRegion, pMap ? "ok" : "failed")); + } + if( !pMap ){ + pShmNode->lastErrno = osGetLastError(); + rc = winLogError(SQLITE_IOERR_SHMMAP, pShmNode->lastErrno, + "winShmMap3", pDbFd->zPath); + if( hMap ) osCloseHandle(hMap); + goto shmpage_out; + } + + pShmNode->aRegion[pShmNode->nRegion].pMap = pMap; + pShmNode->aRegion[pShmNode->nRegion].hMap = hMap; + pShmNode->nRegion++; + } + } + +shmpage_out: + if( pShmNode->nRegion>iRegion ){ + int iOffset = iRegion*szRegion; + int iOffsetShift = iOffset % winSysInfo.dwAllocationGranularity; + char *p = (char *)pShmNode->aRegion[iRegion].pMap; + *pp = (void *)&p[iOffsetShift]; + }else{ + *pp = 0; + } + sqlite3_mutex_leave(pShmNode->mutex); + return rc; +} + +#else +# define winShmMap 0 +# define winShmLock 0 +# define winShmBarrier 0 +# define winShmUnmap 0 +#endif /* #ifndef SQLITE_OMIT_WAL */ + +/* +** Cleans up the mapped region of the specified file, if any. +*/ +#if SQLITE_MAX_MMAP_SIZE>0 +static int winUnmapfile(winFile *pFile){ + assert( pFile!=0 ); + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, hMap=%p, pMapRegion=%p, " + "mmapSize=%lld, mmapSizeActual=%lld, mmapSizeMax=%lld\n", + osGetCurrentProcessId(), pFile, pFile->hMap, pFile->pMapRegion, + pFile->mmapSize, pFile->mmapSizeActual, pFile->mmapSizeMax)); + if( pFile->pMapRegion ){ + if( !osUnmapViewOfFile(pFile->pMapRegion) ){ + pFile->lastErrno = osGetLastError(); + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, pMapRegion=%p, " + "rc=SQLITE_IOERR_MMAP\n", osGetCurrentProcessId(), pFile, + pFile->pMapRegion)); + return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, + "winUnmapfile1", pFile->zPath); + } + pFile->pMapRegion = 0; + pFile->mmapSize = 0; + pFile->mmapSizeActual = 0; + } + if( pFile->hMap!=NULL ){ + if( !osCloseHandle(pFile->hMap) ){ + pFile->lastErrno = osGetLastError(); + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, hMap=%p, rc=SQLITE_IOERR_MMAP\n", + osGetCurrentProcessId(), pFile, pFile->hMap)); + return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, + "winUnmapfile2", pFile->zPath); + } + pFile->hMap = NULL; + } + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile)); + return SQLITE_OK; +} + +/* +** Memory map or remap the file opened by file-descriptor pFd (if the file +** is already mapped, the existing mapping is replaced by the new). Or, if +** there already exists a mapping for this file, and there are still +** outstanding xFetch() references to it, this function is a no-op. +** +** If parameter nByte is non-negative, then it is the requested size of +** the mapping to create. Otherwise, if nByte is less than zero, then the +** requested size is the size of the file on disk. The actual size of the +** created mapping is either the requested size or the value configured +** using SQLITE_FCNTL_MMAP_SIZE, whichever is smaller. +** +** SQLITE_OK is returned if no error occurs (even if the mapping is not +** recreated as a result of outstanding references) or an SQLite error +** code otherwise. +*/ +static int winMapfile(winFile *pFd, sqlite3_int64 nByte){ + sqlite3_int64 nMap = nByte; + int rc; + + assert( nMap>=0 || pFd->nFetchOut==0 ); + OSTRACE(("MAP-FILE pid=%lu, pFile=%p, size=%lld\n", + osGetCurrentProcessId(), pFd, nByte)); + + if( pFd->nFetchOut>0 ) return SQLITE_OK; + + if( nMap<0 ){ + rc = winFileSize((sqlite3_file*)pFd, &nMap); + if( rc ){ + OSTRACE(("MAP-FILE pid=%lu, pFile=%p, rc=SQLITE_IOERR_FSTAT\n", + osGetCurrentProcessId(), pFd)); + return SQLITE_IOERR_FSTAT; + } + } + if( nMap>pFd->mmapSizeMax ){ + nMap = pFd->mmapSizeMax; + } + nMap &= ~(sqlite3_int64)(winSysInfo.dwPageSize - 1); + + if( nMap==0 && pFd->mmapSize>0 ){ + winUnmapfile(pFd); + } + if( nMap!=pFd->mmapSize ){ + void *pNew = 0; + DWORD protect = PAGE_READONLY; + DWORD flags = FILE_MAP_READ; + + winUnmapfile(pFd); + if( (pFd->ctrlFlags & WINFILE_RDONLY)==0 ){ + protect = PAGE_READWRITE; + flags |= FILE_MAP_WRITE; + } +#if SQLITE_OS_WINRT + pFd->hMap = osCreateFileMappingFromApp(pFd->h, NULL, protect, nMap, NULL); +#elif defined(SQLITE_WIN32_HAS_WIDE) + pFd->hMap = osCreateFileMappingW(pFd->h, NULL, protect, + (DWORD)((nMap>>32) & 0xffffffff), + (DWORD)(nMap & 0xffffffff), NULL); +#elif defined(SQLITE_WIN32_HAS_ANSI) + pFd->hMap = osCreateFileMappingA(pFd->h, NULL, protect, + (DWORD)((nMap>>32) & 0xffffffff), + (DWORD)(nMap & 0xffffffff), NULL); +#endif + if( pFd->hMap==NULL ){ + pFd->lastErrno = osGetLastError(); + rc = winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno, + "winMapfile1", pFd->zPath); + /* Log the error, but continue normal operation using xRead/xWrite */ + OSTRACE(("MAP-FILE-CREATE pid=%lu, pFile=%p, rc=%s\n", + osGetCurrentProcessId(), pFd, sqlite3ErrName(rc))); + return SQLITE_OK; + } + assert( (nMap % winSysInfo.dwPageSize)==0 ); + assert( sizeof(SIZE_T)==sizeof(sqlite3_int64) || nMap<=0xffffffff ); +#if SQLITE_OS_WINRT + pNew = osMapViewOfFileFromApp(pFd->hMap, flags, 0, (SIZE_T)nMap); +#else + pNew = osMapViewOfFile(pFd->hMap, flags, 0, 0, (SIZE_T)nMap); +#endif + if( pNew==NULL ){ + osCloseHandle(pFd->hMap); + pFd->hMap = NULL; + pFd->lastErrno = osGetLastError(); + rc = winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno, + "winMapfile2", pFd->zPath); + /* Log the error, but continue normal operation using xRead/xWrite */ + OSTRACE(("MAP-FILE-MAP pid=%lu, pFile=%p, rc=%s\n", + osGetCurrentProcessId(), pFd, sqlite3ErrName(rc))); + return SQLITE_OK; + } + pFd->pMapRegion = pNew; + pFd->mmapSize = nMap; + pFd->mmapSizeActual = nMap; + } + + OSTRACE(("MAP-FILE pid=%lu, pFile=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFd)); + return SQLITE_OK; +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +/* +** If possible, return a pointer to a mapping of file fd starting at offset +** iOff. The mapping must be valid for at least nAmt bytes. +** +** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. +** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. +** Finally, if an error does occur, return an SQLite error code. The final +** value of *pp is undefined in this case. +** +** If this function does return a pointer, the caller must eventually +** release the reference by calling winUnfetch(). +*/ +static int winFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ +#if SQLITE_MAX_MMAP_SIZE>0 + winFile *pFd = (winFile*)fd; /* The underlying database file */ +#endif + *pp = 0; + + OSTRACE(("FETCH pid=%lu, pFile=%p, offset=%lld, amount=%d, pp=%p\n", + osGetCurrentProcessId(), fd, iOff, nAmt, pp)); + +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFd->mmapSizeMax>0 ){ + if( pFd->pMapRegion==0 ){ + int rc = winMapfile(pFd, -1); + if( rc!=SQLITE_OK ){ + OSTRACE(("FETCH pid=%lu, pFile=%p, rc=%s\n", + osGetCurrentProcessId(), pFd, sqlite3ErrName(rc))); + return rc; + } + } + if( pFd->mmapSize >= iOff+nAmt ){ + *pp = &((u8 *)pFd->pMapRegion)[iOff]; + pFd->nFetchOut++; + } + } +#endif + + OSTRACE(("FETCH pid=%lu, pFile=%p, pp=%p, *pp=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), fd, pp, *pp)); + return SQLITE_OK; +} + +/* +** If the third argument is non-NULL, then this function releases a +** reference obtained by an earlier call to winFetch(). The second +** argument passed to this function must be the same as the corresponding +** argument that was passed to the winFetch() invocation. +** +** Or, if the third argument is NULL, then this function is being called +** to inform the VFS layer that, according to POSIX, any existing mapping +** may now be invalid and should be unmapped. +*/ +static int winUnfetch(sqlite3_file *fd, i64 iOff, void *p){ +#if SQLITE_MAX_MMAP_SIZE>0 + winFile *pFd = (winFile*)fd; /* The underlying database file */ + + /* If p==0 (unmap the entire file) then there must be no outstanding + ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), + ** then there must be at least one outstanding. */ + assert( (p==0)==(pFd->nFetchOut==0) ); + + /* If p!=0, it must match the iOff value. */ + assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); + + OSTRACE(("UNFETCH pid=%lu, pFile=%p, offset=%lld, p=%p\n", + osGetCurrentProcessId(), pFd, iOff, p)); + + if( p ){ + pFd->nFetchOut--; + }else{ + /* FIXME: If Windows truly always prevents truncating or deleting a + ** file while a mapping is held, then the following winUnmapfile() call + ** is unnecessary can be omitted - potentially improving + ** performance. */ + winUnmapfile(pFd); + } + + assert( pFd->nFetchOut>=0 ); +#endif + + OSTRACE(("UNFETCH pid=%lu, pFile=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), fd)); + return SQLITE_OK; +} + +/* +** Here ends the implementation of all sqlite3_file methods. +** +********************** End sqlite3_file Methods ******************************* +******************************************************************************/ + +/* +** This vector defines all the methods that can operate on an +** sqlite3_file for win32. +*/ +static const sqlite3_io_methods winIoMethod = { + 3, /* iVersion */ + winClose, /* xClose */ + winRead, /* xRead */ + winWrite, /* xWrite */ + winTruncate, /* xTruncate */ + winSync, /* xSync */ + winFileSize, /* xFileSize */ + winLock, /* xLock */ + winUnlock, /* xUnlock */ + winCheckReservedLock, /* xCheckReservedLock */ + winFileControl, /* xFileControl */ + winSectorSize, /* xSectorSize */ + winDeviceCharacteristics, /* xDeviceCharacteristics */ + winShmMap, /* xShmMap */ + winShmLock, /* xShmLock */ + winShmBarrier, /* xShmBarrier */ + winShmUnmap, /* xShmUnmap */ + winFetch, /* xFetch */ + winUnfetch /* xUnfetch */ +}; + +/**************************************************************************** +**************************** sqlite3_vfs methods **************************** +** +** This division contains the implementation of methods on the +** sqlite3_vfs object. +*/ + +#if defined(__CYGWIN__) +/* +** Convert a filename from whatever the underlying operating system +** supports for filenames into UTF-8. Space to hold the result is +** obtained from malloc and must be freed by the calling function. +*/ +static char *winConvertToUtf8Filename(const void *zFilename){ + char *zConverted = 0; + if( osIsNT() ){ + zConverted = winUnicodeToUtf8(zFilename); + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + zConverted = sqlite3_win32_mbcs_to_utf8(zFilename); + } +#endif + /* caller will handle out of memory */ + return zConverted; +} +#endif + +/* +** Convert a UTF-8 filename into whatever form the underlying +** operating system wants filenames in. Space to hold the result +** is obtained from malloc and must be freed by the calling +** function. +*/ +static void *winConvertFromUtf8Filename(const char *zFilename){ + void *zConverted = 0; + if( osIsNT() ){ + zConverted = winUtf8ToUnicode(zFilename); + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + zConverted = sqlite3_win32_utf8_to_mbcs(zFilename); + } +#endif + /* caller will handle out of memory */ + return zConverted; +} + +/* +** This function returns non-zero if the specified UTF-8 string buffer +** ends with a directory separator character or one was successfully +** added to it. +*/ +static int winMakeEndInDirSep(int nBuf, char *zBuf){ + if( zBuf ){ + int nLen = sqlite3Strlen30(zBuf); + if( nLen>0 ){ + if( winIsDirSep(zBuf[nLen-1]) ){ + return 1; + }else if( nLen+1<nBuf ){ + zBuf[nLen] = winGetDirSep(); + zBuf[nLen+1] = '\0'; + return 1; + } + } + } + return 0; +} + +/* +** Create a temporary file name and store the resulting pointer into pzBuf. +** The pointer returned in pzBuf must be freed via sqlite3_free(). +*/ +static int winGetTempname(sqlite3_vfs *pVfs, char **pzBuf){ + static char zChars[] = + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789"; + size_t i, j; + int nPre = sqlite3Strlen30(SQLITE_TEMP_FILE_PREFIX); + int nMax, nBuf, nDir, nLen; + char *zBuf; + + /* It's odd to simulate an io-error here, but really this is just + ** using the io-error infrastructure to test that SQLite handles this + ** function failing. + */ + SimulateIOError( return SQLITE_IOERR ); + + /* Allocate a temporary buffer to store the fully qualified file + ** name for the temporary file. If this fails, we cannot continue. + */ + nMax = pVfs->mxPathname; nBuf = nMax + 2; + zBuf = sqlite3MallocZero( nBuf ); + if( !zBuf ){ + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); + return SQLITE_IOERR_NOMEM; + } + + /* Figure out the effective temporary directory. First, check if one + ** has been explicitly set by the application; otherwise, use the one + ** configured by the operating system. + */ + nDir = nMax - (nPre + 15); + assert( nDir>0 ); + if( sqlite3_temp_directory ){ + int nDirLen = sqlite3Strlen30(sqlite3_temp_directory); + if( nDirLen>0 ){ + if( !winIsDirSep(sqlite3_temp_directory[nDirLen-1]) ){ + nDirLen++; + } + if( nDirLen>nDir ){ + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_ERROR\n")); + return winLogError(SQLITE_ERROR, 0, "winGetTempname1", 0); + } + sqlite3_snprintf(nMax, zBuf, "%s", sqlite3_temp_directory); + } + } +#if defined(__CYGWIN__) + else{ + static const char *azDirs[] = { + 0, /* getenv("SQLITE_TMPDIR") */ + 0, /* getenv("TMPDIR") */ + 0, /* getenv("TMP") */ + 0, /* getenv("TEMP") */ + 0, /* getenv("USERPROFILE") */ + "/var/tmp", + "/usr/tmp", + "/tmp", + ".", + 0 /* List terminator */ + }; + unsigned int i; + const char *zDir = 0; + + if( !azDirs[0] ) azDirs[0] = getenv("SQLITE_TMPDIR"); + if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR"); + if( !azDirs[2] ) azDirs[2] = getenv("TMP"); + if( !azDirs[3] ) azDirs[3] = getenv("TEMP"); + if( !azDirs[4] ) azDirs[4] = getenv("USERPROFILE"); + for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); zDir=azDirs[i++]){ + void *zConverted; + if( zDir==0 ) continue; + /* If the path starts with a drive letter followed by the colon + ** character, assume it is already a native Win32 path; otherwise, + ** it must be converted to a native Win32 path via the Cygwin API + ** prior to using it. + */ + if( winIsDriveLetterAndColon(zDir) ){ + zConverted = winConvertFromUtf8Filename(zDir); + if( !zConverted ){ + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); + return SQLITE_IOERR_NOMEM; + } + if( winIsDir(zConverted) ){ + sqlite3_snprintf(nMax, zBuf, "%s", zDir); + sqlite3_free(zConverted); + break; + } + sqlite3_free(zConverted); + }else{ + zConverted = sqlite3MallocZero( nMax+1 ); + if( !zConverted ){ + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); + return SQLITE_IOERR_NOMEM; + } + if( cygwin_conv_path( + osIsNT() ? CCP_POSIX_TO_WIN_W : CCP_POSIX_TO_WIN_A, zDir, + zConverted, nMax+1)<0 ){ + sqlite3_free(zConverted); + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_CONVPATH\n")); + return winLogError(SQLITE_IOERR_CONVPATH, (DWORD)errno, + "winGetTempname2", zDir); + } + if( winIsDir(zConverted) ){ + /* At this point, we know the candidate directory exists and should + ** be used. However, we may need to convert the string containing + ** its name into UTF-8 (i.e. if it is UTF-16 right now). + */ + char *zUtf8 = winConvertToUtf8Filename(zConverted); + if( !zUtf8 ){ + sqlite3_free(zConverted); + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); + return SQLITE_IOERR_NOMEM; + } + sqlite3_snprintf(nMax, zBuf, "%s", zUtf8); + sqlite3_free(zUtf8); + sqlite3_free(zConverted); + break; + } + sqlite3_free(zConverted); + } + } + } +#elif !SQLITE_OS_WINRT && !defined(__CYGWIN__) + else if( osIsNT() ){ + char *zMulti; + LPWSTR zWidePath = sqlite3MallocZero( nMax*sizeof(WCHAR) ); + if( !zWidePath ){ + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); + return SQLITE_IOERR_NOMEM; + } + if( osGetTempPathW(nMax, zWidePath)==0 ){ + sqlite3_free(zWidePath); + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_GETTEMPPATH\n")); + return winLogError(SQLITE_IOERR_GETTEMPPATH, osGetLastError(), + "winGetTempname2", 0); + } + zMulti = winUnicodeToUtf8(zWidePath); + if( zMulti ){ + sqlite3_snprintf(nMax, zBuf, "%s", zMulti); + sqlite3_free(zMulti); + sqlite3_free(zWidePath); + }else{ + sqlite3_free(zWidePath); + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); + return SQLITE_IOERR_NOMEM; + } + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + char *zUtf8; + char *zMbcsPath = sqlite3MallocZero( nMax ); + if( !zMbcsPath ){ + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); + return SQLITE_IOERR_NOMEM; + } + if( osGetTempPathA(nMax, zMbcsPath)==0 ){ + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_GETTEMPPATH\n")); + return winLogError(SQLITE_IOERR_GETTEMPPATH, osGetLastError(), + "winGetTempname3", 0); + } + zUtf8 = sqlite3_win32_mbcs_to_utf8(zMbcsPath); + if( zUtf8 ){ + sqlite3_snprintf(nMax, zBuf, "%s", zUtf8); + sqlite3_free(zUtf8); + }else{ + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); + return SQLITE_IOERR_NOMEM; + } + } +#endif /* SQLITE_WIN32_HAS_ANSI */ +#endif /* !SQLITE_OS_WINRT */ + + /* + ** Check to make sure the temporary directory ends with an appropriate + ** separator. If it does not and there is not enough space left to add + ** one, fail. + */ + if( !winMakeEndInDirSep(nDir+1, zBuf) ){ + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_ERROR\n")); + return winLogError(SQLITE_ERROR, 0, "winGetTempname4", 0); + } + + /* + ** Check that the output buffer is large enough for the temporary file + ** name in the following format: + ** + ** "<temporary_directory>/etilqs_XXXXXXXXXXXXXXX\0\0" + ** + ** If not, return SQLITE_ERROR. The number 17 is used here in order to + ** account for the space used by the 15 character random suffix and the + ** two trailing NUL characters. The final directory separator character + ** has already added if it was not already present. + */ + nLen = sqlite3Strlen30(zBuf); + if( (nLen + nPre + 17) > nBuf ){ + sqlite3_free(zBuf); + OSTRACE(("TEMP-FILENAME rc=SQLITE_ERROR\n")); + return winLogError(SQLITE_ERROR, 0, "winGetTempname5", 0); + } + + sqlite3_snprintf(nBuf-16-nLen, zBuf+nLen, SQLITE_TEMP_FILE_PREFIX); + + j = sqlite3Strlen30(zBuf); + sqlite3_randomness(15, &zBuf[j]); + for(i=0; i<15; i++, j++){ + zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ]; + } + zBuf[j] = 0; + zBuf[j+1] = 0; + *pzBuf = zBuf; + + OSTRACE(("TEMP-FILENAME name=%s, rc=SQLITE_OK\n", zBuf)); + return SQLITE_OK; +} + +/* +** Return TRUE if the named file is really a directory. Return false if +** it is something other than a directory, or if there is any kind of memory +** allocation failure. +*/ +static int winIsDir(const void *zConverted){ + DWORD attr; + int rc = 0; + DWORD lastErrno; + + if( osIsNT() ){ + int cnt = 0; + WIN32_FILE_ATTRIBUTE_DATA sAttrData; + memset(&sAttrData, 0, sizeof(sAttrData)); + while( !(rc = osGetFileAttributesExW((LPCWSTR)zConverted, + GetFileExInfoStandard, + &sAttrData)) && winRetryIoerr(&cnt, &lastErrno) ){} + if( !rc ){ + return 0; /* Invalid name? */ + } + attr = sAttrData.dwFileAttributes; +#if SQLITE_OS_WINCE==0 + }else{ + attr = osGetFileAttributesA((char*)zConverted); +#endif + } + return (attr!=INVALID_FILE_ATTRIBUTES) && (attr&FILE_ATTRIBUTE_DIRECTORY); +} + +/* +** Open a file. +*/ +static int winOpen( + sqlite3_vfs *pVfs, /* Used to get maximum path name length */ + const char *zName, /* Name of the file (UTF-8) */ + sqlite3_file *id, /* Write the SQLite file handle here */ + int flags, /* Open mode flags */ + int *pOutFlags /* Status return flags */ +){ + HANDLE h; + DWORD lastErrno = 0; + DWORD dwDesiredAccess; + DWORD dwShareMode; + DWORD dwCreationDisposition; + DWORD dwFlagsAndAttributes = 0; +#if SQLITE_OS_WINCE + int isTemp = 0; +#endif + winFile *pFile = (winFile*)id; + void *zConverted; /* Filename in OS encoding */ + const char *zUtf8Name = zName; /* Filename in UTF-8 encoding */ + int cnt = 0; + + /* If argument zPath is a NULL pointer, this function is required to open + ** a temporary file. Use this buffer to store the file name in. + */ + char *zTmpname = 0; /* For temporary filename, if necessary. */ + + int rc = SQLITE_OK; /* Function Return Code */ +#if !defined(NDEBUG) || SQLITE_OS_WINCE + int eType = flags&0xFFFFFF00; /* Type of file to open */ +#endif + + int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); + int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); + int isCreate = (flags & SQLITE_OPEN_CREATE); + int isReadonly = (flags & SQLITE_OPEN_READONLY); + int isReadWrite = (flags & SQLITE_OPEN_READWRITE); + +#ifndef NDEBUG + int isOpenJournal = (isCreate && ( + eType==SQLITE_OPEN_MASTER_JOURNAL + || eType==SQLITE_OPEN_MAIN_JOURNAL + || eType==SQLITE_OPEN_WAL + )); +#endif + + OSTRACE(("OPEN name=%s, pFile=%p, flags=%x, pOutFlags=%p\n", + zUtf8Name, id, flags, pOutFlags)); + + /* Check the following statements are true: + ** + ** (a) Exactly one of the READWRITE and READONLY flags must be set, and + ** (b) if CREATE is set, then READWRITE must also be set, and + ** (c) if EXCLUSIVE is set, then CREATE must also be set. + ** (d) if DELETEONCLOSE is set, then CREATE must also be set. + */ + assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); + assert(isCreate==0 || isReadWrite); + assert(isExclusive==0 || isCreate); + assert(isDelete==0 || isCreate); + + /* The main DB, main journal, WAL file and master journal are never + ** automatically deleted. Nor are they ever temporary files. */ + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL ); + assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); + + /* Assert that the upper layer has set one of the "file-type" flags. */ + assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB + || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL + || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL + || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL + ); + + assert( pFile!=0 ); + memset(pFile, 0, sizeof(winFile)); + pFile->h = INVALID_HANDLE_VALUE; + +#if SQLITE_OS_WINRT + if( !zUtf8Name && !sqlite3_temp_directory ){ + sqlite3_log(SQLITE_ERROR, + "sqlite3_temp_directory variable should be set for WinRT"); + } +#endif + + /* If the second argument to this function is NULL, generate a + ** temporary file name to use + */ + if( !zUtf8Name ){ + assert( isDelete && !isOpenJournal ); + rc = winGetTempname(pVfs, &zTmpname); + if( rc!=SQLITE_OK ){ + OSTRACE(("OPEN name=%s, rc=%s", zUtf8Name, sqlite3ErrName(rc))); + return rc; + } + zUtf8Name = zTmpname; + } + + /* Database filenames are double-zero terminated if they are not + ** URIs with parameters. Hence, they can always be passed into + ** sqlite3_uri_parameter(). + */ + assert( (eType!=SQLITE_OPEN_MAIN_DB) || (flags & SQLITE_OPEN_URI) || + zUtf8Name[sqlite3Strlen30(zUtf8Name)+1]==0 ); + + /* Convert the filename to the system encoding. */ + zConverted = winConvertFromUtf8Filename(zUtf8Name); + if( zConverted==0 ){ + sqlite3_free(zTmpname); + OSTRACE(("OPEN name=%s, rc=SQLITE_IOERR_NOMEM", zUtf8Name)); + return SQLITE_IOERR_NOMEM; + } + + if( winIsDir(zConverted) ){ + sqlite3_free(zConverted); + sqlite3_free(zTmpname); + OSTRACE(("OPEN name=%s, rc=SQLITE_CANTOPEN_ISDIR", zUtf8Name)); + return SQLITE_CANTOPEN_ISDIR; + } + + if( isReadWrite ){ + dwDesiredAccess = GENERIC_READ | GENERIC_WRITE; + }else{ + dwDesiredAccess = GENERIC_READ; + } + + /* SQLITE_OPEN_EXCLUSIVE is used to make sure that a new file is + ** created. SQLite doesn't use it to indicate "exclusive access" + ** as it is usually understood. + */ + if( isExclusive ){ + /* Creates a new file, only if it does not already exist. */ + /* If the file exists, it fails. */ + dwCreationDisposition = CREATE_NEW; + }else if( isCreate ){ + /* Open existing file, or create if it doesn't exist */ + dwCreationDisposition = OPEN_ALWAYS; + }else{ + /* Opens a file, only if it exists. */ + dwCreationDisposition = OPEN_EXISTING; + } + + dwShareMode = FILE_SHARE_READ | FILE_SHARE_WRITE; + + if( isDelete ){ +#if SQLITE_OS_WINCE + dwFlagsAndAttributes = FILE_ATTRIBUTE_HIDDEN; + isTemp = 1; +#else + dwFlagsAndAttributes = FILE_ATTRIBUTE_TEMPORARY + | FILE_ATTRIBUTE_HIDDEN + | FILE_FLAG_DELETE_ON_CLOSE; +#endif + }else{ + dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL; + } + /* Reports from the internet are that performance is always + ** better if FILE_FLAG_RANDOM_ACCESS is used. Ticket #2699. */ +#if SQLITE_OS_WINCE + dwFlagsAndAttributes |= FILE_FLAG_RANDOM_ACCESS; +#endif + + if( osIsNT() ){ +#if SQLITE_OS_WINRT + CREATEFILE2_EXTENDED_PARAMETERS extendedParameters; + extendedParameters.dwSize = sizeof(CREATEFILE2_EXTENDED_PARAMETERS); + extendedParameters.dwFileAttributes = + dwFlagsAndAttributes & FILE_ATTRIBUTE_MASK; + extendedParameters.dwFileFlags = dwFlagsAndAttributes & FILE_FLAG_MASK; + extendedParameters.dwSecurityQosFlags = SECURITY_ANONYMOUS; + extendedParameters.lpSecurityAttributes = NULL; + extendedParameters.hTemplateFile = NULL; + while( (h = osCreateFile2((LPCWSTR)zConverted, + dwDesiredAccess, + dwShareMode, + dwCreationDisposition, + &extendedParameters))==INVALID_HANDLE_VALUE && + winRetryIoerr(&cnt, &lastErrno) ){ + /* Noop */ + } +#else + while( (h = osCreateFileW((LPCWSTR)zConverted, + dwDesiredAccess, + dwShareMode, NULL, + dwCreationDisposition, + dwFlagsAndAttributes, + NULL))==INVALID_HANDLE_VALUE && + winRetryIoerr(&cnt, &lastErrno) ){ + /* Noop */ + } +#endif + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + while( (h = osCreateFileA((LPCSTR)zConverted, + dwDesiredAccess, + dwShareMode, NULL, + dwCreationDisposition, + dwFlagsAndAttributes, + NULL))==INVALID_HANDLE_VALUE && + winRetryIoerr(&cnt, &lastErrno) ){ + /* Noop */ + } + } +#endif + winLogIoerr(cnt, __LINE__); + + OSTRACE(("OPEN file=%p, name=%s, access=%lx, rc=%s\n", h, zUtf8Name, + dwDesiredAccess, (h==INVALID_HANDLE_VALUE) ? "failed" : "ok")); + + if( h==INVALID_HANDLE_VALUE ){ + pFile->lastErrno = lastErrno; + winLogError(SQLITE_CANTOPEN, pFile->lastErrno, "winOpen", zUtf8Name); + sqlite3_free(zConverted); + sqlite3_free(zTmpname); + if( isReadWrite && !isExclusive ){ + return winOpen(pVfs, zName, id, + ((flags|SQLITE_OPEN_READONLY) & + ~(SQLITE_OPEN_CREATE|SQLITE_OPEN_READWRITE)), + pOutFlags); + }else{ + return SQLITE_CANTOPEN_BKPT; + } + } + + if( pOutFlags ){ + if( isReadWrite ){ + *pOutFlags = SQLITE_OPEN_READWRITE; + }else{ + *pOutFlags = SQLITE_OPEN_READONLY; + } + } + + OSTRACE(("OPEN file=%p, name=%s, access=%lx, pOutFlags=%p, *pOutFlags=%d, " + "rc=%s\n", h, zUtf8Name, dwDesiredAccess, pOutFlags, pOutFlags ? + *pOutFlags : 0, (h==INVALID_HANDLE_VALUE) ? "failed" : "ok")); + +#if SQLITE_OS_WINCE + if( isReadWrite && eType==SQLITE_OPEN_MAIN_DB + && (rc = winceCreateLock(zName, pFile))!=SQLITE_OK + ){ + osCloseHandle(h); + sqlite3_free(zConverted); + sqlite3_free(zTmpname); + OSTRACE(("OPEN-CE-LOCK name=%s, rc=%s\n", zName, sqlite3ErrName(rc))); + return rc; + } + if( isTemp ){ + pFile->zDeleteOnClose = zConverted; + }else +#endif + { + sqlite3_free(zConverted); + } + + sqlite3_free(zTmpname); + pFile->pMethod = &winIoMethod; + pFile->pVfs = pVfs; + pFile->h = h; + if( isReadonly ){ + pFile->ctrlFlags |= WINFILE_RDONLY; + } + if( sqlite3_uri_boolean(zName, "psow", SQLITE_POWERSAFE_OVERWRITE) ){ + pFile->ctrlFlags |= WINFILE_PSOW; + } + pFile->lastErrno = NO_ERROR; + pFile->zPath = zName; +#if SQLITE_MAX_MMAP_SIZE>0 + pFile->hMap = NULL; + pFile->pMapRegion = 0; + pFile->mmapSize = 0; + pFile->mmapSizeActual = 0; + pFile->mmapSizeMax = sqlite3GlobalConfig.szMmap; +#endif + + OpenCounter(+1); + return rc; +} + +/* +** Delete the named file. +** +** Note that Windows does not allow a file to be deleted if some other +** process has it open. Sometimes a virus scanner or indexing program +** will open a journal file shortly after it is created in order to do +** whatever it does. While this other process is holding the +** file open, we will be unable to delete it. To work around this +** problem, we delay 100 milliseconds and try to delete again. Up +** to MX_DELETION_ATTEMPTs deletion attempts are run before giving +** up and returning an error. +*/ +static int winDelete( + sqlite3_vfs *pVfs, /* Not used on win32 */ + const char *zFilename, /* Name of file to delete */ + int syncDir /* Not used on win32 */ +){ + int cnt = 0; + int rc; + DWORD attr; + DWORD lastErrno = 0; + void *zConverted; + UNUSED_PARAMETER(pVfs); + UNUSED_PARAMETER(syncDir); + + SimulateIOError(return SQLITE_IOERR_DELETE); + OSTRACE(("DELETE name=%s, syncDir=%d\n", zFilename, syncDir)); + + zConverted = winConvertFromUtf8Filename(zFilename); + if( zConverted==0 ){ + OSTRACE(("DELETE name=%s, rc=SQLITE_IOERR_NOMEM\n", zFilename)); + return SQLITE_IOERR_NOMEM; + } + if( osIsNT() ){ + do { +#if SQLITE_OS_WINRT + WIN32_FILE_ATTRIBUTE_DATA sAttrData; + memset(&sAttrData, 0, sizeof(sAttrData)); + if ( osGetFileAttributesExW(zConverted, GetFileExInfoStandard, + &sAttrData) ){ + attr = sAttrData.dwFileAttributes; + }else{ + lastErrno = osGetLastError(); + if( lastErrno==ERROR_FILE_NOT_FOUND + || lastErrno==ERROR_PATH_NOT_FOUND ){ + rc = SQLITE_IOERR_DELETE_NOENT; /* Already gone? */ + }else{ + rc = SQLITE_ERROR; + } + break; + } +#else + attr = osGetFileAttributesW(zConverted); +#endif + if ( attr==INVALID_FILE_ATTRIBUTES ){ + lastErrno = osGetLastError(); + if( lastErrno==ERROR_FILE_NOT_FOUND + || lastErrno==ERROR_PATH_NOT_FOUND ){ + rc = SQLITE_IOERR_DELETE_NOENT; /* Already gone? */ + }else{ + rc = SQLITE_ERROR; + } + break; + } + if ( attr&FILE_ATTRIBUTE_DIRECTORY ){ + rc = SQLITE_ERROR; /* Files only. */ + break; + } + if ( osDeleteFileW(zConverted) ){ + rc = SQLITE_OK; /* Deleted OK. */ + break; + } + if ( !winRetryIoerr(&cnt, &lastErrno) ){ + rc = SQLITE_ERROR; /* No more retries. */ + break; + } + } while(1); + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + do { + attr = osGetFileAttributesA(zConverted); + if ( attr==INVALID_FILE_ATTRIBUTES ){ + lastErrno = osGetLastError(); + if( lastErrno==ERROR_FILE_NOT_FOUND + || lastErrno==ERROR_PATH_NOT_FOUND ){ + rc = SQLITE_IOERR_DELETE_NOENT; /* Already gone? */ + }else{ + rc = SQLITE_ERROR; + } + break; + } + if ( attr&FILE_ATTRIBUTE_DIRECTORY ){ + rc = SQLITE_ERROR; /* Files only. */ + break; + } + if ( osDeleteFileA(zConverted) ){ + rc = SQLITE_OK; /* Deleted OK. */ + break; + } + if ( !winRetryIoerr(&cnt, &lastErrno) ){ + rc = SQLITE_ERROR; /* No more retries. */ + break; + } + } while(1); + } +#endif + if( rc && rc!=SQLITE_IOERR_DELETE_NOENT ){ + rc = winLogError(SQLITE_IOERR_DELETE, lastErrno, "winDelete", zFilename); + }else{ + winLogIoerr(cnt, __LINE__); + } + sqlite3_free(zConverted); + OSTRACE(("DELETE name=%s, rc=%s\n", zFilename, sqlite3ErrName(rc))); + return rc; +} + +/* +** Check the existence and status of a file. +*/ +static int winAccess( + sqlite3_vfs *pVfs, /* Not used on win32 */ + const char *zFilename, /* Name of file to check */ + int flags, /* Type of test to make on this file */ + int *pResOut /* OUT: Result */ +){ + DWORD attr; + int rc = 0; + DWORD lastErrno = 0; + void *zConverted; + UNUSED_PARAMETER(pVfs); + + SimulateIOError( return SQLITE_IOERR_ACCESS; ); + OSTRACE(("ACCESS name=%s, flags=%x, pResOut=%p\n", + zFilename, flags, pResOut)); + + zConverted = winConvertFromUtf8Filename(zFilename); + if( zConverted==0 ){ + OSTRACE(("ACCESS name=%s, rc=SQLITE_IOERR_NOMEM\n", zFilename)); + return SQLITE_IOERR_NOMEM; + } + if( osIsNT() ){ + int cnt = 0; + WIN32_FILE_ATTRIBUTE_DATA sAttrData; + memset(&sAttrData, 0, sizeof(sAttrData)); + while( !(rc = osGetFileAttributesExW((LPCWSTR)zConverted, + GetFileExInfoStandard, + &sAttrData)) && winRetryIoerr(&cnt, &lastErrno) ){} + if( rc ){ + /* For an SQLITE_ACCESS_EXISTS query, treat a zero-length file + ** as if it does not exist. + */ + if( flags==SQLITE_ACCESS_EXISTS + && sAttrData.nFileSizeHigh==0 + && sAttrData.nFileSizeLow==0 ){ + attr = INVALID_FILE_ATTRIBUTES; + }else{ + attr = sAttrData.dwFileAttributes; + } + }else{ + winLogIoerr(cnt, __LINE__); + if( lastErrno!=ERROR_FILE_NOT_FOUND && lastErrno!=ERROR_PATH_NOT_FOUND ){ + sqlite3_free(zConverted); + return winLogError(SQLITE_IOERR_ACCESS, lastErrno, "winAccess", + zFilename); + }else{ + attr = INVALID_FILE_ATTRIBUTES; + } + } + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + attr = osGetFileAttributesA((char*)zConverted); + } +#endif + sqlite3_free(zConverted); + switch( flags ){ + case SQLITE_ACCESS_READ: + case SQLITE_ACCESS_EXISTS: + rc = attr!=INVALID_FILE_ATTRIBUTES; + break; + case SQLITE_ACCESS_READWRITE: + rc = attr!=INVALID_FILE_ATTRIBUTES && + (attr & FILE_ATTRIBUTE_READONLY)==0; + break; + default: + assert(!"Invalid flags argument"); + } + *pResOut = rc; + OSTRACE(("ACCESS name=%s, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n", + zFilename, pResOut, *pResOut)); + return SQLITE_OK; +} + +/* +** Returns non-zero if the specified path name starts with a drive letter +** followed by a colon character. +*/ +static BOOL winIsDriveLetterAndColon( + const char *zPathname +){ + return ( sqlite3Isalpha(zPathname[0]) && zPathname[1]==':' ); +} + +/* +** Returns non-zero if the specified path name should be used verbatim. If +** non-zero is returned from this function, the calling function must simply +** use the provided path name verbatim -OR- resolve it into a full path name +** using the GetFullPathName Win32 API function (if available). +*/ +static BOOL winIsVerbatimPathname( + const char *zPathname +){ + /* + ** If the path name starts with a forward slash or a backslash, it is either + ** a legal UNC name, a volume relative path, or an absolute path name in the + ** "Unix" format on Windows. There is no easy way to differentiate between + ** the final two cases; therefore, we return the safer return value of TRUE + ** so that callers of this function will simply use it verbatim. + */ + if ( winIsDirSep(zPathname[0]) ){ + return TRUE; + } + + /* + ** If the path name starts with a letter and a colon it is either a volume + ** relative path or an absolute path. Callers of this function must not + ** attempt to treat it as a relative path name (i.e. they should simply use + ** it verbatim). + */ + if ( winIsDriveLetterAndColon(zPathname) ){ + return TRUE; + } + + /* + ** If we get to this point, the path name should almost certainly be a purely + ** relative one (i.e. not a UNC name, not absolute, and not volume relative). + */ + return FALSE; +} + +/* +** Turn a relative pathname into a full pathname. Write the full +** pathname into zOut[]. zOut[] will be at least pVfs->mxPathname +** bytes in size. +*/ +static int winFullPathname( + sqlite3_vfs *pVfs, /* Pointer to vfs object */ + const char *zRelative, /* Possibly relative input path */ + int nFull, /* Size of output buffer in bytes */ + char *zFull /* Output buffer */ +){ + +#if defined(__CYGWIN__) + SimulateIOError( return SQLITE_ERROR ); + UNUSED_PARAMETER(nFull); + assert( nFull>=pVfs->mxPathname ); + char *zOut = sqlite3MallocZero( pVfs->mxPathname+1 ); + if( !zOut ){ + return SQLITE_IOERR_NOMEM; + } + if( cygwin_conv_path( + (osIsNT() ? CCP_POSIX_TO_WIN_W : CCP_POSIX_TO_WIN_A), + zRelative, zOut, pVfs->mxPathname+1)<0 ){ + sqlite3_free(zOut); + return winLogError(SQLITE_CANTOPEN_CONVPATH, (DWORD)errno, + "winFullPathname2", zRelative); + }else{ + char *zUtf8 = winConvertToUtf8Filename(zOut); + if( !zUtf8 ){ + sqlite3_free(zOut); + return SQLITE_IOERR_NOMEM; + } + sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s", zUtf8); + sqlite3_free(zUtf8); + sqlite3_free(zOut); + } + return SQLITE_OK; +#endif + +#if (SQLITE_OS_WINCE || SQLITE_OS_WINRT) && !defined(__CYGWIN__) + SimulateIOError( return SQLITE_ERROR ); + /* WinCE has no concept of a relative pathname, or so I am told. */ + /* WinRT has no way to convert a relative path to an absolute one. */ + sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s", zRelative); + return SQLITE_OK; +#endif + +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && !defined(__CYGWIN__) + DWORD nByte; + void *zConverted; + char *zOut; + + /* If this path name begins with "/X:", where "X" is any alphabetic + ** character, discard the initial "/" from the pathname. + */ + if( zRelative[0]=='/' && winIsDriveLetterAndColon(zRelative+1) ){ + zRelative++; + } + + /* It's odd to simulate an io-error here, but really this is just + ** using the io-error infrastructure to test that SQLite handles this + ** function failing. This function could fail if, for example, the + ** current working directory has been unlinked. + */ + SimulateIOError( return SQLITE_ERROR ); + zConverted = winConvertFromUtf8Filename(zRelative); + if( zConverted==0 ){ + return SQLITE_IOERR_NOMEM; + } + if( osIsNT() ){ + LPWSTR zTemp; + nByte = osGetFullPathNameW((LPCWSTR)zConverted, 0, 0, 0); + if( nByte==0 ){ + sqlite3_free(zConverted); + return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(), + "winFullPathname1", zRelative); + } + nByte += 3; + zTemp = sqlite3MallocZero( nByte*sizeof(zTemp[0]) ); + if( zTemp==0 ){ + sqlite3_free(zConverted); + return SQLITE_IOERR_NOMEM; + } + nByte = osGetFullPathNameW((LPCWSTR)zConverted, nByte, zTemp, 0); + if( nByte==0 ){ + sqlite3_free(zConverted); + sqlite3_free(zTemp); + return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(), + "winFullPathname2", zRelative); + } + sqlite3_free(zConverted); + zOut = winUnicodeToUtf8(zTemp); + sqlite3_free(zTemp); + } +#ifdef SQLITE_WIN32_HAS_ANSI + else{ + char *zTemp; + nByte = osGetFullPathNameA((char*)zConverted, 0, 0, 0); + if( nByte==0 ){ + sqlite3_free(zConverted); + return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(), + "winFullPathname3", zRelative); + } + nByte += 3; + zTemp = sqlite3MallocZero( nByte*sizeof(zTemp[0]) ); + if( zTemp==0 ){ + sqlite3_free(zConverted); + return SQLITE_IOERR_NOMEM; + } + nByte = osGetFullPathNameA((char*)zConverted, nByte, zTemp, 0); + if( nByte==0 ){ + sqlite3_free(zConverted); + sqlite3_free(zTemp); + return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(), + "winFullPathname4", zRelative); + } + sqlite3_free(zConverted); + zOut = sqlite3_win32_mbcs_to_utf8(zTemp); + sqlite3_free(zTemp); + } +#endif + if( zOut ){ + sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s", zOut); + sqlite3_free(zOut); + return SQLITE_OK; + }else{ + return SQLITE_IOERR_NOMEM; + } +#endif +} + + #define winDlOpen 0 + #define winDlError 0 + #define winDlSym 0 + #define winDlClose 0 + + +/* +** Write up to nBuf bytes of randomness into zBuf. +*/ +static int winRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ + int n = 0; + UNUSED_PARAMETER(pVfs); +#if defined(SQLITE_TEST) || defined(SQLITE_OMIT_RANDOMNESS) + n = nBuf; + memset(zBuf, 0, nBuf); +#else + if( sizeof(SYSTEMTIME)<=nBuf-n ){ + SYSTEMTIME x; + osGetSystemTime(&x); + memcpy(&zBuf[n], &x, sizeof(x)); + n += sizeof(x); + } + if( sizeof(DWORD)<=nBuf-n ){ + DWORD pid = osGetCurrentProcessId(); + memcpy(&zBuf[n], &pid, sizeof(pid)); + n += sizeof(pid); + } +#if SQLITE_OS_WINRT + if( sizeof(ULONGLONG)<=nBuf-n ){ + ULONGLONG cnt = osGetTickCount64(); + memcpy(&zBuf[n], &cnt, sizeof(cnt)); + n += sizeof(cnt); + } +#else + if( sizeof(DWORD)<=nBuf-n ){ + DWORD cnt = osGetTickCount(); + memcpy(&zBuf[n], &cnt, sizeof(cnt)); + n += sizeof(cnt); + } +#endif + if( sizeof(LARGE_INTEGER)<=nBuf-n ){ + LARGE_INTEGER i; + osQueryPerformanceCounter(&i); + memcpy(&zBuf[n], &i, sizeof(i)); + n += sizeof(i); + } +#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && SQLITE_WIN32_USE_UUID + if( sizeof(UUID)<=nBuf-n ){ + UUID id; + memset(&id, 0, sizeof(UUID)); + osUuidCreate(&id); + memcpy(&zBuf[n], &id, sizeof(UUID)); + n += sizeof(UUID); + } + if( sizeof(UUID)<=nBuf-n ){ + UUID id; + memset(&id, 0, sizeof(UUID)); + osUuidCreateSequential(&id); + memcpy(&zBuf[n], &id, sizeof(UUID)); + n += sizeof(UUID); + } +#endif +#endif /* defined(SQLITE_TEST) || defined(SQLITE_ZERO_PRNG_SEED) */ + return n; +} + + +/* +** Sleep for a little while. Return the amount of time slept. +*/ +static int winSleep(sqlite3_vfs *pVfs, int microsec){ + sqlite3_win32_sleep((microsec+999)/1000); + UNUSED_PARAMETER(pVfs); + return ((microsec+999)/1000)*1000; +} + +/* +** The following variable, if set to a non-zero value, is interpreted as +** the number of seconds since 1970 and is used to set the result of +** sqlite3OsCurrentTime() during testing. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ +#endif + +/* +** Find the current time (in Universal Coordinated Time). Write into *piNow +** the current time and date as a Julian Day number times 86_400_000. In +** other words, write into *piNow the number of milliseconds since the Julian +** epoch of noon in Greenwich on November 24, 4714 B.C according to the +** proleptic Gregorian calendar. +** +** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date +** cannot be found. +*/ +static int winCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *piNow){ + /* FILETIME structure is a 64-bit value representing the number of + 100-nanosecond intervals since January 1, 1601 (= JD 2305813.5). + */ + FILETIME ft; + static const sqlite3_int64 winFiletimeEpoch = 23058135*(sqlite3_int64)8640000; +#ifdef SQLITE_TEST + static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; +#endif + /* 2^32 - to avoid use of LL and warnings in gcc */ + static const sqlite3_int64 max32BitValue = + (sqlite3_int64)2000000000 + (sqlite3_int64)2000000000 + + (sqlite3_int64)294967296; + +#if SQLITE_OS_WINCE + SYSTEMTIME time; + osGetSystemTime(&time); + /* if SystemTimeToFileTime() fails, it returns zero. */ + if (!osSystemTimeToFileTime(&time,&ft)){ + return SQLITE_ERROR; + } +#else + osGetSystemTimeAsFileTime( &ft ); +#endif + + *piNow = winFiletimeEpoch + + ((((sqlite3_int64)ft.dwHighDateTime)*max32BitValue) + + (sqlite3_int64)ft.dwLowDateTime)/(sqlite3_int64)10000; + +#ifdef SQLITE_TEST + if( sqlite3_current_time ){ + *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; + } +#endif + UNUSED_PARAMETER(pVfs); + return SQLITE_OK; +} + +/* +** Find the current time (in Universal Coordinated Time). Write the +** current time and date as a Julian Day number into *prNow and +** return 0. Return 1 if the time and date cannot be found. +*/ +static int winCurrentTime(sqlite3_vfs *pVfs, double *prNow){ + int rc; + sqlite3_int64 i; + rc = winCurrentTimeInt64(pVfs, &i); + if( !rc ){ + *prNow = i/86400000.0; + } + return rc; +} + +/* +** The idea is that this function works like a combination of +** GetLastError() and FormatMessage() on Windows (or errno and +** strerror_r() on Unix). After an error is returned by an OS +** function, SQLite calls this function with zBuf pointing to +** a buffer of nBuf bytes. The OS layer should populate the +** buffer with a nul-terminated UTF-8 encoded error message +** describing the last IO error to have occurred within the calling +** thread. +** +** If the error message is too large for the supplied buffer, +** it should be truncated. The return value of xGetLastError +** is zero if the error message fits in the buffer, or non-zero +** otherwise (if the message was truncated). If non-zero is returned, +** then it is not necessary to include the nul-terminator character +** in the output buffer. +** +** Not supplying an error message will have no adverse effect +** on SQLite. It is fine to have an implementation that never +** returns an error message: +** +** int xGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ +** assert(zBuf[0]=='\0'); +** return 0; +** } +** +** However if an error message is supplied, it will be incorporated +** by sqlite into the error message available to the user using +** sqlite3_errmsg(), possibly making IO errors easier to debug. +*/ +static int winGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ + UNUSED_PARAMETER(pVfs); + return winGetLastErrorMsg(osGetLastError(), nBuf, zBuf); +} + +/* +** Initialize and deinitialize the operating system interface. +*/ +SQLITE_PRIVATE int sqlite3_os_init(void){ + static sqlite3_vfs winVfs = { + 3, /* iVersion */ + sizeof(winFile), /* szOsFile */ + SQLITE_WIN32_MAX_PATH_BYTES, /* mxPathname */ + 0, /* pNext */ + "win32", /* zName */ + 0, /* pAppData */ + winOpen, /* xOpen */ + winDelete, /* xDelete */ + winAccess, /* xAccess */ + winFullPathname, /* xFullPathname */ + winDlOpen, /* xDlOpen */ + winDlError, /* xDlError */ + winDlSym, /* xDlSym */ + winDlClose, /* xDlClose */ + winRandomness, /* xRandomness */ + winSleep, /* xSleep */ + winCurrentTime, /* xCurrentTime */ + winGetLastError, /* xGetLastError */ + winCurrentTimeInt64, /* xCurrentTimeInt64 */ + winSetSystemCall, /* xSetSystemCall */ + winGetSystemCall, /* xGetSystemCall */ + winNextSystemCall, /* xNextSystemCall */ + }; +#if defined(SQLITE_WIN32_HAS_WIDE) + static sqlite3_vfs winLongPathVfs = { + 3, /* iVersion */ + sizeof(winFile), /* szOsFile */ + SQLITE_WINNT_MAX_PATH_BYTES, /* mxPathname */ + 0, /* pNext */ + "win32-longpath", /* zName */ + 0, /* pAppData */ + winOpen, /* xOpen */ + winDelete, /* xDelete */ + winAccess, /* xAccess */ + winFullPathname, /* xFullPathname */ + winDlOpen, /* xDlOpen */ + winDlError, /* xDlError */ + winDlSym, /* xDlSym */ + winDlClose, /* xDlClose */ + winRandomness, /* xRandomness */ + winSleep, /* xSleep */ + winCurrentTime, /* xCurrentTime */ + winGetLastError, /* xGetLastError */ + winCurrentTimeInt64, /* xCurrentTimeInt64 */ + winSetSystemCall, /* xSetSystemCall */ + winGetSystemCall, /* xGetSystemCall */ + winNextSystemCall, /* xNextSystemCall */ + }; +#endif + + /* Double-check that the aSyscall[] array has been constructed + ** correctly. See ticket [bb3a86e890c8e96ab] */ + assert( ArraySize(aSyscall)==80 ); + + /* get memory map allocation granularity */ + memset(&winSysInfo, 0, sizeof(SYSTEM_INFO)); +#if SQLITE_OS_WINRT + osGetNativeSystemInfo(&winSysInfo); +#else + osGetSystemInfo(&winSysInfo); +#endif + assert( winSysInfo.dwAllocationGranularity>0 ); + assert( winSysInfo.dwPageSize>0 ); + + sqlite3_vfs_register(&winVfs, 1); + +#if defined(SQLITE_WIN32_HAS_WIDE) + sqlite3_vfs_register(&winLongPathVfs, 0); +#endif + + return SQLITE_OK; +} + +SQLITE_PRIVATE int sqlite3_os_end(void){ +#if SQLITE_OS_WINRT + if( sleepObj!=NULL ){ + osCloseHandle(sleepObj); + sleepObj = NULL; + } +#endif + return SQLITE_OK; +} + +#endif /* SQLITE_OS_WIN */ + +/************** End of os_win.c **********************************************/ +/************** Begin file bitvec.c ******************************************/ +/* +** 2008 February 16 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file implements an object that represents a fixed-length +** bitmap. Bits are numbered starting with 1. +** +** A bitmap is used to record which pages of a database file have been +** journalled during a transaction, or which pages have the "dont-write" +** property. Usually only a few pages are meet either condition. +** So the bitmap is usually sparse and has low cardinality. +** But sometimes (for example when during a DROP of a large table) most +** or all of the pages in a database can get journalled. In those cases, +** the bitmap becomes dense with high cardinality. The algorithm needs +** to handle both cases well. +** +** The size of the bitmap is fixed when the object is created. +** +** All bits are clear when the bitmap is created. Individual bits +** may be set or cleared one at a time. +** +** Test operations are about 100 times more common that set operations. +** Clear operations are exceedingly rare. There are usually between +** 5 and 500 set operations per Bitvec object, though the number of sets can +** sometimes grow into tens of thousands or larger. The size of the +** Bitvec object is the number of pages in the database file at the +** start of a transaction, and is thus usually less than a few thousand, +** but can be as large as 2 billion for a really big database. +*/ +/* #include "sqliteInt.h" */ + +/* Size of the Bitvec structure in bytes. */ +#define BITVEC_SZ 512 + +/* Round the union size down to the nearest pointer boundary, since that's how +** it will be aligned within the Bitvec struct. */ +#define BITVEC_USIZE (((BITVEC_SZ-(3*sizeof(u32)))/sizeof(Bitvec*))*sizeof(Bitvec*)) + +/* Type of the array "element" for the bitmap representation. +** Should be a power of 2, and ideally, evenly divide into BITVEC_USIZE. +** Setting this to the "natural word" size of your CPU may improve +** performance. */ +#define BITVEC_TELEM u8 +/* Size, in bits, of the bitmap element. */ +#define BITVEC_SZELEM 8 +/* Number of elements in a bitmap array. */ +#define BITVEC_NELEM (BITVEC_USIZE/sizeof(BITVEC_TELEM)) +/* Number of bits in the bitmap array. */ +#define BITVEC_NBIT (BITVEC_NELEM*BITVEC_SZELEM) + +/* Number of u32 values in hash table. */ +#define BITVEC_NINT (BITVEC_USIZE/sizeof(u32)) +/* Maximum number of entries in hash table before +** sub-dividing and re-hashing. */ +#define BITVEC_MXHASH (BITVEC_NINT/2) +/* Hashing function for the aHash representation. +** Empirical testing showed that the *37 multiplier +** (an arbitrary prime)in the hash function provided +** no fewer collisions than the no-op *1. */ +#define BITVEC_HASH(X) (((X)*1)%BITVEC_NINT) + +#define BITVEC_NPTR (BITVEC_USIZE/sizeof(Bitvec *)) + + +/* +** A bitmap is an instance of the following structure. +** +** This bitmap records the existence of zero or more bits +** with values between 1 and iSize, inclusive. +** +** There are three possible representations of the bitmap. +** If iSize<=BITVEC_NBIT, then Bitvec.u.aBitmap[] is a straight +** bitmap. The least significant bit is bit 1. +** +** If iSize>BITVEC_NBIT and iDivisor==0 then Bitvec.u.aHash[] is +** a hash table that will hold up to BITVEC_MXHASH distinct values. +** +** Otherwise, the value i is redirected into one of BITVEC_NPTR +** sub-bitmaps pointed to by Bitvec.u.apSub[]. Each subbitmap +** handles up to iDivisor separate values of i. apSub[0] holds +** values between 1 and iDivisor. apSub[1] holds values between +** iDivisor+1 and 2*iDivisor. apSub[N] holds values between +** N*iDivisor+1 and (N+1)*iDivisor. Each subbitmap is normalized +** to hold deal with values between 1 and iDivisor. +*/ +struct Bitvec { + u32 iSize; /* Maximum bit index. Max iSize is 4,294,967,296. */ + u32 nSet; /* Number of bits that are set - only valid for aHash + ** element. Max is BITVEC_NINT. For BITVEC_SZ of 512, + ** this would be 125. */ + u32 iDivisor; /* Number of bits handled by each apSub[] entry. */ + /* Should >=0 for apSub element. */ + /* Max iDivisor is max(u32) / BITVEC_NPTR + 1. */ + /* For a BITVEC_SZ of 512, this would be 34,359,739. */ + union { + BITVEC_TELEM aBitmap[BITVEC_NELEM]; /* Bitmap representation */ + u32 aHash[BITVEC_NINT]; /* Hash table representation */ + Bitvec *apSub[BITVEC_NPTR]; /* Recursive representation */ + } u; +}; + +/* +** Create a new bitmap object able to handle bits between 0 and iSize, +** inclusive. Return a pointer to the new object. Return NULL if +** malloc fails. +*/ +SQLITE_PRIVATE Bitvec *sqlite3BitvecCreate(u32 iSize){ + Bitvec *p; + assert( sizeof(*p)==BITVEC_SZ ); + p = sqlite3MallocZero( sizeof(*p) ); + if( p ){ + p->iSize = iSize; + } + return p; +} + +/* +** Check to see if the i-th bit is set. Return true or false. +** If p is NULL (if the bitmap has not been created) or if +** i is out of range, then return false. +*/ +SQLITE_PRIVATE int sqlite3BitvecTestNotNull(Bitvec *p, u32 i){ + assert( p!=0 ); + i--; + if( i>=p->iSize ) return 0; + while( p->iDivisor ){ + u32 bin = i/p->iDivisor; + i = i%p->iDivisor; + p = p->u.apSub[bin]; + if (!p) { + return 0; + } + } + if( p->iSize<=BITVEC_NBIT ){ + return (p->u.aBitmap[i/BITVEC_SZELEM] & (1<<(i&(BITVEC_SZELEM-1))))!=0; + } else{ + u32 h = BITVEC_HASH(i++); + while( p->u.aHash[h] ){ + if( p->u.aHash[h]==i ) return 1; + h = (h+1) % BITVEC_NINT; + } + return 0; + } +} +SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec *p, u32 i){ + return p!=0 && sqlite3BitvecTestNotNull(p,i); +} + +/* +** Set the i-th bit. Return 0 on success and an error code if +** anything goes wrong. +** +** This routine might cause sub-bitmaps to be allocated. Failing +** to get the memory needed to hold the sub-bitmap is the only +** that can go wrong with an insert, assuming p and i are valid. +** +** The calling function must ensure that p is a valid Bitvec object +** and that the value for "i" is within range of the Bitvec object. +** Otherwise the behavior is undefined. +*/ +SQLITE_PRIVATE int sqlite3BitvecSet(Bitvec *p, u32 i){ + u32 h; + if( p==0 ) return SQLITE_OK; + assert( i>0 ); + assert( i<=p->iSize ); + i--; + while((p->iSize > BITVEC_NBIT) && p->iDivisor) { + u32 bin = i/p->iDivisor; + i = i%p->iDivisor; + if( p->u.apSub[bin]==0 ){ + p->u.apSub[bin] = sqlite3BitvecCreate( p->iDivisor ); + if( p->u.apSub[bin]==0 ) return SQLITE_NOMEM; + } + p = p->u.apSub[bin]; + } + if( p->iSize<=BITVEC_NBIT ){ + p->u.aBitmap[i/BITVEC_SZELEM] |= 1 << (i&(BITVEC_SZELEM-1)); + return SQLITE_OK; + } + h = BITVEC_HASH(i++); + /* if there wasn't a hash collision, and this doesn't */ + /* completely fill the hash, then just add it without */ + /* worring about sub-dividing and re-hashing. */ + if( !p->u.aHash[h] ){ + if (p->nSet<(BITVEC_NINT-1)) { + goto bitvec_set_end; + } else { + goto bitvec_set_rehash; + } + } + /* there was a collision, check to see if it's already */ + /* in hash, if not, try to find a spot for it */ + do { + if( p->u.aHash[h]==i ) return SQLITE_OK; + h++; + if( h>=BITVEC_NINT ) h = 0; + } while( p->u.aHash[h] ); + /* we didn't find it in the hash. h points to the first */ + /* available free spot. check to see if this is going to */ + /* make our hash too "full". */ +bitvec_set_rehash: + if( p->nSet>=BITVEC_MXHASH ){ + unsigned int j; + int rc; + u32 *aiValues = sqlite3StackAllocRaw(0, sizeof(p->u.aHash)); + if( aiValues==0 ){ + return SQLITE_NOMEM; + }else{ + memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash)); + memset(p->u.apSub, 0, sizeof(p->u.apSub)); + p->iDivisor = (p->iSize + BITVEC_NPTR - 1)/BITVEC_NPTR; + rc = sqlite3BitvecSet(p, i); + for(j=0; j<BITVEC_NINT; j++){ + if( aiValues[j] ) rc |= sqlite3BitvecSet(p, aiValues[j]); + } + sqlite3StackFree(0, aiValues); + return rc; + } + } +bitvec_set_end: + p->nSet++; + p->u.aHash[h] = i; + return SQLITE_OK; +} + +/* +** Clear the i-th bit. +** +** pBuf must be a pointer to at least BITVEC_SZ bytes of temporary storage +** that BitvecClear can use to rebuilt its hash table. +*/ +SQLITE_PRIVATE void sqlite3BitvecClear(Bitvec *p, u32 i, void *pBuf){ + if( p==0 ) return; + assert( i>0 ); + i--; + while( p->iDivisor ){ + u32 bin = i/p->iDivisor; + i = i%p->iDivisor; + p = p->u.apSub[bin]; + if (!p) { + return; + } + } + if( p->iSize<=BITVEC_NBIT ){ + p->u.aBitmap[i/BITVEC_SZELEM] &= ~(1 << (i&(BITVEC_SZELEM-1))); + }else{ + unsigned int j; + u32 *aiValues = pBuf; + memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash)); + memset(p->u.aHash, 0, sizeof(p->u.aHash)); + p->nSet = 0; + for(j=0; j<BITVEC_NINT; j++){ + if( aiValues[j] && aiValues[j]!=(i+1) ){ + u32 h = BITVEC_HASH(aiValues[j]-1); + p->nSet++; + while( p->u.aHash[h] ){ + h++; + if( h>=BITVEC_NINT ) h = 0; + } + p->u.aHash[h] = aiValues[j]; + } + } + } +} + +/* +** Destroy a bitmap object. Reclaim all memory used. +*/ +SQLITE_PRIVATE void sqlite3BitvecDestroy(Bitvec *p){ + if( p==0 ) return; + if( p->iDivisor ){ + unsigned int i; + for(i=0; i<BITVEC_NPTR; i++){ + sqlite3BitvecDestroy(p->u.apSub[i]); + } + } + sqlite3_free(p); +} + +/* +** Return the value of the iSize parameter specified when Bitvec *p +** was created. +*/ +SQLITE_PRIVATE u32 sqlite3BitvecSize(Bitvec *p){ + return p->iSize; +} + +/************** End of bitvec.c **********************************************/ +/************** Begin file pcache.c ******************************************/ +/* +** 2008 August 05 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file implements that page cache. +*/ +/* #include "sqliteInt.h" */ + +/* +** A complete page cache is an instance of this structure. +*/ +struct PCache { + PgHdr *pDirty, *pDirtyTail; /* List of dirty pages in LRU order */ + PgHdr *pSynced; /* Last synced page in dirty page list */ + int nRefSum; /* Sum of ref counts over all pages */ + int szCache; /* Configured cache size */ + int szPage; /* Size of every page in this cache */ + int szExtra; /* Size of extra space for each page */ + u8 bPurgeable; /* True if pages are on backing store */ + u8 eCreate; /* eCreate value for for xFetch() */ + int (*xStress)(void*,PgHdr*); /* Call to try make a page clean */ + void *pStress; /* Argument to xStress */ + sqlite3_pcache *pCache; /* Pluggable cache module */ +}; + +/********************************** Linked List Management ********************/ + +/* Allowed values for second argument to pcacheManageDirtyList() */ +#define PCACHE_DIRTYLIST_REMOVE 1 /* Remove pPage from dirty list */ +#define PCACHE_DIRTYLIST_ADD 2 /* Add pPage to the dirty list */ +#define PCACHE_DIRTYLIST_FRONT 3 /* Move pPage to the front of the list */ + +/* +** Manage pPage's participation on the dirty list. Bits of the addRemove +** argument determines what operation to do. The 0x01 bit means first +** remove pPage from the dirty list. The 0x02 means add pPage back to +** the dirty list. Doing both moves pPage to the front of the dirty list. +*/ +static void pcacheManageDirtyList(PgHdr *pPage, u8 addRemove){ + PCache *p = pPage->pCache; + + if( addRemove & PCACHE_DIRTYLIST_REMOVE ){ + assert( pPage->pDirtyNext || pPage==p->pDirtyTail ); + assert( pPage->pDirtyPrev || pPage==p->pDirty ); + + /* Update the PCache1.pSynced variable if necessary. */ + if( p->pSynced==pPage ){ + PgHdr *pSynced = pPage->pDirtyPrev; + while( pSynced && (pSynced->flags&PGHDR_NEED_SYNC) ){ + pSynced = pSynced->pDirtyPrev; + } + p->pSynced = pSynced; + } + + if( pPage->pDirtyNext ){ + pPage->pDirtyNext->pDirtyPrev = pPage->pDirtyPrev; + }else{ + assert( pPage==p->pDirtyTail ); + p->pDirtyTail = pPage->pDirtyPrev; + } + if( pPage->pDirtyPrev ){ + pPage->pDirtyPrev->pDirtyNext = pPage->pDirtyNext; + }else{ + assert( pPage==p->pDirty ); + p->pDirty = pPage->pDirtyNext; + if( p->pDirty==0 && p->bPurgeable ){ + assert( p->eCreate==1 ); + p->eCreate = 2; + } + } + pPage->pDirtyNext = 0; + pPage->pDirtyPrev = 0; + } + if( addRemove & PCACHE_DIRTYLIST_ADD ){ + assert( pPage->pDirtyNext==0 && pPage->pDirtyPrev==0 && p->pDirty!=pPage ); + + pPage->pDirtyNext = p->pDirty; + if( pPage->pDirtyNext ){ + assert( pPage->pDirtyNext->pDirtyPrev==0 ); + pPage->pDirtyNext->pDirtyPrev = pPage; + }else{ + p->pDirtyTail = pPage; + if( p->bPurgeable ){ + assert( p->eCreate==2 ); + p->eCreate = 1; + } + } + p->pDirty = pPage; + if( !p->pSynced && 0==(pPage->flags&PGHDR_NEED_SYNC) ){ + p->pSynced = pPage; + } + } +} + +/* +** Wrapper around the pluggable caches xUnpin method. If the cache is +** being used for an in-memory database, this function is a no-op. +*/ +static void pcacheUnpin(PgHdr *p){ + if( p->pCache->bPurgeable ){ + sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 0); + } +} + +/* +** Compute the number of pages of cache requested. p->szCache is the +** cache size requested by the "PRAGMA cache_size" statement. +** +** +*/ +static int numberOfCachePages(PCache *p){ + if( p->szCache>=0 ){ + /* IMPLEMENTATION-OF: R-42059-47211 If the argument N is positive then the + ** suggested cache size is set to N. */ + return p->szCache; + }else{ + /* IMPLEMENTATION-OF: R-61436-13639 If the argument N is negative, then + ** the number of cache pages is adjusted to use approximately abs(N*1024) + ** bytes of memory. */ + return (int)((-1024*(i64)p->szCache)/(p->szPage+p->szExtra)); + } +} + +/*************************************************** General Interfaces ****** +** +** Initialize and shutdown the page cache subsystem. Neither of these +** functions are threadsafe. +*/ +SQLITE_PRIVATE int sqlite3PcacheInitialize(void){ + if( sqlite3GlobalConfig.pcache2.xInit==0 ){ + /* IMPLEMENTATION-OF: R-26801-64137 If the xInit() method is NULL, then the + ** built-in default page cache is used instead of the application defined + ** page cache. */ + sqlite3PCacheSetDefault(); + } + return sqlite3GlobalConfig.pcache2.xInit(sqlite3GlobalConfig.pcache2.pArg); +} +SQLITE_PRIVATE void sqlite3PcacheShutdown(void){ + if( sqlite3GlobalConfig.pcache2.xShutdown ){ + /* IMPLEMENTATION-OF: R-26000-56589 The xShutdown() method may be NULL. */ + sqlite3GlobalConfig.pcache2.xShutdown(sqlite3GlobalConfig.pcache2.pArg); + } +} + +/* +** Return the size in bytes of a PCache object. +*/ +SQLITE_PRIVATE int sqlite3PcacheSize(void){ return sizeof(PCache); } + +/* +** Create a new PCache object. Storage space to hold the object +** has already been allocated and is passed in as the p pointer. +** The caller discovers how much space needs to be allocated by +** calling sqlite3PcacheSize(). +*/ +SQLITE_PRIVATE int sqlite3PcacheOpen( + int szPage, /* Size of every page */ + int szExtra, /* Extra space associated with each page */ + int bPurgeable, /* True if pages are on backing store */ + int (*xStress)(void*,PgHdr*),/* Call to try to make pages clean */ + void *pStress, /* Argument to xStress */ + PCache *p /* Preallocated space for the PCache */ +){ + memset(p, 0, sizeof(PCache)); + p->szPage = 1; + p->szExtra = szExtra; + p->bPurgeable = bPurgeable; + p->eCreate = 2; + p->xStress = xStress; + p->pStress = pStress; + p->szCache = 100; + return sqlite3PcacheSetPageSize(p, szPage); +} + +/* +** Change the page size for PCache object. The caller must ensure that there +** are no outstanding page references when this function is called. +*/ +SQLITE_PRIVATE int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){ + assert( pCache->nRefSum==0 && pCache->pDirty==0 ); + if( pCache->szPage ){ + sqlite3_pcache *pNew; + pNew = sqlite3GlobalConfig.pcache2.xCreate( + szPage, pCache->szExtra + ROUND8(sizeof(PgHdr)), + pCache->bPurgeable + ); + if( pNew==0 ) return SQLITE_NOMEM; + sqlite3GlobalConfig.pcache2.xCachesize(pNew, numberOfCachePages(pCache)); + if( pCache->pCache ){ + sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache); + } + pCache->pCache = pNew; + pCache->szPage = szPage; + } + return SQLITE_OK; +} + +/* +** Try to obtain a page from the cache. +** +** This routine returns a pointer to an sqlite3_pcache_page object if +** such an object is already in cache, or if a new one is created. +** This routine returns a NULL pointer if the object was not in cache +** and could not be created. +** +** The createFlags should be 0 to check for existing pages and should +** be 3 (not 1, but 3) to try to create a new page. +** +** If the createFlag is 0, then NULL is always returned if the page +** is not already in the cache. If createFlag is 1, then a new page +** is created only if that can be done without spilling dirty pages +** and without exceeding the cache size limit. +** +** The caller needs to invoke sqlite3PcacheFetchFinish() to properly +** initialize the sqlite3_pcache_page object and convert it into a +** PgHdr object. The sqlite3PcacheFetch() and sqlite3PcacheFetchFinish() +** routines are split this way for performance reasons. When separated +** they can both (usually) operate without having to push values to +** the stack on entry and pop them back off on exit, which saves a +** lot of pushing and popping. +*/ +SQLITE_PRIVATE sqlite3_pcache_page *sqlite3PcacheFetch( + PCache *pCache, /* Obtain the page from this cache */ + Pgno pgno, /* Page number to obtain */ + int createFlag /* If true, create page if it does not exist already */ +){ + int eCreate; + + assert( pCache!=0 ); + assert( pCache->pCache!=0 ); + assert( createFlag==3 || createFlag==0 ); + assert( pgno>0 ); + + /* eCreate defines what to do if the page does not exist. + ** 0 Do not allocate a new page. (createFlag==0) + ** 1 Allocate a new page if doing so is inexpensive. + ** (createFlag==1 AND bPurgeable AND pDirty) + ** 2 Allocate a new page even it doing so is difficult. + ** (createFlag==1 AND !(bPurgeable AND pDirty) + */ + eCreate = createFlag & pCache->eCreate; + assert( eCreate==0 || eCreate==1 || eCreate==2 ); + assert( createFlag==0 || pCache->eCreate==eCreate ); + assert( createFlag==0 || eCreate==1+(!pCache->bPurgeable||!pCache->pDirty) ); + return sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, eCreate); +} + +/* +** If the sqlite3PcacheFetch() routine is unable to allocate a new +** page because new clean pages are available for reuse and the cache +** size limit has been reached, then this routine can be invoked to +** try harder to allocate a page. This routine might invoke the stress +** callback to spill dirty pages to the journal. It will then try to +** allocate the new page and will only fail to allocate a new page on +** an OOM error. +** +** This routine should be invoked only after sqlite3PcacheFetch() fails. +*/ +SQLITE_PRIVATE int sqlite3PcacheFetchStress( + PCache *pCache, /* Obtain the page from this cache */ + Pgno pgno, /* Page number to obtain */ + sqlite3_pcache_page **ppPage /* Write result here */ +){ + PgHdr *pPg; + if( pCache->eCreate==2 ) return 0; + + + /* Find a dirty page to write-out and recycle. First try to find a + ** page that does not require a journal-sync (one with PGHDR_NEED_SYNC + ** cleared), but if that is not possible settle for any other + ** unreferenced dirty page. + */ + for(pPg=pCache->pSynced; + pPg && (pPg->nRef || (pPg->flags&PGHDR_NEED_SYNC)); + pPg=pPg->pDirtyPrev + ); + pCache->pSynced = pPg; + if( !pPg ){ + for(pPg=pCache->pDirtyTail; pPg && pPg->nRef; pPg=pPg->pDirtyPrev); + } + if( pPg ){ + int rc; +#ifdef SQLITE_LOG_CACHE_SPILL + sqlite3_log(SQLITE_FULL, + "spill page %d making room for %d - cache used: %d/%d", + pPg->pgno, pgno, + sqlite3GlobalConfig.pcache.xPagecount(pCache->pCache), + numberOfCachePages(pCache)); +#endif + rc = pCache->xStress(pCache->pStress, pPg); + if( rc!=SQLITE_OK && rc!=SQLITE_BUSY ){ + return rc; + } + } + *ppPage = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, 2); + return *ppPage==0 ? SQLITE_NOMEM : SQLITE_OK; +} + +/* +** This is a helper routine for sqlite3PcacheFetchFinish() +** +** In the uncommon case where the page being fetched has not been +** initialized, this routine is invoked to do the initialization. +** This routine is broken out into a separate function since it +** requires extra stack manipulation that can be avoided in the common +** case. +*/ +static SQLITE_NOINLINE PgHdr *pcacheFetchFinishWithInit( + PCache *pCache, /* Obtain the page from this cache */ + Pgno pgno, /* Page number obtained */ + sqlite3_pcache_page *pPage /* Page obtained by prior PcacheFetch() call */ +){ + PgHdr *pPgHdr; + assert( pPage!=0 ); + pPgHdr = (PgHdr*)pPage->pExtra; + assert( pPgHdr->pPage==0 ); + memset(pPgHdr, 0, sizeof(PgHdr)); + pPgHdr->pPage = pPage; + pPgHdr->pData = pPage->pBuf; + pPgHdr->pExtra = (void *)&pPgHdr[1]; + memset(pPgHdr->pExtra, 0, pCache->szExtra); + pPgHdr->pCache = pCache; + pPgHdr->pgno = pgno; + pPgHdr->flags = PGHDR_CLEAN; + return sqlite3PcacheFetchFinish(pCache,pgno,pPage); +} + +/* +** This routine converts the sqlite3_pcache_page object returned by +** sqlite3PcacheFetch() into an initialized PgHdr object. This routine +** must be called after sqlite3PcacheFetch() in order to get a usable +** result. +*/ +SQLITE_PRIVATE PgHdr *sqlite3PcacheFetchFinish( + PCache *pCache, /* Obtain the page from this cache */ + Pgno pgno, /* Page number obtained */ + sqlite3_pcache_page *pPage /* Page obtained by prior PcacheFetch() call */ +){ + PgHdr *pPgHdr; + + assert( pPage!=0 ); + pPgHdr = (PgHdr *)pPage->pExtra; + + if( !pPgHdr->pPage ){ + return pcacheFetchFinishWithInit(pCache, pgno, pPage); + } + pCache->nRefSum++; + pPgHdr->nRef++; + return pPgHdr; +} + +/* +** Decrement the reference count on a page. If the page is clean and the +** reference count drops to 0, then it is made eligible for recycling. +*/ +SQLITE_PRIVATE void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){ + assert( p->nRef>0 ); + p->pCache->nRefSum--; + if( (--p->nRef)==0 ){ + if( p->flags&PGHDR_CLEAN ){ + pcacheUnpin(p); + }else if( p->pDirtyPrev!=0 ){ + /* Move the page to the head of the dirty list. */ + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT); + } + } +} + +/* +** Increase the reference count of a supplied page by 1. +*/ +SQLITE_PRIVATE void sqlite3PcacheRef(PgHdr *p){ + assert(p->nRef>0); + p->nRef++; + p->pCache->nRefSum++; +} + +/* +** Drop a page from the cache. There must be exactly one reference to the +** page. This function deletes that reference, so after it returns the +** page pointed to by p is invalid. +*/ +SQLITE_PRIVATE void sqlite3PcacheDrop(PgHdr *p){ + assert( p->nRef==1 ); + if( p->flags&PGHDR_DIRTY ){ + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); + } + p->pCache->nRefSum--; + sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 1); +} + +/* +** Make sure the page is marked as dirty. If it isn't dirty already, +** make it so. +*/ +SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr *p){ + assert( p->nRef>0 ); + if( p->flags & (PGHDR_CLEAN|PGHDR_DONT_WRITE) ){ + p->flags &= ~PGHDR_DONT_WRITE; + if( p->flags & PGHDR_CLEAN ){ + p->flags ^= (PGHDR_DIRTY|PGHDR_CLEAN); + assert( (p->flags & (PGHDR_DIRTY|PGHDR_CLEAN))==PGHDR_DIRTY ); + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD); + } + } +} + +/* +** Make sure the page is marked as clean. If it isn't clean already, +** make it so. +*/ +SQLITE_PRIVATE void sqlite3PcacheMakeClean(PgHdr *p){ + if( (p->flags & PGHDR_DIRTY) ){ + assert( (p->flags & PGHDR_CLEAN)==0 ); + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); + p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC|PGHDR_WRITEABLE); + p->flags |= PGHDR_CLEAN; + if( p->nRef==0 ){ + pcacheUnpin(p); + } + } +} + +/* +** Make every page in the cache clean. +*/ +SQLITE_PRIVATE void sqlite3PcacheCleanAll(PCache *pCache){ + PgHdr *p; + while( (p = pCache->pDirty)!=0 ){ + sqlite3PcacheMakeClean(p); + } +} + +/* +** Clear the PGHDR_NEED_SYNC flag from all dirty pages. +*/ +SQLITE_PRIVATE void sqlite3PcacheClearSyncFlags(PCache *pCache){ + PgHdr *p; + for(p=pCache->pDirty; p; p=p->pDirtyNext){ + p->flags &= ~PGHDR_NEED_SYNC; + } + pCache->pSynced = pCache->pDirtyTail; +} + +/* +** Change the page number of page p to newPgno. +*/ +SQLITE_PRIVATE void sqlite3PcacheMove(PgHdr *p, Pgno newPgno){ + PCache *pCache = p->pCache; + assert( p->nRef>0 ); + assert( newPgno>0 ); + sqlite3GlobalConfig.pcache2.xRekey(pCache->pCache, p->pPage, p->pgno,newPgno); + p->pgno = newPgno; + if( (p->flags&PGHDR_DIRTY) && (p->flags&PGHDR_NEED_SYNC) ){ + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT); + } +} + +/* +** Drop every cache entry whose page number is greater than "pgno". The +** caller must ensure that there are no outstanding references to any pages +** other than page 1 with a page number greater than pgno. +** +** If there is a reference to page 1 and the pgno parameter passed to this +** function is 0, then the data area associated with page 1 is zeroed, but +** the page object is not dropped. +*/ +SQLITE_PRIVATE void sqlite3PcacheTruncate(PCache *pCache, Pgno pgno){ + if( pCache->pCache ){ + PgHdr *p; + PgHdr *pNext; + for(p=pCache->pDirty; p; p=pNext){ + pNext = p->pDirtyNext; + /* This routine never gets call with a positive pgno except right + ** after sqlite3PcacheCleanAll(). So if there are dirty pages, + ** it must be that pgno==0. + */ + assert( p->pgno>0 ); + if( ALWAYS(p->pgno>pgno) ){ + assert( p->flags&PGHDR_DIRTY ); + sqlite3PcacheMakeClean(p); + } + } + if( pgno==0 && pCache->nRefSum ){ + sqlite3_pcache_page *pPage1; + pPage1 = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache,1,0); + if( ALWAYS(pPage1) ){ /* Page 1 is always available in cache, because + ** pCache->nRefSum>0 */ + memset(pPage1->pBuf, 0, pCache->szPage); + pgno = 1; + } + } + sqlite3GlobalConfig.pcache2.xTruncate(pCache->pCache, pgno+1); + } +} + +/* +** Close a cache. +*/ +SQLITE_PRIVATE void sqlite3PcacheClose(PCache *pCache){ + assert( pCache->pCache!=0 ); + sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache); +} + +/* +** Discard the contents of the cache. +*/ +SQLITE_PRIVATE void sqlite3PcacheClear(PCache *pCache){ + sqlite3PcacheTruncate(pCache, 0); +} + +/* +** Merge two lists of pages connected by pDirty and in pgno order. +** Do not both fixing the pDirtyPrev pointers. +*/ +static PgHdr *pcacheMergeDirtyList(PgHdr *pA, PgHdr *pB){ + PgHdr result, *pTail; + pTail = &result; + while( pA && pB ){ + if( pA->pgno<pB->pgno ){ + pTail->pDirty = pA; + pTail = pA; + pA = pA->pDirty; + }else{ + pTail->pDirty = pB; + pTail = pB; + pB = pB->pDirty; + } + } + if( pA ){ + pTail->pDirty = pA; + }else if( pB ){ + pTail->pDirty = pB; + }else{ + pTail->pDirty = 0; + } + return result.pDirty; +} + +/* +** Sort the list of pages in accending order by pgno. Pages are +** connected by pDirty pointers. The pDirtyPrev pointers are +** corrupted by this sort. +** +** Since there cannot be more than 2^31 distinct pages in a database, +** there cannot be more than 31 buckets required by the merge sorter. +** One extra bucket is added to catch overflow in case something +** ever changes to make the previous sentence incorrect. +*/ +#define N_SORT_BUCKET 32 +static PgHdr *pcacheSortDirtyList(PgHdr *pIn){ + PgHdr *a[N_SORT_BUCKET], *p; + int i; + memset(a, 0, sizeof(a)); + while( pIn ){ + p = pIn; + pIn = p->pDirty; + p->pDirty = 0; + for(i=0; ALWAYS(i<N_SORT_BUCKET-1); i++){ + if( a[i]==0 ){ + a[i] = p; + break; + }else{ + p = pcacheMergeDirtyList(a[i], p); + a[i] = 0; + } + } + if( NEVER(i==N_SORT_BUCKET-1) ){ + /* To get here, there need to be 2^(N_SORT_BUCKET) elements in + ** the input list. But that is impossible. + */ + a[i] = pcacheMergeDirtyList(a[i], p); + } + } + p = a[0]; + for(i=1; i<N_SORT_BUCKET; i++){ + p = pcacheMergeDirtyList(p, a[i]); + } + return p; +} + +/* +** Return a list of all dirty pages in the cache, sorted by page number. +*/ +SQLITE_PRIVATE PgHdr *sqlite3PcacheDirtyList(PCache *pCache){ + PgHdr *p; + for(p=pCache->pDirty; p; p=p->pDirtyNext){ + p->pDirty = p->pDirtyNext; + } + return pcacheSortDirtyList(pCache->pDirty); +} + +/* +** Return the total number of references to all pages held by the cache. +** +** This is not the total number of pages referenced, but the sum of the +** reference count for all pages. +*/ +SQLITE_PRIVATE int sqlite3PcacheRefCount(PCache *pCache){ + return pCache->nRefSum; +} + +/* +** Return the number of references to the page supplied as an argument. +*/ +SQLITE_PRIVATE int sqlite3PcachePageRefcount(PgHdr *p){ + return p->nRef; +} + +#ifdef SQLITE_TEST +/* +** Return the total number of pages in the cache. +*/ +SQLITE_PRIVATE int sqlite3PcachePagecount(PCache *pCache){ + assert( pCache->pCache!=0 ); + return sqlite3GlobalConfig.pcache2.xPagecount(pCache->pCache); +} + +/* +** Get the suggested cache-size value. +*/ +SQLITE_PRIVATE int sqlite3PcacheGetCachesize(PCache *pCache){ + return numberOfCachePages(pCache); +} +#endif + +/* +** Set the suggested cache-size value. +*/ +SQLITE_PRIVATE void sqlite3PcacheSetCachesize(PCache *pCache, int mxPage){ + assert( pCache->pCache!=0 ); + pCache->szCache = mxPage; + sqlite3GlobalConfig.pcache2.xCachesize(pCache->pCache, + numberOfCachePages(pCache)); +} + +#if 0 +/* +** Free up as much memory as possible from the page cache. +*/ +SQLITE_PRIVATE void sqlite3PcacheShrink(PCache *pCache){ + assert( pCache->pCache!=0 ); + sqlite3GlobalConfig.pcache2.xShrink(pCache->pCache); +} +#endif + +/* +** Return the size of the header added by this middleware layer +** in the page-cache hierarchy. +*/ +SQLITE_PRIVATE int sqlite3HeaderSizePcache(void){ return ROUND8(sizeof(PgHdr)); } + + +#if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG) +/* +** For all dirty pages currently in the cache, invoke the specified +** callback. This is only used if the SQLITE_CHECK_PAGES macro is +** defined. +*/ +SQLITE_PRIVATE void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHdr *)){ + PgHdr *pDirty; + for(pDirty=pCache->pDirty; pDirty; pDirty=pDirty->pDirtyNext){ + xIter(pDirty); + } +} +#endif + +/************** End of pcache.c **********************************************/ +/************** Begin file pcache1.c *****************************************/ +/* +** 2008 November 05 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file implements the default page cache implementation (the +** sqlite3_pcache interface). It also contains part of the implementation +** of the SQLITE_CONFIG_PAGECACHE and sqlite3_release_memory() features. +** If the default page cache implementation is overridden, then neither of +** these two features are available. +** +** A Page cache line looks like this: +** +** ------------------------------------------------------------- +** | database page content | PgHdr1 | MemPage | PgHdr | +** ------------------------------------------------------------- +** +** The database page content is up front (so that buffer overreads tend to +** flow harmlessly into the PgHdr1, MemPage, and PgHdr extensions). MemPage +** is the extension added by the btree.c module containing information such +** as the database page number and how that database page is used. PgHdr +** is added by the pcache.c layer and contains information used to keep track +** of which pages are "dirty". PgHdr1 is an extension added by this +** module (pcache1.c). The PgHdr1 header is a subclass of sqlite3_pcache_page. +** PgHdr1 contains information needed to look up a page by its page number. +** The superclass sqlite3_pcache_page.pBuf points to the start of the +** database page content and sqlite3_pcache_page.pExtra points to PgHdr. +** +** The size of the extension (MemPage+PgHdr+PgHdr1) can be determined at +** runtime using sqlite3_config(SQLITE_CONFIG_PCACHE_HDRSZ, &size). The +** sizes of the extensions sum to 272 bytes on x64 for 3.8.10, but this +** size can vary according to architecture, compile-time options, and +** SQLite library version number. +** +** If SQLITE_PCACHE_SEPARATE_HEADER is defined, then the extension is obtained +** using a separate memory allocation from the database page content. This +** seeks to overcome the "clownshoe" problem (also called "internal +** fragmentation" in academic literature) of allocating a few bytes more +** than a power of two with the memory allocator rounding up to the next +** power of two, and leaving the rounded-up space unused. +** +** This module tracks pointers to PgHdr1 objects. Only pcache.c communicates +** with this module. Information is passed back and forth as PgHdr1 pointers. +** +** The pcache.c and pager.c modules deal pointers to PgHdr objects. +** The btree.c module deals with pointers to MemPage objects. +** +** SOURCE OF PAGE CACHE MEMORY: +** +** Memory for a page might come from any of three sources: +** +** (1) The general-purpose memory allocator - sqlite3Malloc() +** (2) Global page-cache memory provided using sqlite3_config() with +** SQLITE_CONFIG_PAGECACHE. +** (3) PCache-local bulk allocation. +** +** The third case is a chunk of heap memory (defaulting to 100 pages worth) +** that is allocated when the page cache is created. The size of the local +** bulk allocation can be adjusted using +** +** sqlite3_config(SQLITE_CONFIG_PAGECACHE, 0, 0, N). +** +** If N is positive, then N pages worth of memory are allocated using a single +** sqlite3Malloc() call and that memory is used for the first N pages allocated. +** Or if N is negative, then -1024*N bytes of memory are allocated and used +** for as many pages as can be accomodated. +** +** Only one of (2) or (3) can be used. Once the memory available to (2) or +** (3) is exhausted, subsequent allocations fail over to the general-purpose +** memory allocator (1). +** +** Earlier versions of SQLite used only methods (1) and (2). But experiments +** show that method (3) with N==100 provides about a 5% performance boost for +** common workloads. +*/ +/* #include "sqliteInt.h" */ + +typedef struct PCache1 PCache1; +typedef struct PgHdr1 PgHdr1; +typedef struct PgFreeslot PgFreeslot; +typedef struct PGroup PGroup; + +/* +** Each cache entry is represented by an instance of the following +** structure. Unless SQLITE_PCACHE_SEPARATE_HEADER is defined, a buffer of +** PgHdr1.pCache->szPage bytes is allocated directly before this structure +** in memory. +*/ +struct PgHdr1 { + sqlite3_pcache_page page; /* Base class. Must be first. pBuf & pExtra */ + unsigned int iKey; /* Key value (page number) */ + u8 isPinned; /* Page in use, not on the LRU list */ + u8 isBulkLocal; /* This page from bulk local storage */ + u8 isAnchor; /* This is the PGroup.lru element */ + PgHdr1 *pNext; /* Next in hash table chain */ + PCache1 *pCache; /* Cache that currently owns this page */ + PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */ + PgHdr1 *pLruPrev; /* Previous in LRU list of unpinned pages */ +}; + +/* Each page cache (or PCache) belongs to a PGroup. A PGroup is a set +** of one or more PCaches that are able to recycle each other's unpinned +** pages when they are under memory pressure. A PGroup is an instance of +** the following object. +** +** This page cache implementation works in one of two modes: +** +** (1) Every PCache is the sole member of its own PGroup. There is +** one PGroup per PCache. +** +** (2) There is a single global PGroup that all PCaches are a member +** of. +** +** Mode 1 uses more memory (since PCache instances are not able to rob +** unused pages from other PCaches) but it also operates without a mutex, +** and is therefore often faster. Mode 2 requires a mutex in order to be +** threadsafe, but recycles pages more efficiently. +** +** For mode (1), PGroup.mutex is NULL. For mode (2) there is only a single +** PGroup which is the pcache1.grp global variable and its mutex is +** SQLITE_MUTEX_STATIC_LRU. +*/ +struct PGroup { + sqlite3_mutex *mutex; /* MUTEX_STATIC_LRU or NULL */ + unsigned int nMaxPage; /* Sum of nMax for purgeable caches */ + unsigned int nMinPage; /* Sum of nMin for purgeable caches */ + unsigned int mxPinned; /* nMaxpage + 10 - nMinPage */ + unsigned int nCurrentPage; /* Number of purgeable pages allocated */ + PgHdr1 lru; /* The beginning and end of the LRU list */ +}; + +/* Each page cache is an instance of the following object. Every +** open database file (including each in-memory database and each +** temporary or transient database) has a single page cache which +** is an instance of this object. +** +** Pointers to structures of this type are cast and returned as +** opaque sqlite3_pcache* handles. +*/ +struct PCache1 { + /* Cache configuration parameters. Page size (szPage) and the purgeable + ** flag (bPurgeable) are set when the cache is created. nMax may be + ** modified at any time by a call to the pcache1Cachesize() method. + ** The PGroup mutex must be held when accessing nMax. + */ + PGroup *pGroup; /* PGroup this cache belongs to */ + int szPage; /* Size of database content section */ + int szExtra; /* sizeof(MemPage)+sizeof(PgHdr) */ + int szAlloc; /* Total size of one pcache line */ + int bPurgeable; /* True if cache is purgeable */ + unsigned int nMin; /* Minimum number of pages reserved */ + unsigned int nMax; /* Configured "cache_size" value */ + unsigned int n90pct; /* nMax*9/10 */ + unsigned int iMaxKey; /* Largest key seen since xTruncate() */ + + /* Hash table of all pages. The following variables may only be accessed + ** when the accessor is holding the PGroup mutex. + */ + unsigned int nRecyclable; /* Number of pages in the LRU list */ + unsigned int nPage; /* Total number of pages in apHash */ + unsigned int nHash; /* Number of slots in apHash[] */ + PgHdr1 **apHash; /* Hash table for fast lookup by key */ + PgHdr1 *pFree; /* List of unused pcache-local pages */ + void *pBulk; /* Bulk memory used by pcache-local */ +}; + +/* +** Free slots in the allocator used to divide up the global page cache +** buffer provided using the SQLITE_CONFIG_PAGECACHE mechanism. +*/ +struct PgFreeslot { + PgFreeslot *pNext; /* Next free slot */ +}; + +/* +** Global data used by this cache. +*/ +static SQLITE_WSD struct PCacheGlobal { + PGroup grp; /* The global PGroup for mode (2) */ + + /* Variables related to SQLITE_CONFIG_PAGECACHE settings. The + ** szSlot, nSlot, pStart, pEnd, nReserve, and isInit values are all + ** fixed at sqlite3BtreeInitialize() time and do not require mutex protection. + ** The nFreeSlot and pFree values do require mutex protection. + */ + int isInit; /* True if initialized */ + int separateCache; /* Use a new PGroup for each PCache */ + int nInitPage; /* Initial bulk allocation size */ + int szSlot; /* Size of each free slot */ + int nSlot; /* The number of pcache slots */ + int nReserve; /* Try to keep nFreeSlot above this */ + void *pStart, *pEnd; /* Bounds of global page cache memory */ + /* Above requires no mutex. Use mutex below for variable that follow. */ + sqlite3_mutex *mutex; /* Mutex for accessing the following: */ + PgFreeslot *pFree; /* Free page blocks */ + int nFreeSlot; /* Number of unused pcache slots */ + /* The following value requires a mutex to change. We skip the mutex on + ** reading because (1) most platforms read a 32-bit integer atomically and + ** (2) even if an incorrect value is read, no great harm is done since this + ** is really just an optimization. */ + int bUnderPressure; /* True if low on PAGECACHE memory */ +} pcache1_g; + +/* +** All code in this file should access the global structure above via the +** alias "pcache1". This ensures that the WSD emulation is used when +** compiling for systems that do not support real WSD. +*/ +#define pcache1 (GLOBAL(struct PCacheGlobal, pcache1_g)) + +/* +** Macros to enter and leave the PCache LRU mutex. +*/ +#if !defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) || SQLITE_THREADSAFE==0 +# define pcache1EnterMutex(X) assert((X)->mutex==0) +# define pcache1LeaveMutex(X) assert((X)->mutex==0) +# define PCACHE1_MIGHT_USE_GROUP_MUTEX 0 +#else +# define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex) +# define pcache1LeaveMutex(X) sqlite3_mutex_leave((X)->mutex) +# define PCACHE1_MIGHT_USE_GROUP_MUTEX 1 +#endif + +/******************************************************************************/ +/******** Page Allocation/SQLITE_CONFIG_PCACHE Related Functions **************/ + + +/* +** This function is called during initialization if a static buffer is +** supplied to use for the page-cache by passing the SQLITE_CONFIG_PAGECACHE +** verb to sqlite3_config(). Parameter pBuf points to an allocation large +** enough to contain 'n' buffers of 'sz' bytes each. +** +** This routine is called from sqlite3_initialize() and so it is guaranteed +** to be serialized already. There is no need for further mutexing. +*/ +SQLITE_PRIVATE void sqlite3PCacheBufferSetup(void *pBuf, int sz, int n){ + if( pcache1.isInit ){ + PgFreeslot *p; + if( pBuf==0 ) sz = n = 0; + sz = ROUNDDOWN8(sz); + pcache1.szSlot = sz; + pcache1.nSlot = pcache1.nFreeSlot = n; + pcache1.nReserve = n>90 ? 10 : (n/10 + 1); + pcache1.pStart = pBuf; + pcache1.pFree = 0; + pcache1.bUnderPressure = 0; + while( n-- ){ + p = (PgFreeslot*)pBuf; + p->pNext = pcache1.pFree; + pcache1.pFree = p; + pBuf = (void*)&((char*)pBuf)[sz]; + } + pcache1.pEnd = pBuf; + } +} + +/* +** Try to initialize the pCache->pFree and pCache->pBulk fields. Return +** true if pCache->pFree ends up containing one or more free pages. +*/ +static int pcache1InitBulk(PCache1 *pCache){ + i64 szBulk; + char *zBulk; + if( pcache1.nInitPage==0 ) return 0; + /* Do not bother with a bulk allocation if the cache size very small */ + if( pCache->nMax<3 ) return 0; + if( pcache1.nInitPage>0 ){ + szBulk = pCache->szAlloc * (i64)pcache1.nInitPage; + }else{ + szBulk = -1024 * (i64)pcache1.nInitPage; + } + if( szBulk > pCache->szAlloc*(i64)pCache->nMax ){ + szBulk = pCache->szAlloc*pCache->nMax; + } + zBulk = pCache->pBulk = sqlite3Malloc( szBulk ); + if( zBulk ){ + int nBulk = sqlite3MallocSize(zBulk)/pCache->szAlloc; + int i; + for(i=0; i<nBulk; i++){ + PgHdr1 *pX = (PgHdr1*)&zBulk[pCache->szPage]; + pX->page.pBuf = zBulk; + pX->page.pExtra = &pX[1]; + pX->isBulkLocal = 1; + pX->isAnchor = 0; + pX->pNext = pCache->pFree; + pCache->pFree = pX; + zBulk += pCache->szAlloc; + } + } + return pCache->pFree!=0; +} + +/* +** Malloc function used within this file to allocate space from the buffer +** configured using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no +** such buffer exists or there is no space left in it, this function falls +** back to sqlite3Malloc(). +** +** Multiple threads can run this routine at the same time. Global variables +** in pcache1 need to be protected via mutex. +*/ +static void *pcache1Alloc(int nByte){ + void *p = 0; + assert( sqlite3_mutex_notheld(pcache1.grp.mutex) ); + if( nByte<=pcache1.szSlot ){ + sqlite3_mutex_enter(pcache1.mutex); + p = (PgHdr1 *)pcache1.pFree; + if( p ){ + pcache1.pFree = pcache1.pFree->pNext; + pcache1.nFreeSlot--; + pcache1.bUnderPressure = pcache1.nFreeSlot<pcache1.nReserve; + assert( pcache1.nFreeSlot>=0 ); + sqlite3StatusSet(SQLITE_STATUS_PAGECACHE_SIZE, nByte); + sqlite3StatusUp(SQLITE_STATUS_PAGECACHE_USED, 1); + } + sqlite3_mutex_leave(pcache1.mutex); + } + if( p==0 ){ + /* Memory is not available in the SQLITE_CONFIG_PAGECACHE pool. Get + ** it from sqlite3Malloc instead. + */ + p = sqlite3Malloc(nByte); +#ifndef SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS + if( p ){ + int sz = sqlite3MallocSize(p); + sqlite3_mutex_enter(pcache1.mutex); + sqlite3StatusSet(SQLITE_STATUS_PAGECACHE_SIZE, nByte); + sqlite3StatusUp(SQLITE_STATUS_PAGECACHE_OVERFLOW, sz); + sqlite3_mutex_leave(pcache1.mutex); + } +#endif + sqlite3MemdebugSetType(p, MEMTYPE_PCACHE); + } + return p; +} + +/* +** Free an allocated buffer obtained from pcache1Alloc(). +*/ +static void pcache1Free(void *p){ + int nFreed = 0; + if( p==0 ) return; + if( p>=pcache1.pStart && p<pcache1.pEnd ){ + PgFreeslot *pSlot; + sqlite3_mutex_enter(pcache1.mutex); + sqlite3StatusDown(SQLITE_STATUS_PAGECACHE_USED, 1); + pSlot = (PgFreeslot*)p; + pSlot->pNext = pcache1.pFree; + pcache1.pFree = pSlot; + pcache1.nFreeSlot++; + pcache1.bUnderPressure = pcache1.nFreeSlot<pcache1.nReserve; + assert( pcache1.nFreeSlot<=pcache1.nSlot ); + sqlite3_mutex_leave(pcache1.mutex); + }else{ + assert( sqlite3MemdebugHasType(p, MEMTYPE_PCACHE) ); + sqlite3MemdebugSetType(p, MEMTYPE_HEAP); +#ifndef SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS + nFreed = sqlite3MallocSize(p); + sqlite3_mutex_enter(pcache1.mutex); + sqlite3StatusDown(SQLITE_STATUS_PAGECACHE_OVERFLOW, nFreed); + sqlite3_mutex_leave(pcache1.mutex); +#endif + sqlite3_free(p); + } +} + +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT +/* +** Return the size of a pcache allocation +*/ +static int pcache1MemSize(void *p){ + if( p>=pcache1.pStart && p<pcache1.pEnd ){ + return pcache1.szSlot; + }else{ + int iSize; + assert( sqlite3MemdebugHasType(p, MEMTYPE_PCACHE) ); + sqlite3MemdebugSetType(p, MEMTYPE_HEAP); + iSize = sqlite3MallocSize(p); + sqlite3MemdebugSetType(p, MEMTYPE_PCACHE); + return iSize; + } +} +#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */ + +/* +** Allocate a new page object initially associated with cache pCache. +*/ +static PgHdr1 *pcache1AllocPage(PCache1 *pCache, int benignMalloc){ + PgHdr1 *p = 0; + void *pPg; + + assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); + if( pCache->pFree || (pCache->nPage==0 && pcache1InitBulk(pCache)) ){ + p = pCache->pFree; + pCache->pFree = p->pNext; + p->pNext = 0; + }else{ +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + /* The group mutex must be released before pcache1Alloc() is called. This + ** is because it might call sqlite3_release_memory(), which assumes that + ** this mutex is not held. */ + assert( pcache1.separateCache==0 ); + assert( pCache->pGroup==&pcache1.grp ); + pcache1LeaveMutex(pCache->pGroup); +#endif +#ifdef SQLITE_PCACHE_SEPARATE_HEADER + pPg = pcache1Alloc(pCache->szPage); + p = sqlite3Malloc(sizeof(PgHdr1) + pCache->szExtra); + if( !pPg || !p ){ + pcache1Free(pPg); + sqlite3_free(p); + pPg = 0; + } +#else + pPg = pcache1Alloc(pCache->szAlloc); + p = (PgHdr1 *)&((u8 *)pPg)[pCache->szPage]; +#endif +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + pcache1EnterMutex(pCache->pGroup); +#endif + if( pPg==0 ) return 0; + p->page.pBuf = pPg; + p->page.pExtra = &p[1]; + p->isBulkLocal = 0; + p->isAnchor = 0; + } + if( pCache->bPurgeable ){ + pCache->pGroup->nCurrentPage++; + } + return p; +} + +/* +** Free a page object allocated by pcache1AllocPage(). +*/ +static void pcache1FreePage(PgHdr1 *p){ + PCache1 *pCache; + assert( p!=0 ); + pCache = p->pCache; + assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) ); + if( p->isBulkLocal ){ + p->pNext = pCache->pFree; + pCache->pFree = p; + }else{ + pcache1Free(p->page.pBuf); +#ifdef SQLITE_PCACHE_SEPARATE_HEADER + sqlite3_free(p); +#endif + } + if( pCache->bPurgeable ){ + pCache->pGroup->nCurrentPage--; + } +} + +/* +** Malloc function used by SQLite to obtain space from the buffer configured +** using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no such buffer +** exists, this function falls back to sqlite3Malloc(). +*/ +SQLITE_PRIVATE void *sqlite3PageMalloc(int sz){ + return pcache1Alloc(sz); +} + +/* +** Free an allocated buffer obtained from sqlite3PageMalloc(). +*/ +SQLITE_PRIVATE void sqlite3PageFree(void *p){ + pcache1Free(p); +} + + +/* +** Return true if it desirable to avoid allocating a new page cache +** entry. +** +** If memory was allocated specifically to the page cache using +** SQLITE_CONFIG_PAGECACHE but that memory has all been used, then +** it is desirable to avoid allocating a new page cache entry because +** presumably SQLITE_CONFIG_PAGECACHE was suppose to be sufficient +** for all page cache needs and we should not need to spill the +** allocation onto the heap. +** +** Or, the heap is used for all page cache memory but the heap is +** under memory pressure, then again it is desirable to avoid +** allocating a new page cache entry in order to avoid stressing +** the heap even further. +*/ +static int pcache1UnderMemoryPressure(PCache1 *pCache){ + if( pcache1.nSlot && (pCache->szPage+pCache->szExtra)<=pcache1.szSlot ){ + return pcache1.bUnderPressure; + }else{ + return sqlite3HeapNearlyFull(); + } +} + +/******************************************************************************/ +/******** General Implementation Functions ************************************/ + +/* +** This function is used to resize the hash table used by the cache passed +** as the first argument. +** +** The PCache mutex must be held when this function is called. +*/ +static void pcache1ResizeHash(PCache1 *p){ + PgHdr1 **apNew; + unsigned int nNew; + unsigned int i; + + assert( sqlite3_mutex_held(p->pGroup->mutex) ); + + nNew = p->nHash*2; + if( nNew<256 ){ + nNew = 256; + } + + pcache1LeaveMutex(p->pGroup); + apNew = (PgHdr1 **)sqlite3MallocZero(sizeof(PgHdr1 *)*nNew); + pcache1EnterMutex(p->pGroup); + if( apNew ){ + for(i=0; i<p->nHash; i++){ + PgHdr1 *pPage; + PgHdr1 *pNext = p->apHash[i]; + while( (pPage = pNext)!=0 ){ + unsigned int h = pPage->iKey % nNew; + pNext = pPage->pNext; + pPage->pNext = apNew[h]; + apNew[h] = pPage; + } + } + sqlite3_free(p->apHash); + p->apHash = apNew; + p->nHash = nNew; + } +} + +/* +** This function is used internally to remove the page pPage from the +** PGroup LRU list, if is part of it. If pPage is not part of the PGroup +** LRU list, then this function is a no-op. +** +** The PGroup mutex must be held when this function is called. +*/ +static PgHdr1 *pcache1PinPage(PgHdr1 *pPage){ + PCache1 *pCache; + + assert( pPage!=0 ); + assert( pPage->isPinned==0 ); + pCache = pPage->pCache; + assert( pPage->pLruNext ); + assert( pPage->pLruPrev ); + assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); + pPage->pLruPrev->pLruNext = pPage->pLruNext; + pPage->pLruNext->pLruPrev = pPage->pLruPrev; + pPage->pLruNext = 0; + pPage->pLruPrev = 0; + pPage->isPinned = 1; + assert( pPage->isAnchor==0 ); + assert( pCache->pGroup->lru.isAnchor==1 ); + pCache->nRecyclable--; + return pPage; +} + + +/* +** Remove the page supplied as an argument from the hash table +** (PCache1.apHash structure) that it is currently stored in. +** Also free the page if freePage is true. +** +** The PGroup mutex must be held when this function is called. +*/ +static void pcache1RemoveFromHash(PgHdr1 *pPage, int freeFlag){ + unsigned int h; + PCache1 *pCache = pPage->pCache; + PgHdr1 **pp; + + assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); + h = pPage->iKey % pCache->nHash; + for(pp=&pCache->apHash[h]; (*pp)!=pPage; pp=&(*pp)->pNext); + *pp = (*pp)->pNext; + + pCache->nPage--; + if( freeFlag ) pcache1FreePage(pPage); +} + +/* +** If there are currently more than nMaxPage pages allocated, try +** to recycle pages to reduce the number allocated to nMaxPage. +*/ +static void pcache1EnforceMaxPage(PCache1 *pCache){ + PGroup *pGroup = pCache->pGroup; + PgHdr1 *p; + assert( sqlite3_mutex_held(pGroup->mutex) ); + while( pGroup->nCurrentPage>pGroup->nMaxPage + && (p=pGroup->lru.pLruPrev)->isAnchor==0 + ){ + assert( p->pCache->pGroup==pGroup ); + assert( p->isPinned==0 ); + pcache1PinPage(p); + pcache1RemoveFromHash(p, 1); + } + if( pCache->nPage==0 && pCache->pBulk ){ + sqlite3_free(pCache->pBulk); + pCache->pBulk = pCache->pFree = 0; + } +} + +/* +** Discard all pages from cache pCache with a page number (key value) +** greater than or equal to iLimit. Any pinned pages that meet this +** criteria are unpinned before they are discarded. +** +** The PCache mutex must be held when this function is called. +*/ +static void pcache1TruncateUnsafe( + PCache1 *pCache, /* The cache to truncate */ + unsigned int iLimit /* Drop pages with this pgno or larger */ +){ + TESTONLY( unsigned int nPage = 0; ) /* To assert pCache->nPage is correct */ + unsigned int h; + assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); + for(h=0; h<pCache->nHash; h++){ + PgHdr1 **pp = &pCache->apHash[h]; + PgHdr1 *pPage; + while( (pPage = *pp)!=0 ){ + if( pPage->iKey>=iLimit ){ + pCache->nPage--; + *pp = pPage->pNext; + if( !pPage->isPinned ) pcache1PinPage(pPage); + pcache1FreePage(pPage); + }else{ + pp = &pPage->pNext; + TESTONLY( nPage++; ) + } + } + } + assert( pCache->nPage==nPage ); +} + +/******************************************************************************/ +/******** sqlite3_pcache Methods **********************************************/ + +/* +** Implementation of the sqlite3_pcache.xInit method. +*/ +static int pcache1Init(void *NotUsed){ + UNUSED_PARAMETER(NotUsed); + assert( pcache1.isInit==0 ); + memset(&pcache1, 0, sizeof(pcache1)); + + + /* + ** The pcache1.separateCache variable is true if each PCache has its own + ** private PGroup (mode-1). pcache1.separateCache is false if the single + ** PGroup in pcache1.grp is used for all page caches (mode-2). + ** + ** * Always use a unified cache (mode-2) if ENABLE_MEMORY_MANAGEMENT + ** + ** * Use a unified cache in single-threaded applications that have + ** configured a start-time buffer for use as page-cache memory using + ** sqlite3_config(SQLITE_CONFIG_PAGECACHE, pBuf, sz, N) with non-NULL + ** pBuf argument. + ** + ** * Otherwise use separate caches (mode-1) + */ +#if defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) + pcache1.separateCache = 0; +#elif SQLITE_THREADSAFE + pcache1.separateCache = sqlite3GlobalConfig.pPage==0 + || sqlite3GlobalConfig.bCoreMutex>0; +#else + pcache1.separateCache = sqlite3GlobalConfig.pPage==0; +#endif + +#if SQLITE_THREADSAFE + if( sqlite3GlobalConfig.bCoreMutex ){ + pcache1.grp.mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU); + pcache1.mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_PMEM); + } +#endif + if( pcache1.separateCache + && sqlite3GlobalConfig.nPage!=0 + && sqlite3GlobalConfig.pPage==0 + ){ + pcache1.nInitPage = sqlite3GlobalConfig.nPage; + }else{ + pcache1.nInitPage = 0; + } + pcache1.grp.mxPinned = 10; + pcache1.isInit = 1; + return SQLITE_OK; +} + +/* +** Implementation of the sqlite3_pcache.xShutdown method. +** Note that the static mutex allocated in xInit does +** not need to be freed. +*/ +static void pcache1Shutdown(void *NotUsed){ + UNUSED_PARAMETER(NotUsed); + assert( pcache1.isInit!=0 ); + memset(&pcache1, 0, sizeof(pcache1)); +} + +/* forward declaration */ +static void pcache1Destroy(sqlite3_pcache *p); + +/* +** Implementation of the sqlite3_pcache.xCreate method. +** +** Allocate a new cache. +*/ +static sqlite3_pcache *pcache1Create(int szPage, int szExtra, int bPurgeable){ + PCache1 *pCache; /* The newly created page cache */ + PGroup *pGroup; /* The group the new page cache will belong to */ + int sz; /* Bytes of memory required to allocate the new cache */ + + assert( (szPage & (szPage-1))==0 && szPage>=512 && szPage<=65536 ); + assert( szExtra < 300 ); + + sz = sizeof(PCache1) + sizeof(PGroup)*pcache1.separateCache; + pCache = (PCache1 *)sqlite3MallocZero(sz); + if( pCache ){ + if( pcache1.separateCache ){ + pGroup = (PGroup*)&pCache[1]; + pGroup->mxPinned = 10; + }else{ + pGroup = &pcache1.grp; + } + if( pGroup->lru.isAnchor==0 ){ + pGroup->lru.isAnchor = 1; + pGroup->lru.pLruPrev = pGroup->lru.pLruNext = &pGroup->lru; + } + pCache->pGroup = pGroup; + pCache->szPage = szPage; + pCache->szExtra = szExtra; + pCache->szAlloc = szPage + szExtra + ROUND8(sizeof(PgHdr1)); + pCache->bPurgeable = (bPurgeable ? 1 : 0); + pcache1EnterMutex(pGroup); + pcache1ResizeHash(pCache); + if( bPurgeable ){ + pCache->nMin = 10; + pGroup->nMinPage += pCache->nMin; + pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; + } + pcache1LeaveMutex(pGroup); + if( pCache->nHash==0 ){ + pcache1Destroy((sqlite3_pcache*)pCache); + pCache = 0; + } + } + return (sqlite3_pcache *)pCache; +} + +/* +** Implementation of the sqlite3_pcache.xCachesize method. +** +** Configure the cache_size limit for a cache. +*/ +static void pcache1Cachesize(sqlite3_pcache *p, int nMax){ + PCache1 *pCache = (PCache1 *)p; + if( pCache->bPurgeable ){ + PGroup *pGroup = pCache->pGroup; + pcache1EnterMutex(pGroup); + pGroup->nMaxPage += (nMax - pCache->nMax); + pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; + pCache->nMax = nMax; + pCache->n90pct = pCache->nMax*9/10; + pcache1EnforceMaxPage(pCache); + pcache1LeaveMutex(pGroup); + } +} + +/* +** Implementation of the sqlite3_pcache.xShrink method. +** +** Free up as much memory as possible. +*/ +static void pcache1Shrink(sqlite3_pcache *p){ + PCache1 *pCache = (PCache1*)p; + if( pCache->bPurgeable ){ + PGroup *pGroup = pCache->pGroup; + int savedMaxPage; + pcache1EnterMutex(pGroup); + savedMaxPage = pGroup->nMaxPage; + pGroup->nMaxPage = 0; + pcache1EnforceMaxPage(pCache); + pGroup->nMaxPage = savedMaxPage; + pcache1LeaveMutex(pGroup); + } +} + +/* +** Implementation of the sqlite3_pcache.xPagecount method. +*/ +static int pcache1Pagecount(sqlite3_pcache *p){ + int n; + PCache1 *pCache = (PCache1*)p; + pcache1EnterMutex(pCache->pGroup); + n = pCache->nPage; + pcache1LeaveMutex(pCache->pGroup); + return n; +} + + +/* +** Implement steps 3, 4, and 5 of the pcache1Fetch() algorithm described +** in the header of the pcache1Fetch() procedure. +** +** This steps are broken out into a separate procedure because they are +** usually not needed, and by avoiding the stack initialization required +** for these steps, the main pcache1Fetch() procedure can run faster. +*/ +static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2( + PCache1 *pCache, + unsigned int iKey, + int createFlag +){ + unsigned int nPinned; + PGroup *pGroup = pCache->pGroup; + PgHdr1 *pPage = 0; + + /* Step 3: Abort if createFlag is 1 but the cache is nearly full */ + assert( pCache->nPage >= pCache->nRecyclable ); + nPinned = pCache->nPage - pCache->nRecyclable; + assert( pGroup->mxPinned == pGroup->nMaxPage + 10 - pGroup->nMinPage ); + assert( pCache->n90pct == pCache->nMax*9/10 ); + if( createFlag==1 && ( + nPinned>=pGroup->mxPinned + || nPinned>=pCache->n90pct + || (pcache1UnderMemoryPressure(pCache) && pCache->nRecyclable<nPinned) + )){ + return 0; + } + + if( pCache->nPage>=pCache->nHash ) pcache1ResizeHash(pCache); + assert( pCache->nHash>0 && pCache->apHash ); + + /* Step 4. Try to recycle a page. */ + if( pCache->bPurgeable + && !pGroup->lru.pLruPrev->isAnchor + && ((pCache->nPage+1>=pCache->nMax) || pcache1UnderMemoryPressure(pCache)) + ){ + PCache1 *pOther; + pPage = pGroup->lru.pLruPrev; + assert( pPage->isPinned==0 ); + pcache1RemoveFromHash(pPage, 0); + pcache1PinPage(pPage); + pOther = pPage->pCache; + if( pOther->szAlloc != pCache->szAlloc ){ + pcache1FreePage(pPage); + pPage = 0; + }else{ + pGroup->nCurrentPage -= (pOther->bPurgeable - pCache->bPurgeable); + } + } + + /* Step 5. If a usable page buffer has still not been found, + ** attempt to allocate a new one. + */ + if( !pPage ){ + pPage = pcache1AllocPage(pCache, createFlag==1); + } + + if( pPage ){ + unsigned int h = iKey % pCache->nHash; + pCache->nPage++; + pPage->iKey = iKey; + pPage->pNext = pCache->apHash[h]; + pPage->pCache = pCache; + pPage->pLruPrev = 0; + pPage->pLruNext = 0; + pPage->isPinned = 1; + *(void **)pPage->page.pExtra = 0; + pCache->apHash[h] = pPage; + if( iKey>pCache->iMaxKey ){ + pCache->iMaxKey = iKey; + } + } + return pPage; +} + +/* +** Implementation of the sqlite3_pcache.xFetch method. +** +** Fetch a page by key value. +** +** Whether or not a new page may be allocated by this function depends on +** the value of the createFlag argument. 0 means do not allocate a new +** page. 1 means allocate a new page if space is easily available. 2 +** means to try really hard to allocate a new page. +** +** For a non-purgeable cache (a cache used as the storage for an in-memory +** database) there is really no difference between createFlag 1 and 2. So +** the calling function (pcache.c) will never have a createFlag of 1 on +** a non-purgeable cache. +** +** There are three different approaches to obtaining space for a page, +** depending on the value of parameter createFlag (which may be 0, 1 or 2). +** +** 1. Regardless of the value of createFlag, the cache is searched for a +** copy of the requested page. If one is found, it is returned. +** +** 2. If createFlag==0 and the page is not already in the cache, NULL is +** returned. +** +** 3. If createFlag is 1, and the page is not already in the cache, then +** return NULL (do not allocate a new page) if any of the following +** conditions are true: +** +** (a) the number of pages pinned by the cache is greater than +** PCache1.nMax, or +** +** (b) the number of pages pinned by the cache is greater than +** the sum of nMax for all purgeable caches, less the sum of +** nMin for all other purgeable caches, or +** +** 4. If none of the first three conditions apply and the cache is marked +** as purgeable, and if one of the following is true: +** +** (a) The number of pages allocated for the cache is already +** PCache1.nMax, or +** +** (b) The number of pages allocated for all purgeable caches is +** already equal to or greater than the sum of nMax for all +** purgeable caches, +** +** (c) The system is under memory pressure and wants to avoid +** unnecessary pages cache entry allocations +** +** then attempt to recycle a page from the LRU list. If it is the right +** size, return the recycled buffer. Otherwise, free the buffer and +** proceed to step 5. +** +** 5. Otherwise, allocate and return a new page buffer. +** +** There are two versions of this routine. pcache1FetchWithMutex() is +** the general case. pcache1FetchNoMutex() is a faster implementation for +** the common case where pGroup->mutex is NULL. The pcache1Fetch() wrapper +** invokes the appropriate routine. +*/ +static PgHdr1 *pcache1FetchNoMutex( + sqlite3_pcache *p, + unsigned int iKey, + int createFlag +){ + PCache1 *pCache = (PCache1 *)p; + PgHdr1 *pPage = 0; + + /* Step 1: Search the hash table for an existing entry. */ + pPage = pCache->apHash[iKey % pCache->nHash]; + while( pPage && pPage->iKey!=iKey ){ pPage = pPage->pNext; } + + /* Step 2: If the page was found in the hash table, then return it. + ** If the page was not in the hash table and createFlag is 0, abort. + ** Otherwise (page not in hash and createFlag!=0) continue with + ** subsequent steps to try to create the page. */ + if( pPage ){ + if( !pPage->isPinned ){ + return pcache1PinPage(pPage); + }else{ + return pPage; + } + }else if( createFlag ){ + /* Steps 3, 4, and 5 implemented by this subroutine */ + return pcache1FetchStage2(pCache, iKey, createFlag); + }else{ + return 0; + } +} +#if PCACHE1_MIGHT_USE_GROUP_MUTEX +static PgHdr1 *pcache1FetchWithMutex( + sqlite3_pcache *p, + unsigned int iKey, + int createFlag +){ + PCache1 *pCache = (PCache1 *)p; + PgHdr1 *pPage; + + pcache1EnterMutex(pCache->pGroup); + pPage = pcache1FetchNoMutex(p, iKey, createFlag); + assert( pPage==0 || pCache->iMaxKey>=iKey ); + pcache1LeaveMutex(pCache->pGroup); + return pPage; +} +#endif +static sqlite3_pcache_page *pcache1Fetch( + sqlite3_pcache *p, + unsigned int iKey, + int createFlag +){ +#if PCACHE1_MIGHT_USE_GROUP_MUTEX || defined(SQLITE_DEBUG) + PCache1 *pCache = (PCache1 *)p; +#endif + + assert( offsetof(PgHdr1,page)==0 ); + assert( pCache->bPurgeable || createFlag!=1 ); + assert( pCache->bPurgeable || pCache->nMin==0 ); + assert( pCache->bPurgeable==0 || pCache->nMin==10 ); + assert( pCache->nMin==0 || pCache->bPurgeable ); + assert( pCache->nHash>0 ); +#if PCACHE1_MIGHT_USE_GROUP_MUTEX + if( pCache->pGroup->mutex ){ + return (sqlite3_pcache_page*)pcache1FetchWithMutex(p, iKey, createFlag); + }else +#endif + { + return (sqlite3_pcache_page*)pcache1FetchNoMutex(p, iKey, createFlag); + } +} + + +/* +** Implementation of the sqlite3_pcache.xUnpin method. +** +** Mark a page as unpinned (eligible for asynchronous recycling). +*/ +static void pcache1Unpin( + sqlite3_pcache *p, + sqlite3_pcache_page *pPg, + int reuseUnlikely +){ + PCache1 *pCache = (PCache1 *)p; + PgHdr1 *pPage = (PgHdr1 *)pPg; + PGroup *pGroup = pCache->pGroup; + + assert( pPage->pCache==pCache ); + pcache1EnterMutex(pGroup); + + /* It is an error to call this function if the page is already + ** part of the PGroup LRU list. + */ + assert( pPage->pLruPrev==0 && pPage->pLruNext==0 ); + assert( pPage->isPinned==1 ); + + if( reuseUnlikely || pGroup->nCurrentPage>pGroup->nMaxPage ){ + pcache1RemoveFromHash(pPage, 1); + }else{ + /* Add the page to the PGroup LRU list. */ + PgHdr1 **ppFirst = &pGroup->lru.pLruNext; + pPage->pLruPrev = &pGroup->lru; + (pPage->pLruNext = *ppFirst)->pLruPrev = pPage; + *ppFirst = pPage; + pCache->nRecyclable++; + pPage->isPinned = 0; + } + + pcache1LeaveMutex(pCache->pGroup); +} + +/* +** Implementation of the sqlite3_pcache.xRekey method. +*/ +static void pcache1Rekey( + sqlite3_pcache *p, + sqlite3_pcache_page *pPg, + unsigned int iOld, + unsigned int iNew +){ + PCache1 *pCache = (PCache1 *)p; + PgHdr1 *pPage = (PgHdr1 *)pPg; + PgHdr1 **pp; + unsigned int h; + assert( pPage->iKey==iOld ); + assert( pPage->pCache==pCache ); + + pcache1EnterMutex(pCache->pGroup); + + h = iOld%pCache->nHash; + pp = &pCache->apHash[h]; + while( (*pp)!=pPage ){ + pp = &(*pp)->pNext; + } + *pp = pPage->pNext; + + h = iNew%pCache->nHash; + pPage->iKey = iNew; + pPage->pNext = pCache->apHash[h]; + pCache->apHash[h] = pPage; + if( iNew>pCache->iMaxKey ){ + pCache->iMaxKey = iNew; + } + + pcache1LeaveMutex(pCache->pGroup); +} + +/* +** Implementation of the sqlite3_pcache.xTruncate method. +** +** Discard all unpinned pages in the cache with a page number equal to +** or greater than parameter iLimit. Any pinned pages with a page number +** equal to or greater than iLimit are implicitly unpinned. +*/ +static void pcache1Truncate(sqlite3_pcache *p, unsigned int iLimit){ + PCache1 *pCache = (PCache1 *)p; + pcache1EnterMutex(pCache->pGroup); + if( iLimit<=pCache->iMaxKey ){ + pcache1TruncateUnsafe(pCache, iLimit); + pCache->iMaxKey = iLimit-1; + } + pcache1LeaveMutex(pCache->pGroup); +} + +/* +** Implementation of the sqlite3_pcache.xDestroy method. +** +** Destroy a cache allocated using pcache1Create(). +*/ +static void pcache1Destroy(sqlite3_pcache *p){ + PCache1 *pCache = (PCache1 *)p; + PGroup *pGroup = pCache->pGroup; + assert( pCache->bPurgeable || (pCache->nMax==0 && pCache->nMin==0) ); + pcache1EnterMutex(pGroup); + pcache1TruncateUnsafe(pCache, 0); + assert( pGroup->nMaxPage >= pCache->nMax ); + pGroup->nMaxPage -= pCache->nMax; + assert( pGroup->nMinPage >= pCache->nMin ); + pGroup->nMinPage -= pCache->nMin; + pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; + pcache1EnforceMaxPage(pCache); + pcache1LeaveMutex(pGroup); + sqlite3_free(pCache->pBulk); + sqlite3_free(pCache->apHash); + sqlite3_free(pCache); +} + +/* +** This function is called during initialization (sqlite3BtreeInitialize()) to +** install the default pluggable cache module, assuming the user has not +** already provided an alternative. +*/ +SQLITE_PRIVATE void sqlite3PCacheSetDefault(void){ + static const sqlite3_pcache_methods2 defaultMethods = { + 1, /* iVersion */ + 0, /* pArg */ + pcache1Init, /* xInit */ + pcache1Shutdown, /* xShutdown */ + pcache1Create, /* xCreate */ + pcache1Cachesize, /* xCachesize */ + pcache1Pagecount, /* xPagecount */ + pcache1Fetch, /* xFetch */ + pcache1Unpin, /* xUnpin */ + pcache1Rekey, /* xRekey */ + pcache1Truncate, /* xTruncate */ + pcache1Destroy, /* xDestroy */ + pcache1Shrink /* xShrink */ + }; + sqlite3_config(SQLITE_CONFIG_PCACHE2, &defaultMethods); +} + +/* +** Return the size of the header on each page of this PCACHE implementation. +*/ +SQLITE_PRIVATE int sqlite3HeaderSizePcache1(void){ return ROUND8(sizeof(PgHdr1)); } + +/* +** Return the global mutex used by this PCACHE implementation. The +** sqlite3_status() routine needs access to this mutex. +*/ +#ifndef NDEBUG +SQLITE_PRIVATE sqlite3_mutex *sqlite3Pcache1Mutex(void){ + return pcache1.mutex; +} +#endif + +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT +/* +** This function is called to free superfluous dynamically allocated memory +** held by the pager system. Memory in use by any SQLite pager allocated +** by the current thread may be sqlite3_free()ed. +** +** nReq is the number of bytes of memory required. Once this much has +** been released, the function returns. The return value is the total number +** of bytes of memory released. +*/ +SQLITE_PRIVATE int sqlite3PcacheReleaseMemory(int nReq){ + int nFree = 0; + assert( sqlite3_mutex_notheld(pcache1.grp.mutex) ); + assert( sqlite3_mutex_notheld(pcache1.mutex) ); + if( sqlite3GlobalConfig.nPage==0 ){ + PgHdr1 *p; + pcache1EnterMutex(&pcache1.grp); + while( (nReq<0 || nFree<nReq) + && (p=pcache1.grp.lru.pLruPrev)!=0 + && p->isAnchor==0 + ){ + nFree += pcache1MemSize(p->page.pBuf); +#ifdef SQLITE_PCACHE_SEPARATE_HEADER + nFree += sqlite3MemSize(p); +#endif + assert( p->isPinned==0 ); + pcache1PinPage(p); + pcache1RemoveFromHash(p, 1); + } + pcache1LeaveMutex(&pcache1.grp); + } + return nFree; +} +#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */ + +#ifdef SQLITE_TEST +/* +** This function is used by test procedures to inspect the internal state +** of the global cache. +*/ +SQLITE_PRIVATE void sqlite3PcacheStats( + int *pnCurrent, /* OUT: Total number of pages cached */ + int *pnMax, /* OUT: Global maximum cache size */ + int *pnMin, /* OUT: Sum of PCache1.nMin for purgeable caches */ + int *pnRecyclable /* OUT: Total number of pages available for recycling */ +){ + PgHdr1 *p; + int nRecyclable = 0; + for(p=pcache1.grp.lru.pLruNext; p && !p->isAnchor; p=p->pLruNext){ + assert( p->isPinned==0 ); + nRecyclable++; + } + *pnCurrent = pcache1.grp.nCurrentPage; + *pnMax = (int)pcache1.grp.nMaxPage; + *pnMin = (int)pcache1.grp.nMinPage; + *pnRecyclable = nRecyclable; +} +#endif + +/************** End of pcache1.c *********************************************/ +/************** Begin file pager.c *******************************************/ +/* +** 2001 September 15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This is the implementation of the page cache subsystem or "pager". +** +** The pager is used to access a database disk file. It implements +** atomic commit and rollback through the use of a journal file that +** is separate from the database file. The pager also implements file +** locking to prevent two processes from writing the same database +** file simultaneously, or one process from reading the database while +** another is writing. +*/ +#ifndef SQLITE_OMIT_DISKIO +/* #include "sqliteInt.h" */ +/************** Include wal.h in the middle of pager.c ***********************/ +/************** Begin file wal.h *********************************************/ +/* +** 2010 February 1 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This header file defines the interface to the write-ahead logging +** system. Refer to the comments below and the header comment attached to +** the implementation of each function in log.c for further details. +*/ + +#ifndef _WAL_H_ +#define _WAL_H_ + +/* #include "sqliteInt.h" */ + +/* Additional values that can be added to the sync_flags argument of +** sqlite3WalFrames(): +*/ +#define WAL_SYNC_TRANSACTIONS 0x20 /* Sync at the end of each transaction */ +#define SQLITE_SYNC_MASK 0x13 /* Mask off the SQLITE_SYNC_* values */ + +#ifdef SQLITE_OMIT_WAL +# define sqlite3WalOpen(x,y,z) 0 +#if 0 +# define sqlite3WalLimit(x,y) +#endif +# define sqlite3WalClose(w,x,y,z) 0 +# define sqlite3WalBeginReadTransaction(y,z) 0 +# define sqlite3WalEndReadTransaction(z) +# define sqlite3WalDbsize(y) 0 +# define sqlite3WalBeginWriteTransaction(y) 0 +# define sqlite3WalEndWriteTransaction(x) 0 +# define sqlite3WalUndo(x,y,z) 0 +# define sqlite3WalSavepoint(y,z) +# define sqlite3WalSavepointUndo(y,z) 0 +# define sqlite3WalFrames(u,v,w,x,y,z) 0 +# define sqlite3WalCheckpoint(r,s,t,u,v,w,x,y,z) 0 +#if 0 +# define sqlite3WalCallback(z) 0 +#endif +# define sqlite3WalExclusiveMode(y,z) 0 +#if 0 +# define sqlite3WalHeapMemory(z) 0 +#endif +# define sqlite3WalFramesize(z) 0 +# define sqlite3WalFindFrame(x,y,z) 0 +#else + +#define WAL_SAVEPOINT_NDATA 4 + +/* Connection to a write-ahead log (WAL) file. +** There is one object of this type for each pager. +*/ +typedef struct Wal Wal; + +/* Open and close a connection to a write-ahead log. */ +SQLITE_PRIVATE int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *, int, i64, Wal**); +SQLITE_PRIVATE int sqlite3WalClose(Wal *pWal, int sync_flags, int, u8 *); + +#if 0 +/* Set the limiting size of a WAL file. */ +SQLITE_PRIVATE void sqlite3WalLimit(Wal*, i64); +#endif + +/* Used by readers to open (lock) and close (unlock) a snapshot. A +** snapshot is like a read-transaction. It is the state of the database +** at an instant in time. sqlite3WalOpenSnapshot gets a read lock and +** preserves the current state even if the other threads or processes +** write to or checkpoint the WAL. sqlite3WalCloseSnapshot() closes the +** transaction and releases the lock. +*/ +SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *); +SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal); + +/* Read a page from the write-ahead log, if it is present. */ +SQLITE_PRIVATE int sqlite3WalFindFrame(Wal *, Pgno, u32 *); +SQLITE_PRIVATE int sqlite3WalReadFrame(Wal *, u32, int, u8 *); + +/* If the WAL is not empty, return the size of the database. */ +SQLITE_PRIVATE Pgno sqlite3WalDbsize(Wal *pWal); + +/* Obtain or release the WRITER lock. */ +SQLITE_PRIVATE int sqlite3WalBeginWriteTransaction(Wal *pWal); +SQLITE_PRIVATE int sqlite3WalEndWriteTransaction(Wal *pWal); + +/* Undo any frames written (but not committed) to the log */ +SQLITE_PRIVATE int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx); + +/* Return an integer that records the current (uncommitted) write +** position in the WAL */ +SQLITE_PRIVATE void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData); + +/* Move the write position of the WAL back to iFrame. Called in +** response to a ROLLBACK TO command. */ +SQLITE_PRIVATE int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData); + +/* Write a frame or frames to the log. */ +SQLITE_PRIVATE int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int); + +/* Copy pages from the log to the database file */ +SQLITE_PRIVATE int sqlite3WalCheckpoint( + Wal *pWal, /* Write-ahead log connection */ + int eMode, /* One of PASSIVE, FULL and RESTART */ + int (*xBusy)(void*), /* Function to call when busy */ + void *pBusyArg, /* Context argument for xBusyHandler */ + int sync_flags, /* Flags to sync db file with (or 0) */ + int nBuf, /* Size of buffer nBuf */ + u8 *zBuf, /* Temporary buffer to use */ + int *pnLog, /* OUT: Number of frames in WAL */ + int *pnCkpt /* OUT: Number of backfilled frames in WAL */ +); + +#if 0 +/* Return the value to pass to a sqlite3_wal_hook callback, the +** number of frames in the WAL at the point of the last commit since +** sqlite3WalCallback() was called. If no commits have occurred since +** the last call, then return 0. +*/ +SQLITE_PRIVATE int sqlite3WalCallback(Wal *pWal); +#endif + +/* Tell the wal layer that an EXCLUSIVE lock has been obtained (or released) +** by the pager layer on the database file. +*/ +SQLITE_PRIVATE int sqlite3WalExclusiveMode(Wal *pWal, int op); + +#if 0 +/* Return true if the argument is non-NULL and the WAL module is using +** heap-memory for the wal-index. Otherwise, if the argument is NULL or the +** WAL module is using shared-memory, return false. +*/ +SQLITE_PRIVATE int sqlite3WalHeapMemory(Wal *pWal); +#endif + +#ifdef SQLITE_ENABLE_ZIPVFS +/* If the WAL file is not empty, return the number of bytes of content +** stored in each frame (i.e. the db page-size when the WAL was created). +*/ +SQLITE_PRIVATE int sqlite3WalFramesize(Wal *pWal); +#endif + +#endif /* ifndef SQLITE_OMIT_WAL */ +#endif /* _WAL_H_ */ + +/************** End of wal.h *************************************************/ +/************** Continuing where we left off in pager.c **********************/ + + +/******************* NOTES ON THE DESIGN OF THE PAGER ************************ +** +** This comment block describes invariants that hold when using a rollback +** journal. These invariants do not apply for journal_mode=WAL, +** journal_mode=MEMORY, or journal_mode=OFF. +** +** Within this comment block, a page is deemed to have been synced +** automatically as soon as it is written when PRAGMA synchronous=OFF. +** Otherwise, the page is not synced until the xSync method of the VFS +** is called successfully on the file containing the page. +** +** Definition: A page of the database file is said to be "overwriteable" if +** one or more of the following are true about the page: +** +** (a) The original content of the page as it was at the beginning of +** the transaction has been written into the rollback journal and +** synced. +** +** (b) The page was a freelist leaf page at the start of the transaction. +** +** (c) The page number is greater than the largest page that existed in +** the database file at the start of the transaction. +** +** (1) A page of the database file is never overwritten unless one of the +** following are true: +** +** (a) The page and all other pages on the same sector are overwriteable. +** +** (b) The atomic page write optimization is enabled, and the entire +** transaction other than the update of the transaction sequence +** number consists of a single page change. +** +** (2) The content of a page written into the rollback journal exactly matches +** both the content in the database when the rollback journal was written +** and the content in the database at the beginning of the current +** transaction. +** +** (3) Writes to the database file are an integer multiple of the page size +** in length and are aligned on a page boundary. +** +** (4) Reads from the database file are either aligned on a page boundary and +** an integer multiple of the page size in length or are taken from the +** first 100 bytes of the database file. +** +** (5) All writes to the database file are synced prior to the rollback journal +** being deleted, truncated, or zeroed. +** +** (6) If a master journal file is used, then all writes to the database file +** are synced prior to the master journal being deleted. +** +** Definition: Two databases (or the same database at two points it time) +** are said to be "logically equivalent" if they give the same answer to +** all queries. Note in particular the content of freelist leaf +** pages can be changed arbitrarily without affecting the logical equivalence +** of the database. +** +** (7) At any time, if any subset, including the empty set and the total set, +** of the unsynced changes to a rollback journal are removed and the +** journal is rolled back, the resulting database file will be logically +** equivalent to the database file at the beginning of the transaction. +** +** (8) When a transaction is rolled back, the xTruncate method of the VFS +** is called to restore the database file to the same size it was at +** the beginning of the transaction. (In some VFSes, the xTruncate +** method is a no-op, but that does not change the fact the SQLite will +** invoke it.) +** +** (9) Whenever the database file is modified, at least one bit in the range +** of bytes from 24 through 39 inclusive will be changed prior to releasing +** the EXCLUSIVE lock, thus signaling other connections on the same +** database to flush their caches. +** +** (10) The pattern of bits in bytes 24 through 39 shall not repeat in less +** than one billion transactions. +** +** (11) A database file is well-formed at the beginning and at the conclusion +** of every transaction. +** +** (12) An EXCLUSIVE lock is held on the database file when writing to +** the database file. +** +** (13) A SHARED lock is held on the database file while reading any +** content out of the database file. +** +******************************************************************************/ + +/* +** Macros for troubleshooting. Normally turned off +*/ +#if 0 +int sqlite3PagerTrace=1; /* True to enable tracing */ +#define sqlite3DebugPrintf printf +#define PAGERTRACE(X) if( sqlite3PagerTrace ){ sqlite3DebugPrintf X; } +#else +#define PAGERTRACE(X) +#endif + +/* +** The following two macros are used within the PAGERTRACE() macros above +** to print out file-descriptors. +** +** PAGERID() takes a pointer to a Pager struct as its argument. The +** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file +** struct as its argument. +*/ +#define PAGERID(p) ((int)(p->fd)) +#define FILEHANDLEID(fd) ((int)fd) + +/* +** The Pager.eState variable stores the current 'state' of a pager. A +** pager may be in any one of the seven states shown in the following +** state diagram. +** +** OPEN <------+------+ +** | | | +** V | | +** +---------> READER-------+ | +** | | | +** | V | +** |<-------WRITER_LOCKED------> ERROR +** | | ^ +** | V | +** |<------WRITER_CACHEMOD-------->| +** | | | +** | V | +** |<-------WRITER_DBMOD---------->| +** | | | +** | V | +** +<------WRITER_FINISHED-------->+ +** +** +** List of state transitions and the C [function] that performs each: +** +** OPEN -> READER [sqlite3PagerSharedLock] +** READER -> OPEN [pager_unlock] +** +** READER -> WRITER_LOCKED [sqlite3PagerBegin] +** WRITER_LOCKED -> WRITER_CACHEMOD [pager_open_journal] +** WRITER_CACHEMOD -> WRITER_DBMOD [syncJournal] +** WRITER_DBMOD -> WRITER_FINISHED [sqlite3PagerCommitPhaseOne] +** WRITER_*** -> READER [pager_end_transaction] +** +** WRITER_*** -> ERROR [pager_error] +** ERROR -> OPEN [pager_unlock] +** +** +** OPEN: +** +** The pager starts up in this state. Nothing is guaranteed in this +** state - the file may or may not be locked and the database size is +** unknown. The database may not be read or written. +** +** * No read or write transaction is active. +** * Any lock, or no lock at all, may be held on the database file. +** * The dbSize, dbOrigSize and dbFileSize variables may not be trusted. +** +** READER: +** +** In this state all the requirements for reading the database in +** rollback (non-WAL) mode are met. Unless the pager is (or recently +** was) in exclusive-locking mode, a user-level read transaction is +** open. The database size is known in this state. +** +** A connection running with locking_mode=normal enters this state when +** it opens a read-transaction on the database and returns to state +** OPEN after the read-transaction is completed. However a connection +** running in locking_mode=exclusive (including temp databases) remains in +** this state even after the read-transaction is closed. The only way +** a locking_mode=exclusive connection can transition from READER to OPEN +** is via the ERROR state (see below). +** +** * A read transaction may be active (but a write-transaction cannot). +** * A SHARED or greater lock is held on the database file. +** * The dbSize variable may be trusted (even if a user-level read +** transaction is not active). The dbOrigSize and dbFileSize variables +** may not be trusted at this point. +** * If the database is a WAL database, then the WAL connection is open. +** * Even if a read-transaction is not open, it is guaranteed that +** there is no hot-journal in the file-system. +** +** WRITER_LOCKED: +** +** The pager moves to this state from READER when a write-transaction +** is first opened on the database. In WRITER_LOCKED state, all locks +** required to start a write-transaction are held, but no actual +** modifications to the cache or database have taken place. +** +** In rollback mode, a RESERVED or (if the transaction was opened with +** BEGIN EXCLUSIVE) EXCLUSIVE lock is obtained on the database file when +** moving to this state, but the journal file is not written to or opened +** to in this state. If the transaction is committed or rolled back while +** in WRITER_LOCKED state, all that is required is to unlock the database +** file. +** +** IN WAL mode, WalBeginWriteTransaction() is called to lock the log file. +** If the connection is running with locking_mode=exclusive, an attempt +** is made to obtain an EXCLUSIVE lock on the database file. +** +** * A write transaction is active. +** * If the connection is open in rollback-mode, a RESERVED or greater +** lock is held on the database file. +** * If the connection is open in WAL-mode, a WAL write transaction +** is open (i.e. sqlite3WalBeginWriteTransaction() has been successfully +** called). +** * The dbSize, dbOrigSize and dbFileSize variables are all valid. +** * The contents of the pager cache have not been modified. +** * The journal file may or may not be open. +** * Nothing (not even the first header) has been written to the journal. +** +** WRITER_CACHEMOD: +** +** A pager moves from WRITER_LOCKED state to this state when a page is +** first modified by the upper layer. In rollback mode the journal file +** is opened (if it is not already open) and a header written to the +** start of it. The database file on disk has not been modified. +** +** * A write transaction is active. +** * A RESERVED or greater lock is held on the database file. +** * The journal file is open and the first header has been written +** to it, but the header has not been synced to disk. +** * The contents of the page cache have been modified. +** +** WRITER_DBMOD: +** +** The pager transitions from WRITER_CACHEMOD into WRITER_DBMOD state +** when it modifies the contents of the database file. WAL connections +** never enter this state (since they do not modify the database file, +** just the log file). +** +** * A write transaction is active. +** * An EXCLUSIVE or greater lock is held on the database file. +** * The journal file is open and the first header has been written +** and synced to disk. +** * The contents of the page cache have been modified (and possibly +** written to disk). +** +** WRITER_FINISHED: +** +** It is not possible for a WAL connection to enter this state. +** +** A rollback-mode pager changes to WRITER_FINISHED state from WRITER_DBMOD +** state after the entire transaction has been successfully written into the +** database file. In this state the transaction may be committed simply +** by finalizing the journal file. Once in WRITER_FINISHED state, it is +** not possible to modify the database further. At this point, the upper +** layer must either commit or rollback the transaction. +** +** * A write transaction is active. +** * An EXCLUSIVE or greater lock is held on the database file. +** * All writing and syncing of journal and database data has finished. +** If no error occurred, all that remains is to finalize the journal to +** commit the transaction. If an error did occur, the caller will need +** to rollback the transaction. +** +** ERROR: +** +** The ERROR state is entered when an IO or disk-full error (including +** SQLITE_IOERR_NOMEM) occurs at a point in the code that makes it +** difficult to be sure that the in-memory pager state (cache contents, +** db size etc.) are consistent with the contents of the file-system. +** +** Temporary pager files may enter the ERROR state, but in-memory pagers +** cannot. +** +** For example, if an IO error occurs while performing a rollback, +** the contents of the page-cache may be left in an inconsistent state. +** At this point it would be dangerous to change back to READER state +** (as usually happens after a rollback). Any subsequent readers might +** report database corruption (due to the inconsistent cache), and if +** they upgrade to writers, they may inadvertently corrupt the database +** file. To avoid this hazard, the pager switches into the ERROR state +** instead of READER following such an error. +** +** Once it has entered the ERROR state, any attempt to use the pager +** to read or write data returns an error. Eventually, once all +** outstanding transactions have been abandoned, the pager is able to +** transition back to OPEN state, discarding the contents of the +** page-cache and any other in-memory state at the same time. Everything +** is reloaded from disk (and, if necessary, hot-journal rollback peformed) +** when a read-transaction is next opened on the pager (transitioning +** the pager into READER state). At that point the system has recovered +** from the error. +** +** Specifically, the pager jumps into the ERROR state if: +** +** 1. An error occurs while attempting a rollback. This happens in +** function sqlite3PagerRollback(). +** +** 2. An error occurs while attempting to finalize a journal file +** following a commit in function sqlite3PagerCommitPhaseTwo(). +** +** 3. An error occurs while attempting to write to the journal or +** database file in function pagerStress() in order to free up +** memory. +** +** In other cases, the error is returned to the b-tree layer. The b-tree +** layer then attempts a rollback operation. If the error condition +** persists, the pager enters the ERROR state via condition (1) above. +** +** Condition (3) is necessary because it can be triggered by a read-only +** statement executed within a transaction. In this case, if the error +** code were simply returned to the user, the b-tree layer would not +** automatically attempt a rollback, as it assumes that an error in a +** read-only statement cannot leave the pager in an internally inconsistent +** state. +** +** * The Pager.errCode variable is set to something other than SQLITE_OK. +** * There are one or more outstanding references to pages (after the +** last reference is dropped the pager should move back to OPEN state). +** * The pager is not an in-memory pager. +** +** +** Notes: +** +** * A pager is never in WRITER_DBMOD or WRITER_FINISHED state if the +** connection is open in WAL mode. A WAL connection is always in one +** of the first four states. +** +** * Normally, a connection open in exclusive mode is never in PAGER_OPEN +** state. There are two exceptions: immediately after exclusive-mode has +** been turned on (and before any read or write transactions are +** executed), and when the pager is leaving the "error state". +** +** * See also: assert_pager_state(). +*/ +#define PAGER_OPEN 0 +#define PAGER_READER 1 +#define PAGER_WRITER_LOCKED 2 +#define PAGER_WRITER_CACHEMOD 3 +#define PAGER_WRITER_DBMOD 4 +#define PAGER_WRITER_FINISHED 5 +#define PAGER_ERROR 6 + +/* +** The Pager.eLock variable is almost always set to one of the +** following locking-states, according to the lock currently held on +** the database file: NO_LOCK, SHARED_LOCK, RESERVED_LOCK or EXCLUSIVE_LOCK. +** This variable is kept up to date as locks are taken and released by +** the pagerLockDb() and pagerUnlockDb() wrappers. +** +** If the VFS xLock() or xUnlock() returns an error other than SQLITE_BUSY +** (i.e. one of the SQLITE_IOERR subtypes), it is not clear whether or not +** the operation was successful. In these circumstances pagerLockDb() and +** pagerUnlockDb() take a conservative approach - eLock is always updated +** when unlocking the file, and only updated when locking the file if the +** VFS call is successful. This way, the Pager.eLock variable may be set +** to a less exclusive (lower) value than the lock that is actually held +** at the system level, but it is never set to a more exclusive value. +** +** This is usually safe. If an xUnlock fails or appears to fail, there may +** be a few redundant xLock() calls or a lock may be held for longer than +** required, but nothing really goes wrong. +** +** The exception is when the database file is unlocked as the pager moves +** from ERROR to OPEN state. At this point there may be a hot-journal file +** in the file-system that needs to be rolled back (as part of an OPEN->SHARED +** transition, by the same pager or any other). If the call to xUnlock() +** fails at this point and the pager is left holding an EXCLUSIVE lock, this +** can confuse the call to xCheckReservedLock() call made later as part +** of hot-journal detection. +** +** xCheckReservedLock() is defined as returning true "if there is a RESERVED +** lock held by this process or any others". So xCheckReservedLock may +** return true because the caller itself is holding an EXCLUSIVE lock (but +** doesn't know it because of a previous error in xUnlock). If this happens +** a hot-journal may be mistaken for a journal being created by an active +** transaction in another process, causing SQLite to read from the database +** without rolling it back. +** +** To work around this, if a call to xUnlock() fails when unlocking the +** database in the ERROR state, Pager.eLock is set to UNKNOWN_LOCK. It +** is only changed back to a real locking state after a successful call +** to xLock(EXCLUSIVE). Also, the code to do the OPEN->SHARED state transition +** omits the check for a hot-journal if Pager.eLock is set to UNKNOWN_LOCK +** lock. Instead, it assumes a hot-journal exists and obtains an EXCLUSIVE +** lock on the database file before attempting to roll it back. See function +** PagerSharedLock() for more detail. +** +** Pager.eLock may only be set to UNKNOWN_LOCK when the pager is in +** PAGER_OPEN state. +*/ +#define UNKNOWN_LOCK (EXCLUSIVE_LOCK+1) + +/* +** A macro used for invoking the codec if there is one +*/ +#ifdef SQLITE_HAS_CODEC +# define CODEC1(P,D,N,X,E) \ + if( P->xCodec && P->xCodec(P->pCodec,D,N,X)==0 ){ E; } +# define CODEC2(P,D,N,X,E,O) \ + if( P->xCodec==0 ){ O=(char*)D; }else \ + if( (O=(char*)(P->xCodec(P->pCodec,D,N,X)))==0 ){ E; } +#else +# define CODEC1(P,D,N,X,E) /* NO-OP */ +# define CODEC2(P,D,N,X,E,O) O=(char*)D +#endif + +/* +** The maximum allowed sector size. 64KiB. If the xSectorsize() method +** returns a value larger than this, then MAX_SECTOR_SIZE is used instead. +** This could conceivably cause corruption following a power failure on +** such a system. This is currently an undocumented limit. +*/ +#define MAX_SECTOR_SIZE 0x10000 + +/* +** An instance of the following structure is allocated for each active +** savepoint and statement transaction in the system. All such structures +** are stored in the Pager.aSavepoint[] array, which is allocated and +** resized using sqlite3Realloc(). +** +** When a savepoint is created, the PagerSavepoint.iHdrOffset field is +** set to 0. If a journal-header is written into the main journal while +** the savepoint is active, then iHdrOffset is set to the byte offset +** immediately following the last journal record written into the main +** journal before the journal-header. This is required during savepoint +** rollback (see pagerPlaybackSavepoint()). +*/ +typedef struct PagerSavepoint PagerSavepoint; +struct PagerSavepoint { + i64 iOffset; /* Starting offset in main journal */ + i64 iHdrOffset; /* See above */ + Bitvec *pInSavepoint; /* Set of pages in this savepoint */ + Pgno nOrig; /* Original number of pages in file */ + Pgno iSubRec; /* Index of first record in sub-journal */ +#ifndef SQLITE_OMIT_WAL + u32 aWalData[WAL_SAVEPOINT_NDATA]; /* WAL savepoint context */ +#endif +}; + +/* +** Bits of the Pager.doNotSpill flag. See further description below. +*/ +#define SPILLFLAG_OFF 0x01 /* Never spill cache. Set via pragma */ +#define SPILLFLAG_ROLLBACK 0x02 /* Current rolling back, so do not spill */ +#define SPILLFLAG_NOSYNC 0x04 /* Spill is ok, but do not sync */ + +/* +** An open page cache is an instance of struct Pager. A description of +** some of the more important member variables follows: +** +** eState +** +** The current 'state' of the pager object. See the comment and state +** diagram above for a description of the pager state. +** +** eLock +** +** For a real on-disk database, the current lock held on the database file - +** NO_LOCK, SHARED_LOCK, RESERVED_LOCK or EXCLUSIVE_LOCK. +** +** For a temporary or in-memory database (neither of which require any +** locks), this variable is always set to EXCLUSIVE_LOCK. Since such +** databases always have Pager.exclusiveMode==1, this tricks the pager +** logic into thinking that it already has all the locks it will ever +** need (and no reason to release them). +** +** In some (obscure) circumstances, this variable may also be set to +** UNKNOWN_LOCK. See the comment above the #define of UNKNOWN_LOCK for +** details. +** +** changeCountDone +** +** This boolean variable is used to make sure that the change-counter +** (the 4-byte header field at byte offset 24 of the database file) is +** not updated more often than necessary. +** +** It is set to true when the change-counter field is updated, which +** can only happen if an exclusive lock is held on the database file. +** It is cleared (set to false) whenever an exclusive lock is +** relinquished on the database file. Each time a transaction is committed, +** The changeCountDone flag is inspected. If it is true, the work of +** updating the change-counter is omitted for the current transaction. +** +** This mechanism means that when running in exclusive mode, a connection +** need only update the change-counter once, for the first transaction +** committed. +** +** setMaster +** +** When PagerCommitPhaseOne() is called to commit a transaction, it may +** (or may not) specify a master-journal name to be written into the +** journal file before it is synced to disk. +** +** Whether or not a journal file contains a master-journal pointer affects +** the way in which the journal file is finalized after the transaction is +** committed or rolled back when running in "journal_mode=PERSIST" mode. +** If a journal file does not contain a master-journal pointer, it is +** finalized by overwriting the first journal header with zeroes. If +** it does contain a master-journal pointer the journal file is finalized +** by truncating it to zero bytes, just as if the connection were +** running in "journal_mode=truncate" mode. +** +** Journal files that contain master journal pointers cannot be finalized +** simply by overwriting the first journal-header with zeroes, as the +** master journal pointer could interfere with hot-journal rollback of any +** subsequently interrupted transaction that reuses the journal file. +** +** The flag is cleared as soon as the journal file is finalized (either +** by PagerCommitPhaseTwo or PagerRollback). If an IO error prevents the +** journal file from being successfully finalized, the setMaster flag +** is cleared anyway (and the pager will move to ERROR state). +** +** doNotSpill +** +** This variables control the behavior of cache-spills (calls made by +** the pcache module to the pagerStress() routine to write cached data +** to the file-system in order to free up memory). +** +** When bits SPILLFLAG_OFF or SPILLFLAG_ROLLBACK of doNotSpill are set, +** writing to the database from pagerStress() is disabled altogether. +** The SPILLFLAG_ROLLBACK case is done in a very obscure case that +** comes up during savepoint rollback that requires the pcache module +** to allocate a new page to prevent the journal file from being written +** while it is being traversed by code in pager_playback(). The SPILLFLAG_OFF +** case is a user preference. +** +** If the SPILLFLAG_NOSYNC bit is set, writing to the database from +** pagerStress() is permitted, but syncing the journal file is not. +** This flag is set by sqlite3PagerWrite() when the file-system sector-size +** is larger than the database page-size in order to prevent a journal sync +** from happening in between the journalling of two pages on the same sector. +** +** subjInMemory +** +** This is a boolean variable. If true, then any required sub-journal +** is opened as an in-memory journal file. If false, then in-memory +** sub-journals are only used for in-memory pager files. +** +** This variable is updated by the upper layer each time a new +** write-transaction is opened. +** +** dbSize, dbOrigSize, dbFileSize +** +** Variable dbSize is set to the number of pages in the database file. +** It is valid in PAGER_READER and higher states (all states except for +** OPEN and ERROR). +** +** dbSize is set based on the size of the database file, which may be +** larger than the size of the database (the value stored at offset +** 28 of the database header by the btree). If the size of the file +** is not an integer multiple of the page-size, the value stored in +** dbSize is rounded down (i.e. a 5KB file with 2K page-size has dbSize==2). +** Except, any file that is greater than 0 bytes in size is considered +** to have at least one page. (i.e. a 1KB file with 2K page-size leads +** to dbSize==1). +** +** During a write-transaction, if pages with page-numbers greater than +** dbSize are modified in the cache, dbSize is updated accordingly. +** Similarly, if the database is truncated using PagerTruncateImage(), +** dbSize is updated. +** +** Variables dbOrigSize and dbFileSize are valid in states +** PAGER_WRITER_LOCKED and higher. dbOrigSize is a copy of the dbSize +** variable at the start of the transaction. It is used during rollback, +** and to determine whether or not pages need to be journalled before +** being modified. +** +** Throughout a write-transaction, dbFileSize contains the size of +** the file on disk in pages. It is set to a copy of dbSize when the +** write-transaction is first opened, and updated when VFS calls are made +** to write or truncate the database file on disk. +** +** The only reason the dbFileSize variable is required is to suppress +** unnecessary calls to xTruncate() after committing a transaction. If, +** when a transaction is committed, the dbFileSize variable indicates +** that the database file is larger than the database image (Pager.dbSize), +** pager_truncate() is called. The pager_truncate() call uses xFilesize() +** to measure the database file on disk, and then truncates it if required. +** dbFileSize is not used when rolling back a transaction. In this case +** pager_truncate() is called unconditionally (which means there may be +** a call to xFilesize() that is not strictly required). In either case, +** pager_truncate() may cause the file to become smaller or larger. +** +** dbHintSize +** +** The dbHintSize variable is used to limit the number of calls made to +** the VFS xFileControl(FCNTL_SIZE_HINT) method. +** +** dbHintSize is set to a copy of the dbSize variable when a +** write-transaction is opened (at the same time as dbFileSize and +** dbOrigSize). If the xFileControl(FCNTL_SIZE_HINT) method is called, +** dbHintSize is increased to the number of pages that correspond to the +** size-hint passed to the method call. See pager_write_pagelist() for +** details. +** +** errCode +** +** The Pager.errCode variable is only ever used in PAGER_ERROR state. It +** is set to zero in all other states. In PAGER_ERROR state, Pager.errCode +** is always set to SQLITE_FULL, SQLITE_IOERR or one of the SQLITE_IOERR_XXX +** sub-codes. +*/ +struct Pager { + sqlite3_vfs *pVfs; /* OS functions to use for IO */ + u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */ + u8 journalMode; /* One of the PAGER_JOURNALMODE_* values */ + u8 useJournal; /* Use a rollback journal on this file */ + u8 noSync; /* Do not sync the journal if true */ + u8 fullSync; /* Do extra syncs of the journal for robustness */ + u8 ckptSyncFlags; /* SYNC_NORMAL or SYNC_FULL for checkpoint */ + u8 walSyncFlags; /* SYNC_NORMAL or SYNC_FULL for wal writes */ + u8 syncFlags; /* SYNC_NORMAL or SYNC_FULL otherwise */ + u8 tempFile; /* zFilename is a temporary or immutable file */ + u8 noLock; /* Do not lock (except in WAL mode) */ + u8 readOnly; /* True for a read-only database */ + u8 memDb; /* True to inhibit all file I/O */ + + /************************************************************************** + ** The following block contains those class members that change during + ** routine operation. Class members not in this block are either fixed + ** when the pager is first created or else only change when there is a + ** significant mode change (such as changing the page_size, locking_mode, + ** or the journal_mode). From another view, these class members describe + ** the "state" of the pager, while other class members describe the + ** "configuration" of the pager. + */ + u8 eState; /* Pager state (OPEN, READER, WRITER_LOCKED..) */ + u8 eLock; /* Current lock held on database file */ + u8 changeCountDone; /* Set after incrementing the change-counter */ + u8 setMaster; /* True if a m-j name has been written to jrnl */ + u8 doNotSpill; /* Do not spill the cache when non-zero */ + u8 subjInMemory; /* True to use in-memory sub-journals */ + u8 bUseFetch; /* True to use xFetch() */ + u8 hasHeldSharedLock; /* True if a shared lock has ever been held */ + Pgno dbSize; /* Number of pages in the database */ + Pgno dbOrigSize; /* dbSize before the current transaction */ + Pgno dbFileSize; /* Number of pages in the database file */ + Pgno dbHintSize; /* Value passed to FCNTL_SIZE_HINT call */ + int errCode; /* One of several kinds of errors */ + int nRec; /* Pages journalled since last j-header written */ + u32 cksumInit; /* Quasi-random value added to every checksum */ + u32 nSubRec; /* Number of records written to sub-journal */ + Bitvec *pInJournal; /* One bit for each page in the database file */ + sqlite3_file *fd; /* File descriptor for database */ + sqlite3_file *jfd; /* File descriptor for main journal */ + sqlite3_file *sjfd; /* File descriptor for sub-journal */ + i64 journalOff; /* Current write offset in the journal file */ + i64 journalHdr; /* Byte offset to previous journal header */ + sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */ + PagerSavepoint *aSavepoint; /* Array of active savepoints */ + int nSavepoint; /* Number of elements in aSavepoint[] */ + u32 iDataVersion; /* Changes whenever database content changes */ + char dbFileVers[16]; /* Changes whenever database file changes */ + + int nMmapOut; /* Number of mmap pages currently outstanding */ + sqlite3_int64 szMmap; /* Desired maximum mmap size */ + PgHdr *pMmapFreelist; /* List of free mmap page headers (pDirty) */ + /* + ** End of the routinely-changing class members + ***************************************************************************/ + + u16 nExtra; /* Add this many bytes to each in-memory page */ + i16 nReserve; /* Number of unused bytes at end of each page */ + u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */ + u32 sectorSize; /* Assumed sector size during rollback */ + int pageSize; /* Number of bytes in a page */ + Pgno mxPgno; /* Maximum allowed size of the database */ + i64 journalSizeLimit; /* Size limit for persistent journal files */ + char *zFilename; /* Name of the database file */ + char *zJournal; /* Name of the journal file */ + int (*xBusyHandler)(void*); /* Function to call when busy */ + void *pBusyHandlerArg; /* Context argument for xBusyHandler */ + int aStat[3]; /* Total cache hits, misses and writes */ +#ifdef SQLITE_TEST + int nRead; /* Database pages read */ +#endif + void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */ +#ifdef SQLITE_HAS_CODEC + void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */ + void (*xCodecSizeChng)(void*,int,int); /* Notify of page size changes */ + void (*xCodecFree)(void*); /* Destructor for the codec */ + void *pCodec; /* First argument to xCodec... methods */ +#endif + char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */ + PCache *pPCache; /* Pointer to page cache object */ +#ifndef SQLITE_OMIT_WAL + Wal *pWal; /* Write-ahead log used by "journal_mode=wal" */ + char *zWal; /* File name for write-ahead log */ +#endif +}; + +/* +** Indexes for use with Pager.aStat[]. The Pager.aStat[] array contains +** the values accessed by passing SQLITE_DBSTATUS_CACHE_HIT, CACHE_MISS +** or CACHE_WRITE to sqlite3_db_status(). +*/ +#define PAGER_STAT_HIT 0 +#define PAGER_STAT_MISS 1 +#define PAGER_STAT_WRITE 2 + +/* +** The following global variables hold counters used for +** testing purposes only. These variables do not exist in +** a non-testing build. These variables are not thread-safe. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3_pager_readdb_count = 0; /* Number of full pages read from DB */ +SQLITE_PRIVATE int sqlite3_pager_writedb_count = 0; /* Number of full pages written to DB */ +SQLITE_PRIVATE int sqlite3_pager_writej_count = 0; /* Number of pages written to journal */ +# define PAGER_INCR(v) v++ +#else +# define PAGER_INCR(v) +#endif + + + +/* +** Journal files begin with the following magic string. The data +** was obtained from /dev/random. It is used only as a sanity check. +** +** Since version 2.8.0, the journal format contains additional sanity +** checking information. If the power fails while the journal is being +** written, semi-random garbage data might appear in the journal +** file after power is restored. If an attempt is then made +** to roll the journal back, the database could be corrupted. The additional +** sanity checking data is an attempt to discover the garbage in the +** journal and ignore it. +** +** The sanity checking information for the new journal format consists +** of a 32-bit checksum on each page of data. The checksum covers both +** the page number and the pPager->pageSize bytes of data for the page. +** This cksum is initialized to a 32-bit random value that appears in the +** journal file right after the header. The random initializer is important, +** because garbage data that appears at the end of a journal is likely +** data that was once in other files that have now been deleted. If the +** garbage data came from an obsolete journal file, the checksums might +** be correct. But by initializing the checksum to random value which +** is different for every journal, we minimize that risk. +*/ +static const unsigned char aJournalMagic[] = { + 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7, +}; + +/* +** The size of the of each page record in the journal is given by +** the following macro. +*/ +#define JOURNAL_PG_SZ(pPager) ((pPager->pageSize) + 8) + +/* +** The journal header size for this pager. This is usually the same +** size as a single disk sector. See also setSectorSize(). +*/ +#define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize) + +/* +** The macro USEFETCH is true if we are allowed to use the xFetch and xUnfetch +** interfaces to access the database using memory-mapped I/O. +*/ +#if SQLITE_MAX_MMAP_SIZE>0 +# define USEFETCH(x) ((x)->bUseFetch) +#else +# define USEFETCH(x) 0 +#endif + +/* +** The maximum legal page number is (2^31 - 1). +*/ +#define PAGER_MAX_PGNO 2147483647 + +/* +** The argument to this macro is a file descriptor (type sqlite3_file*). +** Return 0 if it is not open, or non-zero (but not 1) if it is. +** +** This is so that expressions can be written as: +** +** if( isOpen(pPager->jfd) ){ ... +** +** instead of +** +** if( pPager->jfd->pMethods ){ ... +*/ +#define isOpen(pFd) ((pFd)->pMethods!=0) + +/* +** Return true if this pager uses a write-ahead log instead of the usual +** rollback journal. Otherwise false. +*/ +#ifndef SQLITE_OMIT_WAL +static int pagerUseWal(Pager *pPager){ + return (pPager->pWal!=0); +} +#else +# define pagerUseWal(x) 0 +# define pagerRollbackWal(x) 0 +# define pagerWalFrames(v,w,x,y) 0 +# define pagerOpenWalIfPresent(z) SQLITE_OK +# define pagerBeginReadTransaction(z) SQLITE_OK +#endif + +#ifndef NDEBUG +/* +** Usage: +** +** assert( assert_pager_state(pPager) ); +** +** This function runs many asserts to try to find inconsistencies in +** the internal state of the Pager object. +*/ +static int assert_pager_state(Pager *p){ + Pager *pPager = p; + + /* State must be valid. */ + assert( p->eState==PAGER_OPEN + || p->eState==PAGER_READER + || p->eState==PAGER_WRITER_LOCKED + || p->eState==PAGER_WRITER_CACHEMOD + || p->eState==PAGER_WRITER_DBMOD + || p->eState==PAGER_WRITER_FINISHED + || p->eState==PAGER_ERROR + ); + + /* Regardless of the current state, a temp-file connection always behaves + ** as if it has an exclusive lock on the database file. It never updates + ** the change-counter field, so the changeCountDone flag is always set. + */ + assert( p->tempFile==0 || p->eLock==EXCLUSIVE_LOCK ); + assert( p->tempFile==0 || pPager->changeCountDone ); + + /* If the useJournal flag is clear, the journal-mode must be "OFF". + ** And if the journal-mode is "OFF", the journal file must not be open. + */ + assert( p->journalMode==PAGER_JOURNALMODE_OFF || p->useJournal ); + assert( p->journalMode!=PAGER_JOURNALMODE_OFF || !isOpen(p->jfd) ); + + /* If changeCountDone is set, a RESERVED lock or greater must be held + ** on the file. + */ + assert( pPager->changeCountDone==0 || pPager->eLock>=RESERVED_LOCK ); + assert( p->eLock!=PENDING_LOCK ); + + switch( p->eState ){ + case PAGER_OPEN: + assert( pPager->errCode==SQLITE_OK ); + assert( sqlite3PcacheRefCount(pPager->pPCache)==0 || pPager->tempFile ); + break; + + case PAGER_READER: + assert( pPager->errCode==SQLITE_OK ); + assert( p->eLock!=UNKNOWN_LOCK ); + assert( p->eLock>=SHARED_LOCK ); + break; + + case PAGER_WRITER_LOCKED: + assert( p->eLock!=UNKNOWN_LOCK ); + assert( pPager->errCode==SQLITE_OK ); + if( !pagerUseWal(pPager) ){ + assert( p->eLock>=RESERVED_LOCK ); + } + assert( pPager->dbSize==pPager->dbOrigSize ); + assert( pPager->dbOrigSize==pPager->dbFileSize ); + assert( pPager->dbOrigSize==pPager->dbHintSize ); + assert( pPager->setMaster==0 ); + break; + + case PAGER_WRITER_CACHEMOD: + assert( p->eLock!=UNKNOWN_LOCK ); + assert( pPager->errCode==SQLITE_OK ); + if( !pagerUseWal(pPager) ){ + /* It is possible that if journal_mode=wal here that neither the + ** journal file nor the WAL file are open. This happens during + ** a rollback transaction that switches from journal_mode=off + ** to journal_mode=wal. + */ + assert( p->eLock>=RESERVED_LOCK ); + assert( isOpen(p->jfd) + || p->journalMode==PAGER_JOURNALMODE_OFF + || p->journalMode==PAGER_JOURNALMODE_WAL + ); + } + assert( pPager->dbOrigSize==pPager->dbFileSize ); + assert( pPager->dbOrigSize==pPager->dbHintSize ); + break; + + case PAGER_WRITER_DBMOD: + assert( p->eLock==EXCLUSIVE_LOCK ); + assert( pPager->errCode==SQLITE_OK ); + assert( !pagerUseWal(pPager) ); + assert( p->eLock>=EXCLUSIVE_LOCK ); + assert( isOpen(p->jfd) + || p->journalMode==PAGER_JOURNALMODE_OFF + || p->journalMode==PAGER_JOURNALMODE_WAL + ); + assert( pPager->dbOrigSize<=pPager->dbHintSize ); + break; + + case PAGER_WRITER_FINISHED: + assert( p->eLock==EXCLUSIVE_LOCK ); + assert( pPager->errCode==SQLITE_OK ); + assert( !pagerUseWal(pPager) ); + assert( isOpen(p->jfd) + || p->journalMode==PAGER_JOURNALMODE_OFF + || p->journalMode==PAGER_JOURNALMODE_WAL + ); + break; + + case PAGER_ERROR: + /* There must be at least one outstanding reference to the pager if + ** in ERROR state. Otherwise the pager should have already dropped + ** back to OPEN state. + */ + assert( pPager->errCode!=SQLITE_OK ); + assert( sqlite3PcacheRefCount(pPager->pPCache)>0 ); + break; + } + + return 1; +} +#endif /* ifndef NDEBUG */ + +#ifdef SQLITE_DEBUG +/* +** Return a pointer to a human readable string in a static buffer +** containing the state of the Pager object passed as an argument. This +** is intended to be used within debuggers. For example, as an alternative +** to "print *pPager" in gdb: +** +** (gdb) printf "%s", print_pager_state(pPager) +*/ +static char *print_pager_state(Pager *p){ + static char zRet[1024]; + + sqlite3_snprintf(1024, zRet, + "Filename: %s\n" + "State: %s errCode=%d\n" + "Lock: %s\n" + "Locking mode: locking_mode=%s\n" + "Journal mode: journal_mode=%s\n" + "Backing store: tempFile=%d memDb=%d useJournal=%d\n" + "Journal: journalOff=%lld journalHdr=%lld\n" + "Size: dbsize=%d dbOrigSize=%d dbFileSize=%d\n" + , p->zFilename + , p->eState==PAGER_OPEN ? "OPEN" : + p->eState==PAGER_READER ? "READER" : + p->eState==PAGER_WRITER_LOCKED ? "WRITER_LOCKED" : + p->eState==PAGER_WRITER_CACHEMOD ? "WRITER_CACHEMOD" : + p->eState==PAGER_WRITER_DBMOD ? "WRITER_DBMOD" : + p->eState==PAGER_WRITER_FINISHED ? "WRITER_FINISHED" : + p->eState==PAGER_ERROR ? "ERROR" : "?error?" + , (int)p->errCode + , p->eLock==NO_LOCK ? "NO_LOCK" : + p->eLock==RESERVED_LOCK ? "RESERVED" : + p->eLock==EXCLUSIVE_LOCK ? "EXCLUSIVE" : + p->eLock==SHARED_LOCK ? "SHARED" : + p->eLock==UNKNOWN_LOCK ? "UNKNOWN" : "?error?" + , p->exclusiveMode ? "exclusive" : "normal" + , p->journalMode==PAGER_JOURNALMODE_MEMORY ? "memory" : + p->journalMode==PAGER_JOURNALMODE_OFF ? "off" : + p->journalMode==PAGER_JOURNALMODE_DELETE ? "delete" : + p->journalMode==PAGER_JOURNALMODE_PERSIST ? "persist" : + p->journalMode==PAGER_JOURNALMODE_TRUNCATE ? "truncate" : + p->journalMode==PAGER_JOURNALMODE_WAL ? "wal" : "?error?" + , (int)p->tempFile, (int)p->memDb, (int)p->useJournal + , p->journalOff, p->journalHdr + , (int)p->dbSize, (int)p->dbOrigSize, (int)p->dbFileSize + ); + + return zRet; +} +#endif + +/* +** Return true if it is necessary to write page *pPg into the sub-journal. +** A page needs to be written into the sub-journal if there exists one +** or more open savepoints for which: +** +** * The page-number is less than or equal to PagerSavepoint.nOrig, and +** * The bit corresponding to the page-number is not set in +** PagerSavepoint.pInSavepoint. +*/ +static int subjRequiresPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + PagerSavepoint *p; + Pgno pgno = pPg->pgno; + int i; + for(i=0; i<pPager->nSavepoint; i++){ + p = &pPager->aSavepoint[i]; + if( p->nOrig>=pgno && 0==sqlite3BitvecTestNotNull(p->pInSavepoint, pgno) ){ + return 1; + } + } + return 0; +} + +#ifdef SQLITE_DEBUG +/* +** Return true if the page is already in the journal file. +*/ +static int pageInJournal(Pager *pPager, PgHdr *pPg){ + return sqlite3BitvecTest(pPager->pInJournal, pPg->pgno); +} +#endif + +/* +** Read a 32-bit integer from the given file descriptor. Store the integer +** that is read in *pRes. Return SQLITE_OK if everything worked, or an +** error code is something goes wrong. +** +** All values are stored on disk as big-endian. +*/ +static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){ + unsigned char ac[4]; + int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset); + if( rc==SQLITE_OK ){ + *pRes = sqlite3Get4byte(ac); + } + return rc; +} + +/* +** Write a 32-bit integer into a string buffer in big-endian byte order. +*/ +#define put32bits(A,B) sqlite3Put4byte((u8*)A,B) + + +/* +** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK +** on success or an error code is something goes wrong. +*/ +static int write32bits(sqlite3_file *fd, i64 offset, u32 val){ + char ac[4]; + put32bits(ac, val); + return sqlite3OsWrite(fd, ac, 4, offset); +} + +/* +** Unlock the database file to level eLock, which must be either NO_LOCK +** or SHARED_LOCK. Regardless of whether or not the call to xUnlock() +** succeeds, set the Pager.eLock variable to match the (attempted) new lock. +** +** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is +** called, do not modify it. See the comment above the #define of +** UNKNOWN_LOCK for an explanation of this. +*/ +static int pagerUnlockDb(Pager *pPager, int eLock){ + int rc = SQLITE_OK; + + assert( !pPager->exclusiveMode || pPager->eLock==eLock ); + assert( eLock==NO_LOCK || eLock==SHARED_LOCK ); + assert( eLock!=NO_LOCK || pagerUseWal(pPager)==0 ); + if( isOpen(pPager->fd) ){ + assert( pPager->eLock>=eLock ); + rc = pPager->noLock ? SQLITE_OK : sqlite3OsUnlock(pPager->fd, eLock); + if( pPager->eLock!=UNKNOWN_LOCK ){ + pPager->eLock = (u8)eLock; + } + IOTRACE(("UNLOCK %p %d\n", pPager, eLock)) + } + return rc; +} + +/* +** Lock the database file to level eLock, which must be either SHARED_LOCK, +** RESERVED_LOCK or EXCLUSIVE_LOCK. If the caller is successful, set the +** Pager.eLock variable to the new locking state. +** +** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is +** called, do not modify it unless the new locking state is EXCLUSIVE_LOCK. +** See the comment above the #define of UNKNOWN_LOCK for an explanation +** of this. +*/ +static int pagerLockDb(Pager *pPager, int eLock){ + int rc = SQLITE_OK; + + assert( eLock==SHARED_LOCK || eLock==RESERVED_LOCK || eLock==EXCLUSIVE_LOCK ); + if( pPager->eLock<eLock || pPager->eLock==UNKNOWN_LOCK ){ + rc = pPager->noLock ? SQLITE_OK : sqlite3OsLock(pPager->fd, eLock); + if( rc==SQLITE_OK && (pPager->eLock!=UNKNOWN_LOCK||eLock==EXCLUSIVE_LOCK) ){ + pPager->eLock = (u8)eLock; + IOTRACE(("LOCK %p %d\n", pPager, eLock)) + } + } + return rc; +} + +/* +** This function determines whether or not the atomic-write optimization +** can be used with this pager. The optimization can be used if: +** +** (a) the value returned by OsDeviceCharacteristics() indicates that +** a database page may be written atomically, and +** (b) the value returned by OsSectorSize() is less than or equal +** to the page size. +** +** The optimization is also always enabled for temporary files. It is +** an error to call this function if pPager is opened on an in-memory +** database. +** +** If the optimization cannot be used, 0 is returned. If it can be used, +** then the value returned is the size of the journal file when it +** contains rollback data for exactly one page. +*/ +#ifdef SQLITE_ENABLE_ATOMIC_WRITE +static int jrnlBufferSize(Pager *pPager){ + if( !pPager->tempFile ){ + int dc; /* Device characteristics */ + int nSector; /* Sector size */ + int szPage; /* Page size */ + + assert( isOpen(pPager->fd) ); + dc = sqlite3OsDeviceCharacteristics(pPager->fd); + nSector = pPager->sectorSize; + szPage = pPager->pageSize; + + assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); + assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); + if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8)) || nSector>szPage) ){ + return 0; + } + } + + return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager); +} +#endif + +/* +** If SQLITE_CHECK_PAGES is defined then we do some sanity checking +** on the cache using a hash function. This is used for testing +** and debugging only. +*/ +#ifdef SQLITE_CHECK_PAGES +/* +** Return a 32-bit hash of the page data for pPage. +*/ +static u32 pager_datahash(int nByte, unsigned char *pData){ + u32 hash = 0; + int i; + for(i=0; i<nByte; i++){ + hash = (hash*1039) + pData[i]; + } + return hash; +} +static u32 pager_pagehash(PgHdr *pPage){ + return pager_datahash(pPage->pPager->pageSize, (unsigned char *)pPage->pData); +} +static void pager_set_pagehash(PgHdr *pPage){ + pPage->pageHash = pager_pagehash(pPage); +} + +/* +** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES +** is defined, and NDEBUG is not defined, an assert() statement checks +** that the page is either dirty or still matches the calculated page-hash. +*/ +#define CHECK_PAGE(x) checkPage(x) +static void checkPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + assert( pPager->eState!=PAGER_ERROR ); + assert( (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) ); +} + +#else +#define pager_datahash(X,Y) 0 +#define pager_pagehash(X) 0 +#define pager_set_pagehash(X) +#define CHECK_PAGE(x) +#endif /* SQLITE_CHECK_PAGES */ + +/* +** When this is called the journal file for pager pPager must be open. +** This function attempts to read a master journal file name from the +** end of the file and, if successful, copies it into memory supplied +** by the caller. See comments above writeMasterJournal() for the format +** used to store a master journal file name at the end of a journal file. +** +** zMaster must point to a buffer of at least nMaster bytes allocated by +** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is +** enough space to write the master journal name). If the master journal +** name in the journal is longer than nMaster bytes (including a +** nul-terminator), then this is handled as if no master journal name +** were present in the journal. +** +** If a master journal file name is present at the end of the journal +** file, then it is copied into the buffer pointed to by zMaster. A +** nul-terminator byte is appended to the buffer following the master +** journal file name. +** +** If it is determined that no master journal file name is present +** zMaster[0] is set to 0 and SQLITE_OK returned. +** +** If an error occurs while reading from the journal file, an SQLite +** error code is returned. +*/ +static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, u32 nMaster){ + int rc; /* Return code */ + u32 len; /* Length in bytes of master journal name */ + i64 szJ; /* Total size in bytes of journal file pJrnl */ + u32 cksum; /* MJ checksum value read from journal */ + u32 u; /* Unsigned loop counter */ + unsigned char aMagic[8]; /* A buffer to hold the magic header */ + zMaster[0] = '\0'; + + if( SQLITE_OK!=(rc = sqlite3OsFileSize(pJrnl, &szJ)) + || szJ<16 + || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-16, &len)) + || len>=nMaster + || len==0 + || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-12, &cksum)) + || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8)) + || memcmp(aMagic, aJournalMagic, 8) + || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len)) + ){ + return rc; + } + + /* See if the checksum matches the master journal name */ + for(u=0; u<len; u++){ + cksum -= zMaster[u]; + } + if( cksum ){ + /* If the checksum doesn't add up, then one or more of the disk sectors + ** containing the master journal filename is corrupted. This means + ** definitely roll back, so just return SQLITE_OK and report a (nul) + ** master-journal filename. + */ + len = 0; + } + zMaster[len] = '\0'; + + return SQLITE_OK; +} + +/* +** Return the offset of the sector boundary at or immediately +** following the value in pPager->journalOff, assuming a sector +** size of pPager->sectorSize bytes. +** +** i.e for a sector size of 512: +** +** Pager.journalOff Return value +** --------------------------------------- +** 0 0 +** 512 512 +** 100 512 +** 2000 2048 +** +*/ +static i64 journalHdrOffset(Pager *pPager){ + i64 offset = 0; + i64 c = pPager->journalOff; + if( c ){ + offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager); + } + assert( offset%JOURNAL_HDR_SZ(pPager)==0 ); + assert( offset>=c ); + assert( (offset-c)<JOURNAL_HDR_SZ(pPager) ); + return offset; +} + +/* +** The journal file must be open when this function is called. +** +** This function is a no-op if the journal file has not been written to +** within the current transaction (i.e. if Pager.journalOff==0). +** +** If doTruncate is non-zero or the Pager.journalSizeLimit variable is +** set to 0, then truncate the journal file to zero bytes in size. Otherwise, +** zero the 28-byte header at the start of the journal file. In either case, +** if the pager is not in no-sync mode, sync the journal file immediately +** after writing or truncating it. +** +** If Pager.journalSizeLimit is set to a positive, non-zero value, and +** following the truncation or zeroing described above the size of the +** journal file in bytes is larger than this value, then truncate the +** journal file to Pager.journalSizeLimit bytes. The journal file does +** not need to be synced following this operation. +** +** If an IO error occurs, abandon processing and return the IO error code. +** Otherwise, return SQLITE_OK. +*/ +static int zeroJournalHdr(Pager *pPager, int doTruncate){ + int rc = SQLITE_OK; /* Return code */ + assert( isOpen(pPager->jfd) ); + if( pPager->journalOff ){ + const i64 iLimit = pPager->journalSizeLimit; /* Local cache of jsl */ + + IOTRACE(("JZEROHDR %p\n", pPager)) + if( doTruncate || iLimit==0 ){ + rc = sqlite3OsTruncate(pPager->jfd, 0); + }else{ + static const char zeroHdr[28] = {0}; + rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0); + } + if( rc==SQLITE_OK && !pPager->noSync ){ + rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->syncFlags); + } + + /* At this point the transaction is committed but the write lock + ** is still held on the file. If there is a size limit configured for + ** the persistent journal and the journal file currently consumes more + ** space than that limit allows for, truncate it now. There is no need + ** to sync the file following this operation. + */ + if( rc==SQLITE_OK && iLimit>0 ){ + i64 sz; + rc = sqlite3OsFileSize(pPager->jfd, &sz); + if( rc==SQLITE_OK && sz>iLimit ){ + rc = sqlite3OsTruncate(pPager->jfd, iLimit); + } + } + } + return rc; +} + +/* +** The journal file must be open when this routine is called. A journal +** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the +** current location. +** +** The format for the journal header is as follows: +** - 8 bytes: Magic identifying journal format. +** - 4 bytes: Number of records in journal, or -1 no-sync mode is on. +** - 4 bytes: Random number used for page hash. +** - 4 bytes: Initial database page count. +** - 4 bytes: Sector size used by the process that wrote this journal. +** - 4 bytes: Database page size. +** +** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space. +*/ +static int writeJournalHdr(Pager *pPager){ + int rc = SQLITE_OK; /* Return code */ + char *zHeader = pPager->pTmpSpace; /* Temporary space used to build header */ + u32 nHeader = (u32)pPager->pageSize;/* Size of buffer pointed to by zHeader */ + u32 nWrite; /* Bytes of header sector written */ + int ii; /* Loop counter */ + + assert( isOpen(pPager->jfd) ); /* Journal file must be open. */ + + if( nHeader>JOURNAL_HDR_SZ(pPager) ){ + nHeader = JOURNAL_HDR_SZ(pPager); + } + + /* If there are active savepoints and any of them were created + ** since the most recent journal header was written, update the + ** PagerSavepoint.iHdrOffset fields now. + */ + for(ii=0; ii<pPager->nSavepoint; ii++){ + if( pPager->aSavepoint[ii].iHdrOffset==0 ){ + pPager->aSavepoint[ii].iHdrOffset = pPager->journalOff; + } + } + + pPager->journalHdr = pPager->journalOff = journalHdrOffset(pPager); + + /* + ** Write the nRec Field - the number of page records that follow this + ** journal header. Normally, zero is written to this value at this time. + ** After the records are added to the journal (and the journal synced, + ** if in full-sync mode), the zero is overwritten with the true number + ** of records (see syncJournal()). + ** + ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When + ** reading the journal this value tells SQLite to assume that the + ** rest of the journal file contains valid page records. This assumption + ** is dangerous, as if a failure occurred whilst writing to the journal + ** file it may contain some garbage data. There are two scenarios + ** where this risk can be ignored: + ** + ** * When the pager is in no-sync mode. Corruption can follow a + ** power failure in this case anyway. + ** + ** * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees + ** that garbage data is never appended to the journal file. + */ + assert( isOpen(pPager->fd) || pPager->noSync ); + if( pPager->noSync || (pPager->journalMode==PAGER_JOURNALMODE_MEMORY) + || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) + ){ + memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); + put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff); + }else{ + memset(zHeader, 0, sizeof(aJournalMagic)+4); + } + + /* The random check-hash initializer */ + sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit); + put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit); + /* The initial database size */ + put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbOrigSize); + /* The assumed sector size for this process */ + put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize); + + /* The page size */ + put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize); + + /* Initializing the tail of the buffer is not necessary. Everything + ** works find if the following memset() is omitted. But initializing + ** the memory prevents valgrind from complaining, so we are willing to + ** take the performance hit. + */ + memset(&zHeader[sizeof(aJournalMagic)+20], 0, + nHeader-(sizeof(aJournalMagic)+20)); + + /* In theory, it is only necessary to write the 28 bytes that the + ** journal header consumes to the journal file here. Then increment the + ** Pager.journalOff variable by JOURNAL_HDR_SZ so that the next + ** record is written to the following sector (leaving a gap in the file + ** that will be implicitly filled in by the OS). + ** + ** However it has been discovered that on some systems this pattern can + ** be significantly slower than contiguously writing data to the file, + ** even if that means explicitly writing data to the block of + ** (JOURNAL_HDR_SZ - 28) bytes that will not be used. So that is what + ** is done. + ** + ** The loop is required here in case the sector-size is larger than the + ** database page size. Since the zHeader buffer is only Pager.pageSize + ** bytes in size, more than one call to sqlite3OsWrite() may be required + ** to populate the entire journal header sector. + */ + for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){ + IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader)) + rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff); + assert( pPager->journalHdr <= pPager->journalOff ); + pPager->journalOff += nHeader; + } + + return rc; +} + +/* +** The journal file must be open when this is called. A journal header file +** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal +** file. The current location in the journal file is given by +** pPager->journalOff. See comments above function writeJournalHdr() for +** a description of the journal header format. +** +** If the header is read successfully, *pNRec is set to the number of +** page records following this header and *pDbSize is set to the size of the +** database before the transaction began, in pages. Also, pPager->cksumInit +** is set to the value read from the journal header. SQLITE_OK is returned +** in this case. +** +** If the journal header file appears to be corrupted, SQLITE_DONE is +** returned and *pNRec and *PDbSize are undefined. If JOURNAL_HDR_SZ bytes +** cannot be read from the journal file an error code is returned. +*/ +static int readJournalHdr( + Pager *pPager, /* Pager object */ + int isHot, + i64 journalSize, /* Size of the open journal file in bytes */ + u32 *pNRec, /* OUT: Value read from the nRec field */ + u32 *pDbSize /* OUT: Value of original database size field */ +){ + int rc; /* Return code */ + unsigned char aMagic[8]; /* A buffer to hold the magic header */ + i64 iHdrOff; /* Offset of journal header being read */ + + assert( isOpen(pPager->jfd) ); /* Journal file must be open. */ + + /* Advance Pager.journalOff to the start of the next sector. If the + ** journal file is too small for there to be a header stored at this + ** point, return SQLITE_DONE. + */ + pPager->journalOff = journalHdrOffset(pPager); + if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){ + return SQLITE_DONE; + } + iHdrOff = pPager->journalOff; + + /* Read in the first 8 bytes of the journal header. If they do not match + ** the magic string found at the start of each journal header, return + ** SQLITE_DONE. If an IO error occurs, return an error code. Otherwise, + ** proceed. + */ + if( isHot || iHdrOff!=pPager->journalHdr ){ + rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), iHdrOff); + if( rc ){ + return rc; + } + if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){ + return SQLITE_DONE; + } + } + + /* Read the first three 32-bit fields of the journal header: The nRec + ** field, the checksum-initializer and the database size at the start + ** of the transaction. Return an error code if anything goes wrong. + */ + if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+8, pNRec)) + || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+12, &pPager->cksumInit)) + || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+16, pDbSize)) + ){ + return rc; + } + + if( pPager->journalOff==0 ){ + u32 iPageSize; /* Page-size field of journal header */ + u32 iSectorSize; /* Sector-size field of journal header */ + + /* Read the page-size and sector-size journal header fields. */ + if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+20, &iSectorSize)) + || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+24, &iPageSize)) + ){ + return rc; + } + + /* Versions of SQLite prior to 3.5.8 set the page-size field of the + ** journal header to zero. In this case, assume that the Pager.pageSize + ** variable is already set to the correct page size. + */ + if( iPageSize==0 ){ + iPageSize = pPager->pageSize; + } + + /* Check that the values read from the page-size and sector-size fields + ** are within range. To be 'in range', both values need to be a power + ** of two greater than or equal to 512 or 32, and not greater than their + ** respective compile time maximum limits. + */ + if( iPageSize<512 || iSectorSize<32 + || iPageSize>SQLITE_MAX_PAGE_SIZE || iSectorSize>MAX_SECTOR_SIZE + || ((iPageSize-1)&iPageSize)!=0 || ((iSectorSize-1)&iSectorSize)!=0 + ){ + /* If the either the page-size or sector-size in the journal-header is + ** invalid, then the process that wrote the journal-header must have + ** crashed before the header was synced. In this case stop reading + ** the journal file here. + */ + return SQLITE_DONE; + } + + /* Update the page-size to match the value read from the journal. + ** Use a testcase() macro to make sure that malloc failure within + ** PagerSetPagesize() is tested. + */ + rc = sqlite3PagerSetPagesize(pPager, &iPageSize, -1); + testcase( rc!=SQLITE_OK ); + + /* Update the assumed sector-size to match the value used by + ** the process that created this journal. If this journal was + ** created by a process other than this one, then this routine + ** is being called from within pager_playback(). The local value + ** of Pager.sectorSize is restored at the end of that routine. + */ + pPager->sectorSize = iSectorSize; + } + + pPager->journalOff += JOURNAL_HDR_SZ(pPager); + return rc; +} + + +/* +** Write the supplied master journal name into the journal file for pager +** pPager at the current location. The master journal name must be the last +** thing written to a journal file. If the pager is in full-sync mode, the +** journal file descriptor is advanced to the next sector boundary before +** anything is written. The format is: +** +** + 4 bytes: PAGER_MJ_PGNO. +** + N bytes: Master journal filename in utf-8. +** + 4 bytes: N (length of master journal name in bytes, no nul-terminator). +** + 4 bytes: Master journal name checksum. +** + 8 bytes: aJournalMagic[]. +** +** The master journal page checksum is the sum of the bytes in the master +** journal name, where each byte is interpreted as a signed 8-bit integer. +** +** If zMaster is a NULL pointer (occurs for a single database transaction), +** this call is a no-op. +*/ +static int writeMasterJournal(Pager *pPager, const char *zMaster){ + int rc; /* Return code */ + int nMaster; /* Length of string zMaster */ + i64 iHdrOff; /* Offset of header in journal file */ + i64 jrnlSize; /* Size of journal file on disk */ + u32 cksum = 0; /* Checksum of string zMaster */ + + assert( pPager->setMaster==0 ); + assert( !pagerUseWal(pPager) ); + + if( !zMaster + || pPager->journalMode==PAGER_JOURNALMODE_MEMORY + || !isOpen(pPager->jfd) + ){ + return SQLITE_OK; + } + pPager->setMaster = 1; + assert( pPager->journalHdr <= pPager->journalOff ); + + /* Calculate the length in bytes and the checksum of zMaster */ + for(nMaster=0; zMaster[nMaster]; nMaster++){ + cksum += zMaster[nMaster]; + } + + /* If in full-sync mode, advance to the next disk sector before writing + ** the master journal name. This is in case the previous page written to + ** the journal has already been synced. + */ + if( pPager->fullSync ){ + pPager->journalOff = journalHdrOffset(pPager); + } + iHdrOff = pPager->journalOff; + + /* Write the master journal data to the end of the journal file. If + ** an error occurs, return the error code to the caller. + */ + if( (0 != (rc = write32bits(pPager->jfd, iHdrOff, PAGER_MJ_PGNO(pPager)))) + || (0 != (rc = sqlite3OsWrite(pPager->jfd, zMaster, nMaster, iHdrOff+4))) + || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster, nMaster))) + || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster+4, cksum))) + || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, + iHdrOff+4+nMaster+8))) + ){ + return rc; + } + pPager->journalOff += (nMaster+20); + + /* If the pager is in peristent-journal mode, then the physical + ** journal-file may extend past the end of the master-journal name + ** and 8 bytes of magic data just written to the file. This is + ** dangerous because the code to rollback a hot-journal file + ** will not be able to find the master-journal name to determine + ** whether or not the journal is hot. + ** + ** Easiest thing to do in this scenario is to truncate the journal + ** file to the required size. + */ + if( SQLITE_OK==(rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize)) + && jrnlSize>pPager->journalOff + ){ + rc = sqlite3OsTruncate(pPager->jfd, pPager->journalOff); + } + return rc; +} + +/* +** Discard the entire contents of the in-memory page-cache. +*/ +static void pager_reset(Pager *pPager){ + pPager->iDataVersion++; + sqlite3BackupRestart(pPager->pBackup); + sqlite3PcacheClear(pPager->pPCache); +} + +/* +** Return the pPager->iDataVersion value +*/ +SQLITE_PRIVATE u32 sqlite3PagerDataVersion(Pager *pPager){ + assert( pPager->eState>PAGER_OPEN ); + return pPager->iDataVersion; +} + +/* +** Free all structures in the Pager.aSavepoint[] array and set both +** Pager.aSavepoint and Pager.nSavepoint to zero. Close the sub-journal +** if it is open and the pager is not in exclusive mode. +*/ +static void releaseAllSavepoints(Pager *pPager){ + int ii; /* Iterator for looping through Pager.aSavepoint */ + for(ii=0; ii<pPager->nSavepoint; ii++){ + sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint); + } + if( !pPager->exclusiveMode || sqlite3IsMemJournal(pPager->sjfd) ){ + sqlite3OsClose(pPager->sjfd); + } + sqlite3_free(pPager->aSavepoint); + pPager->aSavepoint = 0; + pPager->nSavepoint = 0; + pPager->nSubRec = 0; +} + +/* +** Set the bit number pgno in the PagerSavepoint.pInSavepoint +** bitvecs of all open savepoints. Return SQLITE_OK if successful +** or SQLITE_NOMEM if a malloc failure occurs. +*/ +static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){ + int ii; /* Loop counter */ + int rc = SQLITE_OK; /* Result code */ + + for(ii=0; ii<pPager->nSavepoint; ii++){ + PagerSavepoint *p = &pPager->aSavepoint[ii]; + if( pgno<=p->nOrig ){ + rc |= sqlite3BitvecSet(p->pInSavepoint, pgno); + testcase( rc==SQLITE_NOMEM ); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + } + } + return rc; +} + +/* +** This function is a no-op if the pager is in exclusive mode and not +** in the ERROR state. Otherwise, it switches the pager to PAGER_OPEN +** state. +** +** If the pager is not in exclusive-access mode, the database file is +** completely unlocked. If the file is unlocked and the file-system does +** not exhibit the UNDELETABLE_WHEN_OPEN property, the journal file is +** closed (if it is open). +** +** If the pager is in ERROR state when this function is called, the +** contents of the pager cache are discarded before switching back to +** the OPEN state. Regardless of whether the pager is in exclusive-mode +** or not, any journal file left in the file-system will be treated +** as a hot-journal and rolled back the next time a read-transaction +** is opened (by this or by any other connection). +*/ +static void pager_unlock(Pager *pPager){ + + assert( pPager->eState==PAGER_READER + || pPager->eState==PAGER_OPEN + || pPager->eState==PAGER_ERROR + ); + + sqlite3BitvecDestroy(pPager->pInJournal); + pPager->pInJournal = 0; + releaseAllSavepoints(pPager); + + if( pagerUseWal(pPager) ){ + assert( !isOpen(pPager->jfd) ); + sqlite3WalEndReadTransaction(pPager->pWal); + pPager->eState = PAGER_OPEN; + }else if( !pPager->exclusiveMode ){ + int rc; /* Error code returned by pagerUnlockDb() */ + int iDc = isOpen(pPager->fd)?sqlite3OsDeviceCharacteristics(pPager->fd):0; + + /* If the operating system support deletion of open files, then + ** close the journal file when dropping the database lock. Otherwise + ** another connection with journal_mode=delete might delete the file + ** out from under us. + */ + assert( (PAGER_JOURNALMODE_MEMORY & 5)!=1 ); + assert( (PAGER_JOURNALMODE_OFF & 5)!=1 ); + assert( (PAGER_JOURNALMODE_WAL & 5)!=1 ); + assert( (PAGER_JOURNALMODE_DELETE & 5)!=1 ); + assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 ); + assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 ); + if( 0==(iDc & SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN) + || 1!=(pPager->journalMode & 5) + ){ + sqlite3OsClose(pPager->jfd); + } + + /* If the pager is in the ERROR state and the call to unlock the database + ** file fails, set the current lock to UNKNOWN_LOCK. See the comment + ** above the #define for UNKNOWN_LOCK for an explanation of why this + ** is necessary. + */ + rc = pagerUnlockDb(pPager, NO_LOCK); + if( rc!=SQLITE_OK && pPager->eState==PAGER_ERROR ){ + pPager->eLock = UNKNOWN_LOCK; + } + + /* The pager state may be changed from PAGER_ERROR to PAGER_OPEN here + ** without clearing the error code. This is intentional - the error + ** code is cleared and the cache reset in the block below. + */ + assert( pPager->errCode || pPager->eState!=PAGER_ERROR ); + pPager->changeCountDone = 0; + pPager->eState = PAGER_OPEN; + } + + /* If Pager.errCode is set, the contents of the pager cache cannot be + ** trusted. Now that there are no outstanding references to the pager, + ** it can safely move back to PAGER_OPEN state. This happens in both + ** normal and exclusive-locking mode. + */ + if( pPager->errCode ){ + pager_reset(pPager); + pPager->changeCountDone = pPager->tempFile; + pPager->eState = PAGER_OPEN; + pPager->errCode = SQLITE_OK; + if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0); + } + + pPager->journalOff = 0; + pPager->journalHdr = 0; + pPager->setMaster = 0; +} + +/* +** This function is called whenever an IOERR or FULL error that requires +** the pager to transition into the ERROR state may ahve occurred. +** The first argument is a pointer to the pager structure, the second +** the error-code about to be returned by a pager API function. The +** value returned is a copy of the second argument to this function. +** +** If the second argument is SQLITE_FULL, SQLITE_IOERR or one of the +** IOERR sub-codes, the pager enters the ERROR state and the error code +** is stored in Pager.errCode. While the pager remains in the ERROR state, +** all major API calls on the Pager will immediately return Pager.errCode. +** +** The ERROR state indicates that the contents of the pager-cache +** cannot be trusted. This state can be cleared by completely discarding +** the contents of the pager-cache. If a transaction was active when +** the persistent error occurred, then the rollback journal may need +** to be replayed to restore the contents of the database file (as if +** it were a hot-journal). +*/ +static int pager_error(Pager *pPager, int rc){ + int rc2 = rc & 0xff; + assert( rc==SQLITE_OK ); + assert( + pPager->errCode==SQLITE_FULL || + pPager->errCode==SQLITE_OK || + (pPager->errCode & 0xff)==SQLITE_IOERR + ); + if( rc2==SQLITE_FULL || rc2==SQLITE_IOERR ){ + pPager->errCode = rc; + pPager->eState = PAGER_ERROR; + } + return rc; +} + +static int pager_truncate(Pager *pPager, Pgno nPage); + +/* +** This routine ends a transaction. A transaction is usually ended by +** either a COMMIT or a ROLLBACK operation. This routine may be called +** after rollback of a hot-journal, or if an error occurs while opening +** the journal file or writing the very first journal-header of a +** database transaction. +** +** This routine is never called in PAGER_ERROR state. If it is called +** in PAGER_NONE or PAGER_SHARED state and the lock held is less +** exclusive than a RESERVED lock, it is a no-op. +** +** Otherwise, any active savepoints are released. +** +** If the journal file is open, then it is "finalized". Once a journal +** file has been finalized it is not possible to use it to roll back a +** transaction. Nor will it be considered to be a hot-journal by this +** or any other database connection. Exactly how a journal is finalized +** depends on whether or not the pager is running in exclusive mode and +** the current journal-mode (Pager.journalMode value), as follows: +** +** journalMode==MEMORY +** Journal file descriptor is simply closed. This destroys an +** in-memory journal. +** +** journalMode==TRUNCATE +** Journal file is truncated to zero bytes in size. +** +** journalMode==PERSIST +** The first 28 bytes of the journal file are zeroed. This invalidates +** the first journal header in the file, and hence the entire journal +** file. An invalid journal file cannot be rolled back. +** +** journalMode==DELETE +** The journal file is closed and deleted using sqlite3OsDelete(). +** +** If the pager is running in exclusive mode, this method of finalizing +** the journal file is never used. Instead, if the journalMode is +** DELETE and the pager is in exclusive mode, the method described under +** journalMode==PERSIST is used instead. +** +** After the journal is finalized, the pager moves to PAGER_READER state. +** If running in non-exclusive rollback mode, the lock on the file is +** downgraded to a SHARED_LOCK. +** +** SQLITE_OK is returned if no error occurs. If an error occurs during +** any of the IO operations to finalize the journal file or unlock the +** database then the IO error code is returned to the user. If the +** operation to finalize the journal file fails, then the code still +** tries to unlock the database file if not in exclusive mode. If the +** unlock operation fails as well, then the first error code related +** to the first error encountered (the journal finalization one) is +** returned. +*/ +static int pager_end_transaction(Pager *pPager, int hasMaster, int bCommit){ + int rc = SQLITE_OK; /* Error code from journal finalization operation */ + int rc2 = SQLITE_OK; /* Error code from db file unlock operation */ + + /* Do nothing if the pager does not have an open write transaction + ** or at least a RESERVED lock. This function may be called when there + ** is no write-transaction active but a RESERVED or greater lock is + ** held under two circumstances: + ** + ** 1. After a successful hot-journal rollback, it is called with + ** eState==PAGER_NONE and eLock==EXCLUSIVE_LOCK. + ** + ** 2. If a connection with locking_mode=exclusive holding an EXCLUSIVE + ** lock switches back to locking_mode=normal and then executes a + ** read-transaction, this function is called with eState==PAGER_READER + ** and eLock==EXCLUSIVE_LOCK when the read-transaction is closed. + */ + assert( assert_pager_state(pPager) ); + assert( pPager->eState!=PAGER_ERROR ); + if( pPager->eState<PAGER_WRITER_LOCKED && pPager->eLock<RESERVED_LOCK ){ + return SQLITE_OK; + } + + releaseAllSavepoints(pPager); + assert( isOpen(pPager->jfd) || pPager->pInJournal==0 ); + if( isOpen(pPager->jfd) ){ + assert( !pagerUseWal(pPager) ); + + /* Finalize the journal file. */ + if( sqlite3IsMemJournal(pPager->jfd) ){ + assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ); + sqlite3OsClose(pPager->jfd); + }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){ + if( pPager->journalOff==0 ){ + rc = SQLITE_OK; + }else{ + rc = sqlite3OsTruncate(pPager->jfd, 0); + if( rc==SQLITE_OK && pPager->fullSync ){ + /* Make sure the new file size is written into the inode right away. + ** Otherwise the journal might resurrect following a power loss and + ** cause the last transaction to roll back. See + ** https://bugzilla.mozilla.org/show_bug.cgi?id=1072773 + */ + rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags); + } + } + pPager->journalOff = 0; + }else if( pPager->journalMode==PAGER_JOURNALMODE_PERSIST + || (pPager->exclusiveMode && pPager->journalMode!=PAGER_JOURNALMODE_WAL) + ){ + rc = zeroJournalHdr(pPager, hasMaster); + pPager->journalOff = 0; + }else{ + /* This branch may be executed with Pager.journalMode==MEMORY if + ** a hot-journal was just rolled back. In this case the journal + ** file should be closed and deleted. If this connection writes to + ** the database file, it will do so using an in-memory journal. + */ + int bDelete = (!pPager->tempFile && sqlite3JournalExists(pPager->jfd)); + assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE + || pPager->journalMode==PAGER_JOURNALMODE_MEMORY + || pPager->journalMode==PAGER_JOURNALMODE_WAL + ); + sqlite3OsClose(pPager->jfd); + if( bDelete ){ + rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); + } + } + } + +#ifdef SQLITE_CHECK_PAGES + sqlite3PcacheIterateDirty(pPager->pPCache, pager_set_pagehash); + if( pPager->dbSize==0 && sqlite3PcacheRefCount(pPager->pPCache)>0 ){ + PgHdr *p = sqlite3PagerLookup(pPager, 1); + if( p ){ + p->pageHash = 0; + sqlite3PagerUnrefNotNull(p); + } + } +#endif + + sqlite3BitvecDestroy(pPager->pInJournal); + pPager->pInJournal = 0; + pPager->nRec = 0; + sqlite3PcacheCleanAll(pPager->pPCache); + sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize); + + if( pagerUseWal(pPager) ){ + /* Drop the WAL write-lock, if any. Also, if the connection was in + ** locking_mode=exclusive mode but is no longer, drop the EXCLUSIVE + ** lock held on the database file. + */ + rc2 = sqlite3WalEndWriteTransaction(pPager->pWal); + assert( rc2==SQLITE_OK ); + }else if( rc==SQLITE_OK && bCommit && pPager->dbFileSize>pPager->dbSize ){ + /* This branch is taken when committing a transaction in rollback-journal + ** mode if the database file on disk is larger than the database image. + ** At this point the journal has been finalized and the transaction + ** successfully committed, but the EXCLUSIVE lock is still held on the + ** file. So it is safe to truncate the database file to its minimum + ** required size. */ + assert( pPager->eLock==EXCLUSIVE_LOCK ); + rc = pager_truncate(pPager, pPager->dbSize); + } + + if( rc==SQLITE_OK && bCommit && isOpen(pPager->fd) ){ + rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_COMMIT_PHASETWO, 0); + if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; + } + + if( !pPager->exclusiveMode + && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0)) + ){ + rc2 = pagerUnlockDb(pPager, SHARED_LOCK); + pPager->changeCountDone = 0; + } + pPager->eState = PAGER_READER; + pPager->setMaster = 0; + + return (rc==SQLITE_OK?rc2:rc); +} + +/* +** Execute a rollback if a transaction is active and unlock the +** database file. +** +** If the pager has already entered the ERROR state, do not attempt +** the rollback at this time. Instead, pager_unlock() is called. The +** call to pager_unlock() will discard all in-memory pages, unlock +** the database file and move the pager back to OPEN state. If this +** means that there is a hot-journal left in the file-system, the next +** connection to obtain a shared lock on the pager (which may be this one) +** will roll it back. +** +** If the pager has not already entered the ERROR state, but an IO or +** malloc error occurs during a rollback, then this will itself cause +** the pager to enter the ERROR state. Which will be cleared by the +** call to pager_unlock(), as described above. +*/ +static void pagerUnlockAndRollback(Pager *pPager){ + if( pPager->eState!=PAGER_ERROR && pPager->eState!=PAGER_OPEN ){ + assert( assert_pager_state(pPager) ); + if( pPager->eState>=PAGER_WRITER_LOCKED ){ + sqlite3PagerRollback(pPager); + }else if( !pPager->exclusiveMode ){ + assert( pPager->eState==PAGER_READER ); + pager_end_transaction(pPager, 0, 0); + } + } + pager_unlock(pPager); +} + +/* +** Parameter aData must point to a buffer of pPager->pageSize bytes +** of data. Compute and return a checksum based ont the contents of the +** page of data and the current value of pPager->cksumInit. +** +** This is not a real checksum. It is really just the sum of the +** random initial value (pPager->cksumInit) and every 200th byte +** of the page data, starting with byte offset (pPager->pageSize%200). +** Each byte is interpreted as an 8-bit unsigned integer. +** +** Changing the formula used to compute this checksum results in an +** incompatible journal file format. +** +** If journal corruption occurs due to a power failure, the most likely +** scenario is that one end or the other of the record will be changed. +** It is much less likely that the two ends of the journal record will be +** correct and the middle be corrupt. Thus, this "checksum" scheme, +** though fast and simple, catches the mostly likely kind of corruption. +*/ +static u32 pager_cksum(Pager *pPager, const u8 *aData){ + u32 cksum = pPager->cksumInit; /* Checksum value to return */ + int i = pPager->pageSize-200; /* Loop counter */ + while( i>0 ){ + cksum += aData[i]; + i -= 200; + } + return cksum; +} + +/* +** Report the current page size and number of reserved bytes back +** to the codec. +*/ +#ifdef SQLITE_HAS_CODEC +static void pagerReportSize(Pager *pPager){ + if( pPager->xCodecSizeChng ){ + pPager->xCodecSizeChng(pPager->pCodec, pPager->pageSize, + (int)pPager->nReserve); + } +} +#else +# define pagerReportSize(X) /* No-op if we do not support a codec */ +#endif + +#ifdef SQLITE_HAS_CODEC +/* +** Make sure the number of reserved bits is the same in the destination +** pager as it is in the source. This comes up when a VACUUM changes the +** number of reserved bits to the "optimal" amount. +*/ +SQLITE_PRIVATE void sqlite3PagerAlignReserve(Pager *pDest, Pager *pSrc){ + if( pDest->nReserve!=pSrc->nReserve ){ + pDest->nReserve = pSrc->nReserve; + pagerReportSize(pDest); + } +} +#endif + +/* +** Read a single page from either the journal file (if isMainJrnl==1) or +** from the sub-journal (if isMainJrnl==0) and playback that page. +** The page begins at offset *pOffset into the file. The *pOffset +** value is increased to the start of the next page in the journal. +** +** The main rollback journal uses checksums - the statement journal does +** not. +** +** If the page number of the page record read from the (sub-)journal file +** is greater than the current value of Pager.dbSize, then playback is +** skipped and SQLITE_OK is returned. +** +** If pDone is not NULL, then it is a record of pages that have already +** been played back. If the page at *pOffset has already been played back +** (if the corresponding pDone bit is set) then skip the playback. +** Make sure the pDone bit corresponding to the *pOffset page is set +** prior to returning. +** +** If the page record is successfully read from the (sub-)journal file +** and played back, then SQLITE_OK is returned. If an IO error occurs +** while reading the record from the (sub-)journal file or while writing +** to the database file, then the IO error code is returned. If data +** is successfully read from the (sub-)journal file but appears to be +** corrupted, SQLITE_DONE is returned. Data is considered corrupted in +** two circumstances: +** +** * If the record page-number is illegal (0 or PAGER_MJ_PGNO), or +** * If the record is being rolled back from the main journal file +** and the checksum field does not match the record content. +** +** Neither of these two scenarios are possible during a savepoint rollback. +** +** If this is a savepoint rollback, then memory may have to be dynamically +** allocated by this function. If this is the case and an allocation fails, +** SQLITE_NOMEM is returned. +*/ +static int pager_playback_one_page( + Pager *pPager, /* The pager being played back */ + i64 *pOffset, /* Offset of record to playback */ + Bitvec *pDone, /* Bitvec of pages already played back */ + int isMainJrnl, /* 1 -> main journal. 0 -> sub-journal. */ + int isSavepnt /* True for a savepoint rollback */ +){ + int rc; + PgHdr *pPg; /* An existing page in the cache */ + Pgno pgno; /* The page number of a page in journal */ + u32 cksum; /* Checksum used for sanity checking */ + char *aData; /* Temporary storage for the page */ + sqlite3_file *jfd; /* The file descriptor for the journal file */ + int isSynced; /* True if journal page is synced */ + + assert( (isMainJrnl&~1)==0 ); /* isMainJrnl is 0 or 1 */ + assert( (isSavepnt&~1)==0 ); /* isSavepnt is 0 or 1 */ + assert( isMainJrnl || pDone ); /* pDone always used on sub-journals */ + assert( isSavepnt || pDone==0 ); /* pDone never used on non-savepoint */ + + aData = pPager->pTmpSpace; + assert( aData ); /* Temp storage must have already been allocated */ + assert( pagerUseWal(pPager)==0 || (!isMainJrnl && isSavepnt) ); + + /* Either the state is greater than PAGER_WRITER_CACHEMOD (a transaction + ** or savepoint rollback done at the request of the caller) or this is + ** a hot-journal rollback. If it is a hot-journal rollback, the pager + ** is in state OPEN and holds an EXCLUSIVE lock. Hot-journal rollback + ** only reads from the main journal, not the sub-journal. + */ + assert( pPager->eState>=PAGER_WRITER_CACHEMOD + || (pPager->eState==PAGER_OPEN && pPager->eLock==EXCLUSIVE_LOCK) + ); + assert( pPager->eState>=PAGER_WRITER_CACHEMOD || isMainJrnl ); + + /* Read the page number and page data from the journal or sub-journal + ** file. Return an error code to the caller if an IO error occurs. + */ + jfd = isMainJrnl ? pPager->jfd : pPager->sjfd; + rc = read32bits(jfd, *pOffset, &pgno); + if( rc!=SQLITE_OK ) return rc; + rc = sqlite3OsRead(jfd, (u8*)aData, pPager->pageSize, (*pOffset)+4); + if( rc!=SQLITE_OK ) return rc; + *pOffset += pPager->pageSize + 4 + isMainJrnl*4; + + /* Sanity checking on the page. This is more important that I originally + ** thought. If a power failure occurs while the journal is being written, + ** it could cause invalid data to be written into the journal. We need to + ** detect this invalid data (with high probability) and ignore it. + */ + if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ + assert( !isSavepnt ); + return SQLITE_DONE; + } + if( pgno>(Pgno)pPager->dbSize || sqlite3BitvecTest(pDone, pgno) ){ + return SQLITE_OK; + } + if( isMainJrnl ){ + rc = read32bits(jfd, (*pOffset)-4, &cksum); + if( rc ) return rc; + if( !isSavepnt && pager_cksum(pPager, (u8*)aData)!=cksum ){ + return SQLITE_DONE; + } + } + + /* If this page has already been played back before during the current + ** rollback, then don't bother to play it back again. + */ + if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){ + return rc; + } + + /* When playing back page 1, restore the nReserve setting + */ + if( pgno==1 && pPager->nReserve!=((u8*)aData)[20] ){ + pPager->nReserve = ((u8*)aData)[20]; + pagerReportSize(pPager); + } + + /* If the pager is in CACHEMOD state, then there must be a copy of this + ** page in the pager cache. In this case just update the pager cache, + ** not the database file. The page is left marked dirty in this case. + ** + ** An exception to the above rule: If the database is in no-sync mode + ** and a page is moved during an incremental vacuum then the page may + ** not be in the pager cache. Later: if a malloc() or IO error occurs + ** during a Movepage() call, then the page may not be in the cache + ** either. So the condition described in the above paragraph is not + ** assert()able. + ** + ** If in WRITER_DBMOD, WRITER_FINISHED or OPEN state, then we update the + ** pager cache if it exists and the main file. The page is then marked + ** not dirty. Since this code is only executed in PAGER_OPEN state for + ** a hot-journal rollback, it is guaranteed that the page-cache is empty + ** if the pager is in OPEN state. + ** + ** Ticket #1171: The statement journal might contain page content that is + ** different from the page content at the start of the transaction. + ** This occurs when a page is changed prior to the start of a statement + ** then changed again within the statement. When rolling back such a + ** statement we must not write to the original database unless we know + ** for certain that original page contents are synced into the main rollback + ** journal. Otherwise, a power loss might leave modified data in the + ** database file without an entry in the rollback journal that can + ** restore the database to its original form. Two conditions must be + ** met before writing to the database files. (1) the database must be + ** locked. (2) we know that the original page content is fully synced + ** in the main journal either because the page is not in cache or else + ** the page is marked as needSync==0. + ** + ** 2008-04-14: When attempting to vacuum a corrupt database file, it + ** is possible to fail a statement on a database that does not yet exist. + ** Do not attempt to write if database file has never been opened. + */ + if( pagerUseWal(pPager) ){ + pPg = 0; + }else{ + pPg = sqlite3PagerLookup(pPager, pgno); + } + assert( pPg ); + assert( pPager->eState!=PAGER_OPEN || pPg==0 ); + PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n", + PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, (u8*)aData), + (isMainJrnl?"main-journal":"sub-journal") + )); + if( isMainJrnl ){ + isSynced = pPager->noSync || (*pOffset <= pPager->journalHdr); + }else{ + isSynced = (pPg==0 || 0==(pPg->flags & PGHDR_NEED_SYNC)); + } + if( isOpen(pPager->fd) + && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) + && isSynced + ){ + i64 ofst = (pgno-1)*(i64)pPager->pageSize; + testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 ); + assert( !pagerUseWal(pPager) ); + rc = sqlite3OsWrite(pPager->fd, (u8 *)aData, pPager->pageSize, ofst); + if( pgno>pPager->dbFileSize ){ + pPager->dbFileSize = pgno; + } + if( pPager->pBackup ){ + CODEC1(pPager, aData, pgno, 3, rc=SQLITE_NOMEM); + sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)aData); + CODEC2(pPager, aData, pgno, 7, rc=SQLITE_NOMEM, aData); + } + }else if( !isMainJrnl && pPg==0 ){ + /* If this is a rollback of a savepoint and data was not written to + ** the database and the page is not in-memory, there is a potential + ** problem. When the page is next fetched by the b-tree layer, it + ** will be read from the database file, which may or may not be + ** current. + ** + ** There are a couple of different ways this can happen. All are quite + ** obscure. When running in synchronous mode, this can only happen + ** if the page is on the free-list at the start of the transaction, then + ** populated, then moved using sqlite3PagerMovepage(). + ** + ** The solution is to add an in-memory page to the cache containing + ** the data just read from the sub-journal. Mark the page as dirty + ** and if the pager requires a journal-sync, then mark the page as + ** requiring a journal-sync before it is written. + */ + assert( isSavepnt ); + assert( (pPager->doNotSpill & SPILLFLAG_ROLLBACK)==0 ); + pPager->doNotSpill |= SPILLFLAG_ROLLBACK; + rc = sqlite3PagerAcquire(pPager, pgno, &pPg, 1); + assert( (pPager->doNotSpill & SPILLFLAG_ROLLBACK)!=0 ); + pPager->doNotSpill &= ~SPILLFLAG_ROLLBACK; + if( rc!=SQLITE_OK ) return rc; + pPg->flags &= ~PGHDR_NEED_READ; + sqlite3PcacheMakeDirty(pPg); + } + if( pPg ){ + /* No page should ever be explicitly rolled back that is in use, except + ** for page 1 which is held in use in order to keep the lock on the + ** database active. However such a page may be rolled back as a result + ** of an internal error resulting in an automatic call to + ** sqlite3PagerRollback(). + */ + void *pData; + pData = pPg->pData; + memcpy(pData, (u8*)aData, pPager->pageSize); + pPager->xReiniter(pPg); + if( isMainJrnl && (!isSavepnt || *pOffset<=pPager->journalHdr) ){ + /* If the contents of this page were just restored from the main + ** journal file, then its content must be as they were when the + ** transaction was first opened. In this case we can mark the page + ** as clean, since there will be no need to write it out to the + ** database. + ** + ** There is one exception to this rule. If the page is being rolled + ** back as part of a savepoint (or statement) rollback from an + ** unsynced portion of the main journal file, then it is not safe + ** to mark the page as clean. This is because marking the page as + ** clean will clear the PGHDR_NEED_SYNC flag. Since the page is + ** already in the journal file (recorded in Pager.pInJournal) and + ** the PGHDR_NEED_SYNC flag is cleared, if the page is written to + ** again within this transaction, it will be marked as dirty but + ** the PGHDR_NEED_SYNC flag will not be set. It could then potentially + ** be written out into the database file before its journal file + ** segment is synced. If a crash occurs during or following this, + ** database corruption may ensue. + */ + assert( !pagerUseWal(pPager) ); + sqlite3PcacheMakeClean(pPg); + } + pager_set_pagehash(pPg); + + /* If this was page 1, then restore the value of Pager.dbFileVers. + ** Do this before any decoding. */ + if( pgno==1 ){ + memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers)); + } + + /* Decode the page just read from disk */ + CODEC1(pPager, pData, pPg->pgno, 3, rc=SQLITE_NOMEM); + sqlite3PcacheRelease(pPg); + } + return rc; +} + +/* +** Parameter zMaster is the name of a master journal file. A single journal +** file that referred to the master journal file has just been rolled back. +** This routine checks if it is possible to delete the master journal file, +** and does so if it is. +** +** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not +** available for use within this function. +** +** When a master journal file is created, it is populated with the names +** of all of its child journals, one after another, formatted as utf-8 +** encoded text. The end of each child journal file is marked with a +** nul-terminator byte (0x00). i.e. the entire contents of a master journal +** file for a transaction involving two databases might be: +** +** "/home/bill/a.db-journal\x00/home/bill/b.db-journal\x00" +** +** A master journal file may only be deleted once all of its child +** journals have been rolled back. +** +** This function reads the contents of the master-journal file into +** memory and loops through each of the child journal names. For +** each child journal, it checks if: +** +** * if the child journal exists, and if so +** * if the child journal contains a reference to master journal +** file zMaster +** +** If a child journal can be found that matches both of the criteria +** above, this function returns without doing anything. Otherwise, if +** no such child journal can be found, file zMaster is deleted from +** the file-system using sqlite3OsDelete(). +** +** If an IO error within this function, an error code is returned. This +** function allocates memory by calling sqlite3Malloc(). If an allocation +** fails, SQLITE_NOMEM is returned. Otherwise, if no IO or malloc errors +** occur, SQLITE_OK is returned. +** +** TODO: This function allocates a single block of memory to load +** the entire contents of the master journal file. This could be +** a couple of kilobytes or so - potentially larger than the page +** size. +*/ +static int pager_delmaster(Pager *pPager, const char *zMaster){ + sqlite3_vfs *pVfs = pPager->pVfs; + int rc; /* Return code */ + sqlite3_file *pMaster; /* Malloc'd master-journal file descriptor */ + sqlite3_file *pJournal; /* Malloc'd child-journal file descriptor */ + char *zMasterJournal = 0; /* Contents of master journal file */ + i64 nMasterJournal; /* Size of master journal file */ + char *zJournal; /* Pointer to one journal within MJ file */ + char *zMasterPtr; /* Space to hold MJ filename from a journal file */ + int nMasterPtr; /* Amount of space allocated to zMasterPtr[] */ + + /* Allocate space for both the pJournal and pMaster file descriptors. + ** If successful, open the master journal file for reading. + */ + pMaster = (sqlite3_file *)sqlite3MallocZero(pVfs->szOsFile * 2); + pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile); + if( !pMaster ){ + rc = SQLITE_NOMEM; + }else{ + const int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL); + rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0); + } + if( rc!=SQLITE_OK ) goto delmaster_out; + + /* Load the entire master journal file into space obtained from + ** sqlite3_malloc() and pointed to by zMasterJournal. Also obtain + ** sufficient space (in zMasterPtr) to hold the names of master + ** journal files extracted from regular rollback-journals. + */ + rc = sqlite3OsFileSize(pMaster, &nMasterJournal); + if( rc!=SQLITE_OK ) goto delmaster_out; + nMasterPtr = pVfs->mxPathname+1; + zMasterJournal = sqlite3Malloc(nMasterJournal + nMasterPtr + 1); + if( !zMasterJournal ){ + rc = SQLITE_NOMEM; + goto delmaster_out; + } + zMasterPtr = &zMasterJournal[nMasterJournal+1]; + rc = sqlite3OsRead(pMaster, zMasterJournal, (int)nMasterJournal, 0); + if( rc!=SQLITE_OK ) goto delmaster_out; + zMasterJournal[nMasterJournal] = 0; + + zJournal = zMasterJournal; + while( (zJournal-zMasterJournal)<nMasterJournal ){ + int exists; + rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists); + if( rc!=SQLITE_OK ){ + goto delmaster_out; + } + if( exists ){ + /* One of the journals pointed to by the master journal exists. + ** Open it and check if it points at the master journal. If + ** so, return without deleting the master journal file. + */ + int c; + int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL); + rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0); + if( rc!=SQLITE_OK ){ + goto delmaster_out; + } + + rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr); + sqlite3OsClose(pJournal); + if( rc!=SQLITE_OK ){ + goto delmaster_out; + } + + c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0; + if( c ){ + /* We have a match. Do not delete the master journal file. */ + goto delmaster_out; + } + } + zJournal += (sqlite3Strlen30(zJournal)+1); + } + + sqlite3OsClose(pMaster); + rc = sqlite3OsDelete(pVfs, zMaster, 0); + +delmaster_out: + sqlite3_free(zMasterJournal); + if( pMaster ){ + sqlite3OsClose(pMaster); + assert( !isOpen(pJournal) ); + sqlite3_free(pMaster); + } + return rc; +} + + +/* +** This function is used to change the actual size of the database +** file in the file-system. This only happens when committing a transaction, +** or rolling back a transaction (including rolling back a hot-journal). +** +** If the main database file is not open, or the pager is not in either +** DBMOD or OPEN state, this function is a no-op. Otherwise, the size +** of the file is changed to nPage pages (nPage*pPager->pageSize bytes). +** If the file on disk is currently larger than nPage pages, then use the VFS +** xTruncate() method to truncate it. +** +** Or, it might be the case that the file on disk is smaller than +** nPage pages. Some operating system implementations can get confused if +** you try to truncate a file to some size that is larger than it +** currently is, so detect this case and write a single zero byte to +** the end of the new file instead. +** +** If successful, return SQLITE_OK. If an IO error occurs while modifying +** the database file, return the error code to the caller. +*/ +static int pager_truncate(Pager *pPager, Pgno nPage){ + int rc = SQLITE_OK; + assert( pPager->eState!=PAGER_ERROR ); + assert( pPager->eState!=PAGER_READER ); + + if( isOpen(pPager->fd) + && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) + ){ + i64 currentSize, newSize; + int szPage = pPager->pageSize; + assert( pPager->eLock==EXCLUSIVE_LOCK ); + /* TODO: Is it safe to use Pager.dbFileSize here? */ + rc = sqlite3OsFileSize(pPager->fd, ¤tSize); + newSize = szPage*(i64)nPage; + if( rc==SQLITE_OK && currentSize!=newSize ){ + if( currentSize>newSize ){ + rc = sqlite3OsTruncate(pPager->fd, newSize); + }else if( (currentSize+szPage)<=newSize ){ + char *pTmp = pPager->pTmpSpace; + memset(pTmp, 0, szPage); + testcase( (newSize-szPage) == currentSize ); + testcase( (newSize-szPage) > currentSize ); + rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, newSize-szPage); + } + if( rc==SQLITE_OK ){ + pPager->dbFileSize = nPage; + } + } + } + return rc; +} + +/* +** Return a sanitized version of the sector-size of OS file pFile. The +** return value is guaranteed to lie between 32 and MAX_SECTOR_SIZE. +*/ +SQLITE_PRIVATE int sqlite3SectorSize(sqlite3_file *pFile){ + int iRet = sqlite3OsSectorSize(pFile); + if( iRet<32 ){ + iRet = 512; + }else if( iRet>MAX_SECTOR_SIZE ){ + assert( MAX_SECTOR_SIZE>=512 ); + iRet = MAX_SECTOR_SIZE; + } + return iRet; +} + +/* +** Set the value of the Pager.sectorSize variable for the given +** pager based on the value returned by the xSectorSize method +** of the open database file. The sector size will be used +** to determine the size and alignment of journal header and +** master journal pointers within created journal files. +** +** For temporary files the effective sector size is always 512 bytes. +** +** Otherwise, for non-temporary files, the effective sector size is +** the value returned by the xSectorSize() method rounded up to 32 if +** it is less than 32, or rounded down to MAX_SECTOR_SIZE if it +** is greater than MAX_SECTOR_SIZE. +** +** If the file has the SQLITE_IOCAP_POWERSAFE_OVERWRITE property, then set +** the effective sector size to its minimum value (512). The purpose of +** pPager->sectorSize is to define the "blast radius" of bytes that +** might change if a crash occurs while writing to a single byte in +** that range. But with POWERSAFE_OVERWRITE, the blast radius is zero +** (that is what POWERSAFE_OVERWRITE means), so we minimize the sector +** size. For backwards compatibility of the rollback journal file format, +** we cannot reduce the effective sector size below 512. +*/ +static void setSectorSize(Pager *pPager){ + assert( isOpen(pPager->fd) || pPager->tempFile ); + + if( pPager->tempFile + || (sqlite3OsDeviceCharacteristics(pPager->fd) & + SQLITE_IOCAP_POWERSAFE_OVERWRITE)!=0 + ){ + /* Sector size doesn't matter for temporary files. Also, the file + ** may not have been opened yet, in which case the OsSectorSize() + ** call will segfault. */ + pPager->sectorSize = 512; + }else{ + pPager->sectorSize = sqlite3SectorSize(pPager->fd); + } +} + +/* +** Playback the journal and thus restore the database file to +** the state it was in before we started making changes. +** +** The journal file format is as follows: +** +** (1) 8 byte prefix. A copy of aJournalMagic[]. +** (2) 4 byte big-endian integer which is the number of valid page records +** in the journal. If this value is 0xffffffff, then compute the +** number of page records from the journal size. +** (3) 4 byte big-endian integer which is the initial value for the +** sanity checksum. +** (4) 4 byte integer which is the number of pages to truncate the +** database to during a rollback. +** (5) 4 byte big-endian integer which is the sector size. The header +** is this many bytes in size. +** (6) 4 byte big-endian integer which is the page size. +** (7) zero padding out to the next sector size. +** (8) Zero or more pages instances, each as follows: +** + 4 byte page number. +** + pPager->pageSize bytes of data. +** + 4 byte checksum +** +** When we speak of the journal header, we mean the first 7 items above. +** Each entry in the journal is an instance of the 8th item. +** +** Call the value from the second bullet "nRec". nRec is the number of +** valid page entries in the journal. In most cases, you can compute the +** value of nRec from the size of the journal file. But if a power +** failure occurred while the journal was being written, it could be the +** case that the size of the journal file had already been increased but +** the extra entries had not yet made it safely to disk. In such a case, +** the value of nRec computed from the file size would be too large. For +** that reason, we always use the nRec value in the header. +** +** If the nRec value is 0xffffffff it means that nRec should be computed +** from the file size. This value is used when the user selects the +** no-sync option for the journal. A power failure could lead to corruption +** in this case. But for things like temporary table (which will be +** deleted when the power is restored) we don't care. +** +** If the file opened as the journal file is not a well-formed +** journal file then all pages up to the first corrupted page are rolled +** back (or no pages if the journal header is corrupted). The journal file +** is then deleted and SQLITE_OK returned, just as if no corruption had +** been encountered. +** +** If an I/O or malloc() error occurs, the journal-file is not deleted +** and an error code is returned. +** +** The isHot parameter indicates that we are trying to rollback a journal +** that might be a hot journal. Or, it could be that the journal is +** preserved because of JOURNALMODE_PERSIST or JOURNALMODE_TRUNCATE. +** If the journal really is hot, reset the pager cache prior rolling +** back any content. If the journal is merely persistent, no reset is +** needed. +*/ +static int pager_playback(Pager *pPager, int isHot){ + sqlite3_vfs *pVfs = pPager->pVfs; + i64 szJ; /* Size of the journal file in bytes */ + u32 nRec; /* Number of Records in the journal */ + u32 u; /* Unsigned loop counter */ + Pgno mxPg = 0; /* Size of the original file in pages */ + int rc; /* Result code of a subroutine */ + int res = 1; /* Value returned by sqlite3OsAccess() */ + char *zMaster = 0; /* Name of master journal file if any */ + int needPagerReset; /* True to reset page prior to first page rollback */ + int nPlayback = 0; /* Total number of pages restored from journal */ + + /* Figure out how many records are in the journal. Abort early if + ** the journal is empty. + */ + assert( isOpen(pPager->jfd) ); + rc = sqlite3OsFileSize(pPager->jfd, &szJ); + if( rc!=SQLITE_OK ){ + goto end_playback; + } + + /* Read the master journal name from the journal, if it is present. + ** If a master journal file name is specified, but the file is not + ** present on disk, then the journal is not hot and does not need to be + ** played back. + ** + ** TODO: Technically the following is an error because it assumes that + ** buffer Pager.pTmpSpace is (mxPathname+1) bytes or larger. i.e. that + ** (pPager->pageSize >= pPager->pVfs->mxPathname+1). Using os_unix.c, + ** mxPathname is 512, which is the same as the minimum allowable value + ** for pageSize. + */ + zMaster = pPager->pTmpSpace; + rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1); + if( rc==SQLITE_OK && zMaster[0] ){ + rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res); + } + zMaster = 0; + if( rc!=SQLITE_OK || !res ){ + goto end_playback; + } + pPager->journalOff = 0; + needPagerReset = isHot; + + /* This loop terminates either when a readJournalHdr() or + ** pager_playback_one_page() call returns SQLITE_DONE or an IO error + ** occurs. + */ + while( 1 ){ + /* Read the next journal header from the journal file. If there are + ** not enough bytes left in the journal file for a complete header, or + ** it is corrupted, then a process must have failed while writing it. + ** This indicates nothing more needs to be rolled back. + */ + rc = readJournalHdr(pPager, isHot, szJ, &nRec, &mxPg); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + } + goto end_playback; + } + + /* If nRec is 0xffffffff, then this journal was created by a process + ** working in no-sync mode. This means that the rest of the journal + ** file consists of pages, there are no more journal headers. Compute + ** the value of nRec based on this assumption. + */ + if( nRec==0xffffffff ){ + assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ); + nRec = (int)((szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager)); + } + + /* If nRec is 0 and this rollback is of a transaction created by this + ** process and if this is the final header in the journal, then it means + ** that this part of the journal was being filled but has not yet been + ** synced to disk. Compute the number of pages based on the remaining + ** size of the file. + ** + ** The third term of the test was added to fix ticket #2565. + ** When rolling back a hot journal, nRec==0 always means that the next + ** chunk of the journal contains zero pages to be rolled back. But + ** when doing a ROLLBACK and the nRec==0 chunk is the last chunk in + ** the journal, it means that the journal might contain additional + ** pages that need to be rolled back and that the number of pages + ** should be computed based on the journal file size. + */ + if( nRec==0 && !isHot && + pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){ + nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager)); + } + + /* If this is the first header read from the journal, truncate the + ** database file back to its original size. + */ + if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){ + rc = pager_truncate(pPager, mxPg); + if( rc!=SQLITE_OK ){ + goto end_playback; + } + pPager->dbSize = mxPg; + } + + /* Copy original pages out of the journal and back into the + ** database file and/or page cache. + */ + for(u=0; u<nRec; u++){ + if( needPagerReset ){ + pager_reset(pPager); + needPagerReset = 0; + } + rc = pager_playback_one_page(pPager,&pPager->journalOff,0,1,0); + if( rc==SQLITE_OK ){ + nPlayback++; + }else{ + if( rc==SQLITE_DONE ){ + pPager->journalOff = szJ; + break; + }else if( rc==SQLITE_IOERR_SHORT_READ ){ + /* If the journal has been truncated, simply stop reading and + ** processing the journal. This might happen if the journal was + ** not completely written and synced prior to a crash. In that + ** case, the database should have never been written in the + ** first place so it is OK to simply abandon the rollback. */ + rc = SQLITE_OK; + goto end_playback; + }else{ + /* If we are unable to rollback, quit and return the error + ** code. This will cause the pager to enter the error state + ** so that no further harm will be done. Perhaps the next + ** process to come along will be able to rollback the database. + */ + goto end_playback; + } + } + } + } + /*NOTREACHED*/ + assert( 0 ); + +end_playback: + /* Following a rollback, the database file should be back in its original + ** state prior to the start of the transaction, so invoke the + ** SQLITE_FCNTL_DB_UNCHANGED file-control method to disable the + ** assertion that the transaction counter was modified. + */ +#ifdef SQLITE_DEBUG + if( pPager->fd->pMethods ){ + sqlite3OsFileControlHint(pPager->fd,SQLITE_FCNTL_DB_UNCHANGED,0); + } +#endif + + /* If this playback is happening automatically as a result of an IO or + ** malloc error that occurred after the change-counter was updated but + ** before the transaction was committed, then the change-counter + ** modification may just have been reverted. If this happens in exclusive + ** mode, then subsequent transactions performed by the connection will not + ** update the change-counter at all. This may lead to cache inconsistency + ** problems for other processes at some point in the future. So, just + ** in case this has happened, clear the changeCountDone flag now. + */ + pPager->changeCountDone = pPager->tempFile; + + if( rc==SQLITE_OK ){ + zMaster = pPager->pTmpSpace; + rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1); + testcase( rc!=SQLITE_OK ); + } + if( rc==SQLITE_OK + && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) + ){ + rc = sqlite3PagerSync(pPager, 0); + } + if( rc==SQLITE_OK ){ + rc = pager_end_transaction(pPager, zMaster[0]!='\0', 0); + testcase( rc!=SQLITE_OK ); + } + if( rc==SQLITE_OK && zMaster[0] && res ){ + /* If there was a master journal and this routine will return success, + ** see if it is possible to delete the master journal. + */ + rc = pager_delmaster(pPager, zMaster); + testcase( rc!=SQLITE_OK ); + } + if( isHot && nPlayback ){ + sqlite3_log(SQLITE_NOTICE_RECOVER_ROLLBACK, "recovered %d pages from %s", + nPlayback, pPager->zJournal); + } + + /* The Pager.sectorSize variable may have been updated while rolling + ** back a journal created by a process with a different sector size + ** value. Reset it to the correct value for this process. + */ + setSectorSize(pPager); + return rc; +} + + +/* +** Read the content for page pPg out of the database file and into +** pPg->pData. A shared lock or greater must be held on the database +** file before this function is called. +** +** If page 1 is read, then the value of Pager.dbFileVers[] is set to +** the value read from the database file. +** +** If an IO error occurs, then the IO error is returned to the caller. +** Otherwise, SQLITE_OK is returned. +*/ +static int readDbPage(PgHdr *pPg, u32 iFrame){ + Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ + Pgno pgno = pPg->pgno; /* Page number to read */ + int rc = SQLITE_OK; /* Return code */ + int pgsz = pPager->pageSize; /* Number of bytes to read */ + + assert( pPager->eState>=PAGER_READER ); + assert( isOpen(pPager->fd) ); + +#ifndef SQLITE_OMIT_WAL + if( iFrame ){ + /* Try to pull the page from the write-ahead log. */ + rc = sqlite3WalReadFrame(pPager->pWal, iFrame, pgsz, pPg->pData); + }else +#endif + { + i64 iOffset = (pgno-1)*(i64)pPager->pageSize; + rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset); + if( rc==SQLITE_IOERR_SHORT_READ ){ + rc = SQLITE_OK; + } + } + + if( pgno==1 ){ + if( rc ){ + /* If the read is unsuccessful, set the dbFileVers[] to something + ** that will never be a valid file version. dbFileVers[] is a copy + ** of bytes 24..39 of the database. Bytes 28..31 should always be + ** zero or the size of the database in page. Bytes 32..35 and 35..39 + ** should be page numbers which are never 0xffffffff. So filling + ** pPager->dbFileVers[] with all 0xff bytes should suffice. + ** + ** For an encrypted database, the situation is more complex: bytes + ** 24..39 of the database are white noise. But the probability of + ** white noise equaling 16 bytes of 0xff is vanishingly small so + ** we should still be ok. + */ + memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers)); + }else{ + u8 *dbFileVers = &((u8*)pPg->pData)[24]; + memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers)); + } + } + CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM); + + PAGER_INCR(sqlite3_pager_readdb_count); + PAGER_INCR(pPager->nRead); + IOTRACE(("PGIN %p %d\n", pPager, pgno)); + PAGERTRACE(("FETCH %d page %d hash(%08x)\n", + PAGERID(pPager), pgno, pager_pagehash(pPg))); + + return rc; +} + +/* +** Update the value of the change-counter at offsets 24 and 92 in +** the header and the sqlite version number at offset 96. +** +** This is an unconditional update. See also the pager_incr_changecounter() +** routine which only updates the change-counter if the update is actually +** needed, as determined by the pPager->changeCountDone state variable. +*/ +static void pager_write_changecounter(PgHdr *pPg){ + u32 change_counter; + + /* Increment the value just read and write it back to byte 24. */ + change_counter = sqlite3Get4byte((u8*)pPg->pPager->dbFileVers)+1; + put32bits(((char*)pPg->pData)+24, change_counter); + + /* Also store the SQLite version number in bytes 96..99 and in + ** bytes 92..95 store the change counter for which the version number + ** is valid. */ + put32bits(((char*)pPg->pData)+92, change_counter); + put32bits(((char*)pPg->pData)+96, SQLITE_VERSION_NUMBER); +} + +#ifndef SQLITE_OMIT_WAL +/* +** This function is invoked once for each page that has already been +** written into the log file when a WAL transaction is rolled back. +** Parameter iPg is the page number of said page. The pCtx argument +** is actually a pointer to the Pager structure. +** +** If page iPg is present in the cache, and has no outstanding references, +** it is discarded. Otherwise, if there are one or more outstanding +** references, the page content is reloaded from the database. If the +** attempt to reload content from the database is required and fails, +** return an SQLite error code. Otherwise, SQLITE_OK. +*/ +static int pagerUndoCallback(void *pCtx, Pgno iPg){ + int rc = SQLITE_OK; + Pager *pPager = (Pager *)pCtx; + PgHdr *pPg; + + assert( pagerUseWal(pPager) ); + pPg = sqlite3PagerLookup(pPager, iPg); + if( pPg ){ + if( sqlite3PcachePageRefcount(pPg)==1 ){ + sqlite3PcacheDrop(pPg); + }else{ + u32 iFrame = 0; + rc = sqlite3WalFindFrame(pPager->pWal, pPg->pgno, &iFrame); + if( rc==SQLITE_OK ){ + rc = readDbPage(pPg, iFrame); + } + if( rc==SQLITE_OK ){ + pPager->xReiniter(pPg); + } + sqlite3PagerUnrefNotNull(pPg); + } + } + + /* Normally, if a transaction is rolled back, any backup processes are + ** updated as data is copied out of the rollback journal and into the + ** database. This is not generally possible with a WAL database, as + ** rollback involves simply truncating the log file. Therefore, if one + ** or more frames have already been written to the log (and therefore + ** also copied into the backup databases) as part of this transaction, + ** the backups must be restarted. + */ + sqlite3BackupRestart(pPager->pBackup); + + return rc; +} + +/* +** This function is called to rollback a transaction on a WAL database. +*/ +static int pagerRollbackWal(Pager *pPager){ + int rc; /* Return Code */ + PgHdr *pList; /* List of dirty pages to revert */ + + /* For all pages in the cache that are currently dirty or have already + ** been written (but not committed) to the log file, do one of the + ** following: + ** + ** + Discard the cached page (if refcount==0), or + ** + Reload page content from the database (if refcount>0). + */ + pPager->dbSize = pPager->dbOrigSize; + rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager); + pList = sqlite3PcacheDirtyList(pPager->pPCache); + while( pList && rc==SQLITE_OK ){ + PgHdr *pNext = pList->pDirty; + rc = pagerUndoCallback((void *)pPager, pList->pgno); + pList = pNext; + } + + return rc; +} + +/* +** This function is a wrapper around sqlite3WalFrames(). As well as logging +** the contents of the list of pages headed by pList (connected by pDirty), +** this function notifies any active backup processes that the pages have +** changed. +** +** The list of pages passed into this routine is always sorted by page number. +** Hence, if page 1 appears anywhere on the list, it will be the first page. +*/ +static int pagerWalFrames( + Pager *pPager, /* Pager object */ + PgHdr *pList, /* List of frames to log */ + Pgno nTruncate, /* Database size after this commit */ + int isCommit /* True if this is a commit */ +){ + int rc; /* Return code */ + int nList; /* Number of pages in pList */ + PgHdr *p; /* For looping over pages */ + + assert( pPager->pWal ); + assert( pList ); +#ifdef SQLITE_DEBUG + /* Verify that the page list is in accending order */ + for(p=pList; p && p->pDirty; p=p->pDirty){ + assert( p->pgno < p->pDirty->pgno ); + } +#endif + + assert( pList->pDirty==0 || isCommit ); + if( isCommit ){ + /* If a WAL transaction is being committed, there is no point in writing + ** any pages with page numbers greater than nTruncate into the WAL file. + ** They will never be read by any client. So remove them from the pDirty + ** list here. */ + PgHdr **ppNext = &pList; + nList = 0; + for(p=pList; (*ppNext = p)!=0; p=p->pDirty){ + if( p->pgno<=nTruncate ){ + ppNext = &p->pDirty; + nList++; + } + } + assert( pList ); + }else{ + nList = 1; + } + pPager->aStat[PAGER_STAT_WRITE] += nList; + + if( pList->pgno==1 ) pager_write_changecounter(pList); + rc = sqlite3WalFrames(pPager->pWal, + pPager->pageSize, pList, nTruncate, isCommit, pPager->walSyncFlags + ); + if( rc==SQLITE_OK && pPager->pBackup ){ + for(p=pList; p; p=p->pDirty){ + sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData); + } + } + +#ifdef SQLITE_CHECK_PAGES + pList = sqlite3PcacheDirtyList(pPager->pPCache); + for(p=pList; p; p=p->pDirty){ + pager_set_pagehash(p); + } +#endif + + return rc; +} + +/* +** Begin a read transaction on the WAL. +** +** This routine used to be called "pagerOpenSnapshot()" because it essentially +** makes a snapshot of the database at the current point in time and preserves +** that snapshot for use by the reader in spite of concurrently changes by +** other writers or checkpointers. +*/ +static int pagerBeginReadTransaction(Pager *pPager){ + int rc; /* Return code */ + int changed = 0; /* True if cache must be reset */ + + assert( pagerUseWal(pPager) ); + assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER ); + + /* sqlite3WalEndReadTransaction() was not called for the previous + ** transaction in locking_mode=EXCLUSIVE. So call it now. If we + ** are in locking_mode=NORMAL and EndRead() was previously called, + ** the duplicate call is harmless. + */ + sqlite3WalEndReadTransaction(pPager->pWal); + + rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed); + if( rc!=SQLITE_OK || changed ){ + pager_reset(pPager); + if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0); + } + + return rc; +} +#endif + +/* +** This function is called as part of the transition from PAGER_OPEN +** to PAGER_READER state to determine the size of the database file +** in pages (assuming the page size currently stored in Pager.pageSize). +** +** If no error occurs, SQLITE_OK is returned and the size of the database +** in pages is stored in *pnPage. Otherwise, an error code (perhaps +** SQLITE_IOERR_FSTAT) is returned and *pnPage is left unmodified. +*/ +static int pagerPagecount(Pager *pPager, Pgno *pnPage){ + Pgno nPage; /* Value to return via *pnPage */ + + /* Query the WAL sub-system for the database size. The WalDbsize() + ** function returns zero if the WAL is not open (i.e. Pager.pWal==0), or + ** if the database size is not available. The database size is not + ** available from the WAL sub-system if the log file is empty or + ** contains no valid committed transactions. + */ + assert( pPager->eState==PAGER_OPEN ); + assert( pPager->eLock>=SHARED_LOCK ); + nPage = sqlite3WalDbsize(pPager->pWal); + + /* If the number of pages in the database is not available from the + ** WAL sub-system, determine the page counte based on the size of + ** the database file. If the size of the database file is not an + ** integer multiple of the page-size, round up the result. + */ + if( nPage==0 ){ + i64 n = 0; /* Size of db file in bytes */ + assert( isOpen(pPager->fd) || pPager->tempFile ); + if( isOpen(pPager->fd) ){ + int rc = sqlite3OsFileSize(pPager->fd, &n); + if( rc!=SQLITE_OK ){ + return rc; + } + } + nPage = (Pgno)((n+pPager->pageSize-1) / pPager->pageSize); + } + + /* If the current number of pages in the file is greater than the + ** configured maximum pager number, increase the allowed limit so + ** that the file can be read. + */ + if( nPage>pPager->mxPgno ){ + pPager->mxPgno = (Pgno)nPage; + } + + *pnPage = nPage; + return SQLITE_OK; +} + +#ifndef SQLITE_OMIT_WAL +/* +** Check if the *-wal file that corresponds to the database opened by pPager +** exists if the database is not empy, or verify that the *-wal file does +** not exist (by deleting it) if the database file is empty. +** +** If the database is not empty and the *-wal file exists, open the pager +** in WAL mode. If the database is empty or if no *-wal file exists and +** if no error occurs, make sure Pager.journalMode is not set to +** PAGER_JOURNALMODE_WAL. +** +** Return SQLITE_OK or an error code. +** +** The caller must hold a SHARED lock on the database file to call this +** function. Because an EXCLUSIVE lock on the db file is required to delete +** a WAL on a none-empty database, this ensures there is no race condition +** between the xAccess() below and an xDelete() being executed by some +** other connection. +*/ +static int pagerOpenWalIfPresent(Pager *pPager){ + int rc = SQLITE_OK; + assert( pPager->eState==PAGER_OPEN ); + assert( pPager->eLock>=SHARED_LOCK ); + + if( !pPager->tempFile ){ + int isWal; /* True if WAL file exists */ + Pgno nPage; /* Size of the database file */ + + rc = pagerPagecount(pPager, &nPage); + if( rc ) return rc; + if( nPage==0 ){ + rc = sqlite3OsDelete(pPager->pVfs, pPager->zWal, 0); + if( rc==SQLITE_IOERR_DELETE_NOENT ) rc = SQLITE_OK; + isWal = 0; + }else{ + rc = sqlite3OsAccess( + pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &isWal + ); + } + if( rc==SQLITE_OK ){ + if( isWal ){ + testcase( sqlite3PcachePagecount(pPager->pPCache)==0 ); + rc = sqlite3PagerOpenWal(pPager, 0); + }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){ + pPager->journalMode = PAGER_JOURNALMODE_DELETE; + } + } + } + return rc; +} +#endif + +/* +** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback +** the entire master journal file. The case pSavepoint==NULL occurs when +** a ROLLBACK TO command is invoked on a SAVEPOINT that is a transaction +** savepoint. +** +** When pSavepoint is not NULL (meaning a non-transaction savepoint is +** being rolled back), then the rollback consists of up to three stages, +** performed in the order specified: +** +** * Pages are played back from the main journal starting at byte +** offset PagerSavepoint.iOffset and continuing to +** PagerSavepoint.iHdrOffset, or to the end of the main journal +** file if PagerSavepoint.iHdrOffset is zero. +** +** * If PagerSavepoint.iHdrOffset is not zero, then pages are played +** back starting from the journal header immediately following +** PagerSavepoint.iHdrOffset to the end of the main journal file. +** +** * Pages are then played back from the sub-journal file, starting +** with the PagerSavepoint.iSubRec and continuing to the end of +** the journal file. +** +** Throughout the rollback process, each time a page is rolled back, the +** corresponding bit is set in a bitvec structure (variable pDone in the +** implementation below). This is used to ensure that a page is only +** rolled back the first time it is encountered in either journal. +** +** If pSavepoint is NULL, then pages are only played back from the main +** journal file. There is no need for a bitvec in this case. +** +** In either case, before playback commences the Pager.dbSize variable +** is reset to the value that it held at the start of the savepoint +** (or transaction). No page with a page-number greater than this value +** is played back. If one is encountered it is simply skipped. +*/ +static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){ + i64 szJ; /* Effective size of the main journal */ + i64 iHdrOff; /* End of first segment of main-journal records */ + int rc = SQLITE_OK; /* Return code */ + Bitvec *pDone = 0; /* Bitvec to ensure pages played back only once */ + + assert( pPager->eState!=PAGER_ERROR ); + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + + /* Allocate a bitvec to use to store the set of pages rolled back */ + if( pSavepoint ){ + pDone = sqlite3BitvecCreate(pSavepoint->nOrig); + if( !pDone ){ + return SQLITE_NOMEM; + } + } + + /* Set the database size back to the value it was before the savepoint + ** being reverted was opened. + */ + pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize; + pPager->changeCountDone = pPager->tempFile; + + if( !pSavepoint && pagerUseWal(pPager) ){ + return pagerRollbackWal(pPager); + } + + /* Use pPager->journalOff as the effective size of the main rollback + ** journal. The actual file might be larger than this in + ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST. But anything + ** past pPager->journalOff is off-limits to us. + */ + szJ = pPager->journalOff; + assert( pagerUseWal(pPager)==0 || szJ==0 ); + + /* Begin by rolling back records from the main journal starting at + ** PagerSavepoint.iOffset and continuing to the next journal header. + ** There might be records in the main journal that have a page number + ** greater than the current database size (pPager->dbSize) but those + ** will be skipped automatically. Pages are added to pDone as they + ** are played back. + */ + if( pSavepoint && !pagerUseWal(pPager) ){ + iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ; + pPager->journalOff = pSavepoint->iOffset; + while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){ + rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1); + } + assert( rc!=SQLITE_DONE ); + }else{ + pPager->journalOff = 0; + } + + /* Continue rolling back records out of the main journal starting at + ** the first journal header seen and continuing until the effective end + ** of the main journal file. Continue to skip out-of-range pages and + ** continue adding pages rolled back to pDone. + */ + while( rc==SQLITE_OK && pPager->journalOff<szJ ){ + u32 ii; /* Loop counter */ + u32 nJRec = 0; /* Number of Journal Records */ + u32 dummy; + rc = readJournalHdr(pPager, 0, szJ, &nJRec, &dummy); + assert( rc!=SQLITE_DONE ); + + /* + ** The "pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff" + ** test is related to ticket #2565. See the discussion in the + ** pager_playback() function for additional information. + */ + if( nJRec==0 + && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff + ){ + nJRec = (u32)((szJ - pPager->journalOff)/JOURNAL_PG_SZ(pPager)); + } + for(ii=0; rc==SQLITE_OK && ii<nJRec && pPager->journalOff<szJ; ii++){ + rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1); + } + assert( rc!=SQLITE_DONE ); + } + assert( rc!=SQLITE_OK || pPager->journalOff>=szJ ); + + /* Finally, rollback pages from the sub-journal. Page that were + ** previously rolled back out of the main journal (and are hence in pDone) + ** will be skipped. Out-of-range pages are also skipped. + */ + if( pSavepoint ){ + u32 ii; /* Loop counter */ + i64 offset = (i64)pSavepoint->iSubRec*(4+pPager->pageSize); + + if( pagerUseWal(pPager) ){ + rc = sqlite3WalSavepointUndo(pPager->pWal, pSavepoint->aWalData); + } + for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && ii<pPager->nSubRec; ii++){ + assert( offset==(i64)ii*(4+pPager->pageSize) ); + rc = pager_playback_one_page(pPager, &offset, pDone, 0, 1); + } + assert( rc!=SQLITE_DONE ); + } + + sqlite3BitvecDestroy(pDone); + if( rc==SQLITE_OK ){ + pPager->journalOff = szJ; + } + + return rc; +} + +/* +** Change the maximum number of in-memory pages that are allowed. +*/ +SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){ + sqlite3PcacheSetCachesize(pPager->pPCache, mxPage); +} + +/* +** Invoke SQLITE_FCNTL_MMAP_SIZE based on the current value of szMmap. +*/ +static void pagerFixMaplimit(Pager *pPager){ +#if SQLITE_MAX_MMAP_SIZE>0 + sqlite3_file *fd = pPager->fd; + if( isOpen(fd) && fd->pMethods->iVersion>=3 ){ + sqlite3_int64 sz; + sz = pPager->szMmap; + pPager->bUseFetch = (sz>0); + sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_MMAP_SIZE, &sz); + } +#endif +} + +/* +** Change the maximum size of any memory mapping made of the database file. +*/ +SQLITE_PRIVATE void sqlite3PagerSetMmapLimit(Pager *pPager, sqlite3_int64 szMmap){ + pPager->szMmap = szMmap; + pagerFixMaplimit(pPager); +} + +/* +** Adjust settings of the pager to those specified in the pgFlags parameter. +** +** The "level" in pgFlags & PAGER_SYNCHRONOUS_MASK sets the robustness +** of the database to damage due to OS crashes or power failures by +** changing the number of syncs()s when writing the journals. +** There are three levels: +** +** OFF sqlite3OsSync() is never called. This is the default +** for temporary and transient files. +** +** NORMAL The journal is synced once before writes begin on the +** database. This is normally adequate protection, but +** it is theoretically possible, though very unlikely, +** that an inopertune power failure could leave the journal +** in a state which would cause damage to the database +** when it is rolled back. +** +** FULL The journal is synced twice before writes begin on the +** database (with some additional information - the nRec field +** of the journal header - being written in between the two +** syncs). If we assume that writing a +** single disk sector is atomic, then this mode provides +** assurance that the journal will not be corrupted to the +** point of causing damage to the database during rollback. +** +** The above is for a rollback-journal mode. For WAL mode, OFF continues +** to mean that no syncs ever occur. NORMAL means that the WAL is synced +** prior to the start of checkpoint and that the database file is synced +** at the conclusion of the checkpoint if the entire content of the WAL +** was written back into the database. But no sync operations occur for +** an ordinary commit in NORMAL mode with WAL. FULL means that the WAL +** file is synced following each commit operation, in addition to the +** syncs associated with NORMAL. +** +** Do not confuse synchronous=FULL with SQLITE_SYNC_FULL. The +** SQLITE_SYNC_FULL macro means to use the MacOSX-style full-fsync +** using fcntl(F_FULLFSYNC). SQLITE_SYNC_NORMAL means to do an +** ordinary fsync() call. There is no difference between SQLITE_SYNC_FULL +** and SQLITE_SYNC_NORMAL on platforms other than MacOSX. But the +** synchronous=FULL versus synchronous=NORMAL setting determines when +** the xSync primitive is called and is relevant to all platforms. +** +** Numeric values associated with these states are OFF==1, NORMAL=2, +** and FULL=3. +*/ +#ifndef SQLITE_OMIT_PAGER_PRAGMAS +SQLITE_PRIVATE void sqlite3PagerSetFlags( + Pager *pPager, /* The pager to set safety level for */ + unsigned pgFlags /* Various flags */ +){ + unsigned level = pgFlags & PAGER_SYNCHRONOUS_MASK; + assert( level>=1 && level<=3 ); + pPager->noSync = (level==1 || pPager->tempFile) ?1:0; + pPager->fullSync = (level==3 && !pPager->tempFile) ?1:0; + if( pPager->noSync ){ + pPager->syncFlags = 0; + pPager->ckptSyncFlags = 0; + }else if( pgFlags & PAGER_FULLFSYNC ){ + pPager->syncFlags = SQLITE_SYNC_FULL; + pPager->ckptSyncFlags = SQLITE_SYNC_FULL; + }else if( pgFlags & PAGER_CKPT_FULLFSYNC ){ + pPager->syncFlags = SQLITE_SYNC_NORMAL; + pPager->ckptSyncFlags = SQLITE_SYNC_FULL; + }else{ + pPager->syncFlags = SQLITE_SYNC_NORMAL; + pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL; + } + pPager->walSyncFlags = pPager->syncFlags; + if( pPager->fullSync ){ + pPager->walSyncFlags |= WAL_SYNC_TRANSACTIONS; + } + if( pgFlags & PAGER_CACHESPILL ){ + pPager->doNotSpill &= ~SPILLFLAG_OFF; + }else{ + pPager->doNotSpill |= SPILLFLAG_OFF; + } +} +#endif + +/* +** The following global variable is incremented whenever the library +** attempts to open a temporary file. This information is used for +** testing and analysis only. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3_opentemp_count = 0; +#endif + +/* +** Open a temporary file. +** +** Write the file descriptor into *pFile. Return SQLITE_OK on success +** or some other error code if we fail. The OS will automatically +** delete the temporary file when it is closed. +** +** The flags passed to the VFS layer xOpen() call are those specified +** by parameter vfsFlags ORed with the following: +** +** SQLITE_OPEN_READWRITE +** SQLITE_OPEN_CREATE +** SQLITE_OPEN_EXCLUSIVE +** SQLITE_OPEN_DELETEONCLOSE +*/ +static int pagerOpentemp( + Pager *pPager, /* The pager object */ + sqlite3_file *pFile, /* Write the file descriptor here */ + int vfsFlags /* Flags passed through to the VFS */ +){ + int rc; /* Return code */ + +#ifdef SQLITE_TEST + sqlite3_opentemp_count++; /* Used for testing and analysis only */ +#endif + + vfsFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | + SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE; + rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0); + assert( rc!=SQLITE_OK || isOpen(pFile) ); + return rc; +} + +/* +** Set the busy handler function. +** +** The pager invokes the busy-handler if sqlite3OsLock() returns +** SQLITE_BUSY when trying to upgrade from no-lock to a SHARED lock, +** or when trying to upgrade from a RESERVED lock to an EXCLUSIVE +** lock. It does *not* invoke the busy handler when upgrading from +** SHARED to RESERVED, or when upgrading from SHARED to EXCLUSIVE +** (which occurs during hot-journal rollback). Summary: +** +** Transition | Invokes xBusyHandler +** -------------------------------------------------------- +** NO_LOCK -> SHARED_LOCK | Yes +** SHARED_LOCK -> RESERVED_LOCK | No +** SHARED_LOCK -> EXCLUSIVE_LOCK | No +** RESERVED_LOCK -> EXCLUSIVE_LOCK | Yes +** +** If the busy-handler callback returns non-zero, the lock is +** retried. If it returns zero, then the SQLITE_BUSY error is +** returned to the caller of the pager API function. +*/ +SQLITE_PRIVATE void sqlite3PagerSetBusyhandler( + Pager *pPager, /* Pager object */ + int (*xBusyHandler)(void *), /* Pointer to busy-handler function */ + void *pBusyHandlerArg /* Argument to pass to xBusyHandler */ +){ + pPager->xBusyHandler = xBusyHandler; + pPager->pBusyHandlerArg = pBusyHandlerArg; + + if( isOpen(pPager->fd) ){ + void **ap = (void **)&pPager->xBusyHandler; + assert( ((int(*)(void *))(ap[0]))==xBusyHandler ); + assert( ap[1]==pBusyHandlerArg ); + sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_BUSYHANDLER, (void *)ap); + } +} + +/* +** Change the page size used by the Pager object. The new page size +** is passed in *pPageSize. +** +** If the pager is in the error state when this function is called, it +** is a no-op. The value returned is the error state error code (i.e. +** one of SQLITE_IOERR, an SQLITE_IOERR_xxx sub-code or SQLITE_FULL). +** +** Otherwise, if all of the following are true: +** +** * the new page size (value of *pPageSize) is valid (a power +** of two between 512 and SQLITE_MAX_PAGE_SIZE, inclusive), and +** +** * there are no outstanding page references, and +** +** * the database is either not an in-memory database or it is +** an in-memory database that currently consists of zero pages. +** +** then the pager object page size is set to *pPageSize. +** +** If the page size is changed, then this function uses sqlite3PagerMalloc() +** to obtain a new Pager.pTmpSpace buffer. If this allocation attempt +** fails, SQLITE_NOMEM is returned and the page size remains unchanged. +** In all other cases, SQLITE_OK is returned. +** +** If the page size is not changed, either because one of the enumerated +** conditions above is not true, the pager was in error state when this +** function was called, or because the memory allocation attempt failed, +** then *pPageSize is set to the old, retained page size before returning. +*/ +SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager *pPager, u32 *pPageSize, int nReserve){ + int rc = SQLITE_OK; + + /* It is not possible to do a full assert_pager_state() here, as this + ** function may be called from within PagerOpen(), before the state + ** of the Pager object is internally consistent. + ** + ** At one point this function returned an error if the pager was in + ** PAGER_ERROR state. But since PAGER_ERROR state guarantees that + ** there is at least one outstanding page reference, this function + ** is a no-op for that case anyhow. + */ + + u32 pageSize = *pPageSize; + assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) ); + if( (pPager->memDb==0 || pPager->dbSize==0) + && sqlite3PcacheRefCount(pPager->pPCache)==0 + && pageSize && pageSize!=(u32)pPager->pageSize + ){ + char *pNew = NULL; /* New temp space */ + i64 nByte = 0; + + if( pPager->eState>PAGER_OPEN && isOpen(pPager->fd) ){ + rc = sqlite3OsFileSize(pPager->fd, &nByte); + } + if( rc==SQLITE_OK ){ + pNew = (char *)sqlite3PageMalloc(pageSize); + if( !pNew ) rc = SQLITE_NOMEM; + } + + if( rc==SQLITE_OK ){ + pager_reset(pPager); + rc = sqlite3PcacheSetPageSize(pPager->pPCache, pageSize); + } + if( rc==SQLITE_OK ){ + sqlite3PageFree(pPager->pTmpSpace); + pPager->pTmpSpace = pNew; + pPager->dbSize = (Pgno)((nByte+pageSize-1)/pageSize); + pPager->pageSize = pageSize; + }else{ + sqlite3PageFree(pNew); + } + } + + *pPageSize = pPager->pageSize; + if( rc==SQLITE_OK ){ + if( nReserve<0 ) nReserve = pPager->nReserve; + assert( nReserve>=0 && nReserve<1000 ); + pPager->nReserve = (i16)nReserve; + pagerReportSize(pPager); + pagerFixMaplimit(pPager); + } + return rc; +} + +/* +** Return a pointer to the "temporary page" buffer held internally +** by the pager. This is a buffer that is big enough to hold the +** entire content of a database page. This buffer is used internally +** during rollback and will be overwritten whenever a rollback +** occurs. But other modules are free to use it too, as long as +** no rollbacks are happening. +*/ +SQLITE_PRIVATE void *sqlite3PagerTempSpace(Pager *pPager){ + return pPager->pTmpSpace; +} + +/* +** Attempt to set the maximum database page count if mxPage is positive. +** Make no changes if mxPage is zero or negative. And never reduce the +** maximum page count below the current size of the database. +** +** Regardless of mxPage, return the current maximum page count. +*/ +SQLITE_PRIVATE int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){ + if( mxPage>0 ){ + pPager->mxPgno = mxPage; + } + assert( pPager->eState!=PAGER_OPEN ); /* Called only by OP_MaxPgcnt */ + assert( pPager->mxPgno>=pPager->dbSize ); /* OP_MaxPgcnt enforces this */ + return pPager->mxPgno; +} + +/* +** The following set of routines are used to disable the simulated +** I/O error mechanism. These routines are used to avoid simulated +** errors in places where we do not care about errors. +** +** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops +** and generate no code. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE int sqlite3_io_error_pending; +SQLITE_PRIVATE int sqlite3_io_error_hit; +static int saved_cnt; +void disable_simulated_io_errors(void){ + saved_cnt = sqlite3_io_error_pending; + sqlite3_io_error_pending = -1; +} +void enable_simulated_io_errors(void){ + sqlite3_io_error_pending = saved_cnt; +} +#else +# define disable_simulated_io_errors() +# define enable_simulated_io_errors() +#endif + +/* +** Read the first N bytes from the beginning of the file into memory +** that pDest points to. +** +** If the pager was opened on a transient file (zFilename==""), or +** opened on a file less than N bytes in size, the output buffer is +** zeroed and SQLITE_OK returned. The rationale for this is that this +** function is used to read database headers, and a new transient or +** zero sized database has a header than consists entirely of zeroes. +** +** If any IO error apart from SQLITE_IOERR_SHORT_READ is encountered, +** the error code is returned to the caller and the contents of the +** output buffer undefined. +*/ +SQLITE_PRIVATE int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){ + int rc = SQLITE_OK; + memset(pDest, 0, N); + assert( isOpen(pPager->fd) || pPager->tempFile ); + + /* This routine is only called by btree immediately after creating + ** the Pager object. There has not been an opportunity to transition + ** to WAL mode yet. + */ + assert( !pagerUseWal(pPager) ); + + if( isOpen(pPager->fd) ){ + IOTRACE(("DBHDR %p 0 %d\n", pPager, N)) + rc = sqlite3OsRead(pPager->fd, pDest, N, 0); + if( rc==SQLITE_IOERR_SHORT_READ ){ + rc = SQLITE_OK; + } + } + return rc; +} + +/* +** This function may only be called when a read-transaction is open on +** the pager. It returns the total number of pages in the database. +** +** However, if the file is between 1 and <page-size> bytes in size, then +** this is considered a 1 page file. +*/ +SQLITE_PRIVATE void sqlite3PagerPagecount(Pager *pPager, int *pnPage){ + assert( pPager->eState>=PAGER_READER ); + assert( pPager->eState!=PAGER_WRITER_FINISHED ); + *pnPage = (int)pPager->dbSize; +} + + +/* +** Try to obtain a lock of type locktype on the database file. If +** a similar or greater lock is already held, this function is a no-op +** (returning SQLITE_OK immediately). +** +** Otherwise, attempt to obtain the lock using sqlite3OsLock(). Invoke +** the busy callback if the lock is currently not available. Repeat +** until the busy callback returns false or until the attempt to +** obtain the lock succeeds. +** +** Return SQLITE_OK on success and an error code if we cannot obtain +** the lock. If the lock is obtained successfully, set the Pager.state +** variable to locktype before returning. +*/ +static int pager_wait_on_lock(Pager *pPager, int locktype){ + int rc; /* Return code */ + + /* Check that this is either a no-op (because the requested lock is + ** already held), or one of the transitions that the busy-handler + ** may be invoked during, according to the comment above + ** sqlite3PagerSetBusyhandler(). + */ + assert( (pPager->eLock>=locktype) + || (pPager->eLock==NO_LOCK && locktype==SHARED_LOCK) + || (pPager->eLock==RESERVED_LOCK && locktype==EXCLUSIVE_LOCK) + ); + + do { + rc = pagerLockDb(pPager, locktype); + }while( rc==SQLITE_BUSY && pPager->xBusyHandler(pPager->pBusyHandlerArg) ); + return rc; +} + +/* +** Function assertTruncateConstraint(pPager) checks that one of the +** following is true for all dirty pages currently in the page-cache: +** +** a) The page number is less than or equal to the size of the +** current database image, in pages, OR +** +** b) if the page content were written at this time, it would not +** be necessary to write the current content out to the sub-journal +** (as determined by function subjRequiresPage()). +** +** If the condition asserted by this function were not true, and the +** dirty page were to be discarded from the cache via the pagerStress() +** routine, pagerStress() would not write the current page content to +** the database file. If a savepoint transaction were rolled back after +** this happened, the correct behavior would be to restore the current +** content of the page. However, since this content is not present in either +** the database file or the portion of the rollback journal and +** sub-journal rolled back the content could not be restored and the +** database image would become corrupt. It is therefore fortunate that +** this circumstance cannot arise. +*/ +#if defined(SQLITE_DEBUG) +static void assertTruncateConstraintCb(PgHdr *pPg){ + assert( pPg->flags&PGHDR_DIRTY ); + assert( !subjRequiresPage(pPg) || pPg->pgno<=pPg->pPager->dbSize ); +} +static void assertTruncateConstraint(Pager *pPager){ + sqlite3PcacheIterateDirty(pPager->pPCache, assertTruncateConstraintCb); +} +#else +# define assertTruncateConstraint(pPager) +#endif + +/* +** Truncate the in-memory database file image to nPage pages. This +** function does not actually modify the database file on disk. It +** just sets the internal state of the pager object so that the +** truncation will be done when the current transaction is committed. +** +** This function is only called right before committing a transaction. +** Once this function has been called, the transaction must either be +** rolled back or committed. It is not safe to call this function and +** then continue writing to the database. +*/ +SQLITE_PRIVATE void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){ + assert( pPager->dbSize>=nPage ); + assert( pPager->eState>=PAGER_WRITER_CACHEMOD ); + pPager->dbSize = nPage; + + /* At one point the code here called assertTruncateConstraint() to + ** ensure that all pages being truncated away by this operation are, + ** if one or more savepoints are open, present in the savepoint + ** journal so that they can be restored if the savepoint is rolled + ** back. This is no longer necessary as this function is now only + ** called right before committing a transaction. So although the + ** Pager object may still have open savepoints (Pager.nSavepoint!=0), + ** they cannot be rolled back. So the assertTruncateConstraint() call + ** is no longer correct. */ +} + + +/* +** This function is called before attempting a hot-journal rollback. It +** syncs the journal file to disk, then sets pPager->journalHdr to the +** size of the journal file so that the pager_playback() routine knows +** that the entire journal file has been synced. +** +** Syncing a hot-journal to disk before attempting to roll it back ensures +** that if a power-failure occurs during the rollback, the process that +** attempts rollback following system recovery sees the same journal +** content as this process. +** +** If everything goes as planned, SQLITE_OK is returned. Otherwise, +** an SQLite error code. +*/ +static int pagerSyncHotJournal(Pager *pPager){ + int rc = SQLITE_OK; + if( !pPager->noSync ){ + rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_NORMAL); + } + if( rc==SQLITE_OK ){ + rc = sqlite3OsFileSize(pPager->jfd, &pPager->journalHdr); + } + return rc; +} + +/* +** Obtain a reference to a memory mapped page object for page number pgno. +** The new object will use the pointer pData, obtained from xFetch(). +** If successful, set *ppPage to point to the new page reference +** and return SQLITE_OK. Otherwise, return an SQLite error code and set +** *ppPage to zero. +** +** Page references obtained by calling this function should be released +** by calling pagerReleaseMapPage(). +*/ +static int pagerAcquireMapPage( + Pager *pPager, /* Pager object */ + Pgno pgno, /* Page number */ + void *pData, /* xFetch()'d data for this page */ + PgHdr **ppPage /* OUT: Acquired page object */ +){ + PgHdr *p; /* Memory mapped page to return */ + + if( pPager->pMmapFreelist ){ + *ppPage = p = pPager->pMmapFreelist; + pPager->pMmapFreelist = p->pDirty; + p->pDirty = 0; + memset(p->pExtra, 0, pPager->nExtra); + }else{ + *ppPage = p = (PgHdr *)sqlite3MallocZero(sizeof(PgHdr) + pPager->nExtra); + if( p==0 ){ + sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1) * pPager->pageSize, pData); + return SQLITE_NOMEM; + } + p->pExtra = (void *)&p[1]; + p->flags = PGHDR_MMAP; + p->nRef = 1; + p->pPager = pPager; + } + + assert( p->pExtra==(void *)&p[1] ); + assert( p->pPage==0 ); + assert( p->flags==PGHDR_MMAP ); + assert( p->pPager==pPager ); + assert( p->nRef==1 ); + + p->pgno = pgno; + p->pData = pData; + pPager->nMmapOut++; + + return SQLITE_OK; +} + +/* +** Release a reference to page pPg. pPg must have been returned by an +** earlier call to pagerAcquireMapPage(). +*/ +static void pagerReleaseMapPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + pPager->nMmapOut--; + pPg->pDirty = pPager->pMmapFreelist; + pPager->pMmapFreelist = pPg; + + assert( pPager->fd->pMethods->iVersion>=3 ); + sqlite3OsUnfetch(pPager->fd, (i64)(pPg->pgno-1)*pPager->pageSize, pPg->pData); +} + +/* +** Free all PgHdr objects stored in the Pager.pMmapFreelist list. +*/ +static void pagerFreeMapHdrs(Pager *pPager){ + PgHdr *p; + PgHdr *pNext; + for(p=pPager->pMmapFreelist; p; p=pNext){ + pNext = p->pDirty; + sqlite3_free(p); + } +} + + +/* +** Shutdown the page cache. Free all memory and close all files. +** +** If a transaction was in progress when this routine is called, that +** transaction is rolled back. All outstanding pages are invalidated +** and their memory is freed. Any attempt to use a page associated +** with this page cache after this function returns will likely +** result in a coredump. +** +** This function always succeeds. If a transaction is active an attempt +** is made to roll it back. If an error occurs during the rollback +** a hot journal may be left in the filesystem but no error is returned +** to the caller. +*/ +SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager){ + u8 *pTmp = (u8 *)pPager->pTmpSpace; + + assert( assert_pager_state(pPager) ); + disable_simulated_io_errors(); + pagerFreeMapHdrs(pPager); + /* pPager->errCode = 0; */ + pPager->exclusiveMode = 0; +#ifndef SQLITE_OMIT_WAL + sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, pTmp); + pPager->pWal = 0; +#endif + pager_reset(pPager); + + /* If it is open, sync the journal file before calling UnlockAndRollback. + ** If this is not done, then an unsynced portion of the open journal + ** file may be played back into the database. If a power failure occurs + ** while this is happening, the database could become corrupt. + ** + ** If an error occurs while trying to sync the journal, shift the pager + ** into the ERROR state. This causes UnlockAndRollback to unlock the + ** database and close the journal file without attempting to roll it + ** back or finalize it. The next database user will have to do hot-journal + ** rollback before accessing the database file. + */ + if( isOpen(pPager->jfd) ){ + pager_error(pPager, pagerSyncHotJournal(pPager)); + } + pagerUnlockAndRollback(pPager); + + enable_simulated_io_errors(); + PAGERTRACE(("CLOSE %d\n", PAGERID(pPager))); + IOTRACE(("CLOSE %p\n", pPager)) + sqlite3OsClose(pPager->jfd); + sqlite3OsClose(pPager->fd); + sqlite3PageFree(pTmp); + sqlite3PcacheClose(pPager->pPCache); + +#ifdef SQLITE_HAS_CODEC + if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec); +#endif + + assert( !pPager->aSavepoint && !pPager->pInJournal ); + assert( !isOpen(pPager->jfd) && !isOpen(pPager->sjfd) ); + + sqlite3_free(pPager); + return SQLITE_OK; +} + +#if !defined(NDEBUG) || defined(SQLITE_TEST) +/* +** Return the page number for page pPg. +*/ +SQLITE_PRIVATE Pgno sqlite3PagerPagenumber(DbPage *pPg){ + return pPg->pgno; +} +#endif + +/* +** Increment the reference count for page pPg. +*/ +SQLITE_PRIVATE void sqlite3PagerRef(DbPage *pPg){ + sqlite3PcacheRef(pPg); +} + +/* +** Sync the journal. In other words, make sure all the pages that have +** been written to the journal have actually reached the surface of the +** disk and can be restored in the event of a hot-journal rollback. +** +** If the Pager.noSync flag is set, then this function is a no-op. +** Otherwise, the actions required depend on the journal-mode and the +** device characteristics of the file-system, as follows: +** +** * If the journal file is an in-memory journal file, no action need +** be taken. +** +** * Otherwise, if the device does not support the SAFE_APPEND property, +** then the nRec field of the most recently written journal header +** is updated to contain the number of journal records that have +** been written following it. If the pager is operating in full-sync +** mode, then the journal file is synced before this field is updated. +** +** * If the device does not support the SEQUENTIAL property, then +** journal file is synced. +** +** Or, in pseudo-code: +** +** if( NOT <in-memory journal> ){ +** if( NOT SAFE_APPEND ){ +** if( <full-sync mode> ) xSync(<journal file>); +** <update nRec field> +** } +** if( NOT SEQUENTIAL ) xSync(<journal file>); +** } +** +** If successful, this routine clears the PGHDR_NEED_SYNC flag of every +** page currently held in memory before returning SQLITE_OK. If an IO +** error is encountered, then the IO error code is returned to the caller. +*/ +static int syncJournal(Pager *pPager, int newHdr){ + int rc; /* Return code */ + + assert( pPager->eState==PAGER_WRITER_CACHEMOD + || pPager->eState==PAGER_WRITER_DBMOD + ); + assert( assert_pager_state(pPager) ); + assert( !pagerUseWal(pPager) ); + + rc = sqlite3PagerExclusiveLock(pPager); + if( rc!=SQLITE_OK ) return rc; + + if( !pPager->noSync ){ + assert( !pPager->tempFile ); + if( isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){ + const int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); + assert( isOpen(pPager->jfd) ); + + if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){ + /* This block deals with an obscure problem. If the last connection + ** that wrote to this database was operating in persistent-journal + ** mode, then the journal file may at this point actually be larger + ** than Pager.journalOff bytes. If the next thing in the journal + ** file happens to be a journal-header (written as part of the + ** previous connection's transaction), and a crash or power-failure + ** occurs after nRec is updated but before this connection writes + ** anything else to the journal file (or commits/rolls back its + ** transaction), then SQLite may become confused when doing the + ** hot-journal rollback following recovery. It may roll back all + ** of this connections data, then proceed to rolling back the old, + ** out-of-date data that follows it. Database corruption. + ** + ** To work around this, if the journal file does appear to contain + ** a valid header following Pager.journalOff, then write a 0x00 + ** byte to the start of it to prevent it from being recognized. + ** + ** Variable iNextHdrOffset is set to the offset at which this + ** problematic header will occur, if it exists. aMagic is used + ** as a temporary buffer to inspect the first couple of bytes of + ** the potential journal header. + */ + i64 iNextHdrOffset; + u8 aMagic[8]; + u8 zHeader[sizeof(aJournalMagic)+4]; + + memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); + put32bits(&zHeader[sizeof(aJournalMagic)], pPager->nRec); + + iNextHdrOffset = journalHdrOffset(pPager); + rc = sqlite3OsRead(pPager->jfd, aMagic, 8, iNextHdrOffset); + if( rc==SQLITE_OK && 0==memcmp(aMagic, aJournalMagic, 8) ){ + static const u8 zerobyte = 0; + rc = sqlite3OsWrite(pPager->jfd, &zerobyte, 1, iNextHdrOffset); + } + if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ + return rc; + } + + /* Write the nRec value into the journal file header. If in + ** full-synchronous mode, sync the journal first. This ensures that + ** all data has really hit the disk before nRec is updated to mark + ** it as a candidate for rollback. + ** + ** This is not required if the persistent media supports the + ** SAFE_APPEND property. Because in this case it is not possible + ** for garbage data to be appended to the file, the nRec field + ** is populated with 0xFFFFFFFF when the journal header is written + ** and never needs to be updated. + */ + if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ + PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager))); + IOTRACE(("JSYNC %p\n", pPager)) + rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags); + if( rc!=SQLITE_OK ) return rc; + } + IOTRACE(("JHDR %p %lld\n", pPager, pPager->journalHdr)); + rc = sqlite3OsWrite( + pPager->jfd, zHeader, sizeof(zHeader), pPager->journalHdr + ); + if( rc!=SQLITE_OK ) return rc; + } + if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ + PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager))); + IOTRACE(("JSYNC %p\n", pPager)) + rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags| + (pPager->syncFlags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0) + ); + if( rc!=SQLITE_OK ) return rc; + } + + pPager->journalHdr = pPager->journalOff; + if( newHdr && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){ + pPager->nRec = 0; + rc = writeJournalHdr(pPager); + if( rc!=SQLITE_OK ) return rc; + } + }else{ + pPager->journalHdr = pPager->journalOff; + } + } + + /* Unless the pager is in noSync mode, the journal file was just + ** successfully synced. Either way, clear the PGHDR_NEED_SYNC flag on + ** all pages. + */ + sqlite3PcacheClearSyncFlags(pPager->pPCache); + pPager->eState = PAGER_WRITER_DBMOD; + assert( assert_pager_state(pPager) ); + return SQLITE_OK; +} + +/* +** The argument is the first in a linked list of dirty pages connected +** by the PgHdr.pDirty pointer. This function writes each one of the +** in-memory pages in the list to the database file. The argument may +** be NULL, representing an empty list. In this case this function is +** a no-op. +** +** The pager must hold at least a RESERVED lock when this function +** is called. Before writing anything to the database file, this lock +** is upgraded to an EXCLUSIVE lock. If the lock cannot be obtained, +** SQLITE_BUSY is returned and no data is written to the database file. +** +** If the pager is a temp-file pager and the actual file-system file +** is not yet open, it is created and opened before any data is +** written out. +** +** Once the lock has been upgraded and, if necessary, the file opened, +** the pages are written out to the database file in list order. Writing +** a page is skipped if it meets either of the following criteria: +** +** * The page number is greater than Pager.dbSize, or +** * The PGHDR_DONT_WRITE flag is set on the page. +** +** If writing out a page causes the database file to grow, Pager.dbFileSize +** is updated accordingly. If page 1 is written out, then the value cached +** in Pager.dbFileVers[] is updated to match the new value stored in +** the database file. +** +** If everything is successful, SQLITE_OK is returned. If an IO error +** occurs, an IO error code is returned. Or, if the EXCLUSIVE lock cannot +** be obtained, SQLITE_BUSY is returned. +*/ +static int pager_write_pagelist(Pager *pPager, PgHdr *pList){ + int rc = SQLITE_OK; /* Return code */ + + /* This function is only called for rollback pagers in WRITER_DBMOD state. */ + assert( !pagerUseWal(pPager) ); + assert( pPager->eState==PAGER_WRITER_DBMOD ); + assert( pPager->eLock==EXCLUSIVE_LOCK ); + + /* If the file is a temp-file has not yet been opened, open it now. It + ** is not possible for rc to be other than SQLITE_OK if this branch + ** is taken, as pager_wait_on_lock() is a no-op for temp-files. + */ + if( !isOpen(pPager->fd) ){ + assert( pPager->tempFile && rc==SQLITE_OK ); + rc = pagerOpentemp(pPager, pPager->fd, pPager->vfsFlags); + } + + /* Before the first write, give the VFS a hint of what the final + ** file size will be. + */ + assert( rc!=SQLITE_OK || isOpen(pPager->fd) ); + if( rc==SQLITE_OK + && pPager->dbHintSize<pPager->dbSize + && (pList->pDirty || pList->pgno>pPager->dbHintSize) + ){ + sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize; + sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile); + pPager->dbHintSize = pPager->dbSize; + } + + while( rc==SQLITE_OK && pList ){ + Pgno pgno = pList->pgno; + + /* If there are dirty pages in the page cache with page numbers greater + ** than Pager.dbSize, this means sqlite3PagerTruncateImage() was called to + ** make the file smaller (presumably by auto-vacuum code). Do not write + ** any such pages to the file. + ** + ** Also, do not write out any page that has the PGHDR_DONT_WRITE flag + ** set (set by sqlite3PagerDontWrite()). + */ + if( pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){ + i64 offset = (pgno-1)*(i64)pPager->pageSize; /* Offset to write */ + char *pData; /* Data to write */ + + assert( (pList->flags&PGHDR_NEED_SYNC)==0 ); + if( pList->pgno==1 ) pager_write_changecounter(pList); + + /* Encode the database */ + CODEC2(pPager, pList->pData, pgno, 6, return SQLITE_NOMEM, pData); + + /* Write out the page data. */ + rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset); + + /* If page 1 was just written, update Pager.dbFileVers to match + ** the value now stored in the database file. If writing this + ** page caused the database file to grow, update dbFileSize. + */ + if( pgno==1 ){ + memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers)); + } + if( pgno>pPager->dbFileSize ){ + pPager->dbFileSize = pgno; + } + pPager->aStat[PAGER_STAT_WRITE]++; + + /* Update any backup objects copying the contents of this pager. */ + sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)pList->pData); + + PAGERTRACE(("STORE %d page %d hash(%08x)\n", + PAGERID(pPager), pgno, pager_pagehash(pList))); + IOTRACE(("PGOUT %p %d\n", pPager, pgno)); + PAGER_INCR(sqlite3_pager_writedb_count); + }else{ + PAGERTRACE(("NOSTORE %d page %d\n", PAGERID(pPager), pgno)); + } + pager_set_pagehash(pList); + pList = pList->pDirty; + } + + return rc; +} + +/* +** Ensure that the sub-journal file is open. If it is already open, this +** function is a no-op. +** +** SQLITE_OK is returned if everything goes according to plan. An +** SQLITE_IOERR_XXX error code is returned if a call to sqlite3OsOpen() +** fails. +*/ +static int openSubJournal(Pager *pPager){ + int rc = SQLITE_OK; + if( !isOpen(pPager->sjfd) ){ + if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){ + sqlite3MemJournalOpen(pPager->sjfd); + }else{ + rc = pagerOpentemp(pPager, pPager->sjfd, SQLITE_OPEN_SUBJOURNAL); + } + } + return rc; +} + +/* +** Append a record of the current state of page pPg to the sub-journal. +** +** If successful, set the bit corresponding to pPg->pgno in the bitvecs +** for all open savepoints before returning. +** +** This function returns SQLITE_OK if everything is successful, an IO +** error code if the attempt to write to the sub-journal fails, or +** SQLITE_NOMEM if a malloc fails while setting a bit in a savepoint +** bitvec. +*/ +static int subjournalPage(PgHdr *pPg){ + int rc = SQLITE_OK; + Pager *pPager = pPg->pPager; + if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ + + /* Open the sub-journal, if it has not already been opened */ + assert( pPager->useJournal ); + assert( isOpen(pPager->jfd) || pagerUseWal(pPager) ); + assert( isOpen(pPager->sjfd) || pPager->nSubRec==0 ); + assert( pagerUseWal(pPager) + || pageInJournal(pPager, pPg) + || pPg->pgno>pPager->dbOrigSize + ); + rc = openSubJournal(pPager); + + /* If the sub-journal was opened successfully (or was already open), + ** write the journal record into the file. */ + if( rc==SQLITE_OK ){ + void *pData = pPg->pData; + i64 offset = (i64)pPager->nSubRec*(4+pPager->pageSize); + char *pData2; + + CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); + PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno)); + rc = write32bits(pPager->sjfd, offset, pPg->pgno); + if( rc==SQLITE_OK ){ + rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4); + } + } + } + if( rc==SQLITE_OK ){ + pPager->nSubRec++; + assert( pPager->nSavepoint>0 ); + rc = addToSavepointBitvecs(pPager, pPg->pgno); + } + return rc; +} +static int subjournalPageIfRequired(PgHdr *pPg){ + if( subjRequiresPage(pPg) ){ + return subjournalPage(pPg); + }else{ + return SQLITE_OK; + } +} + +/* +** This function is called by the pcache layer when it has reached some +** soft memory limit. The first argument is a pointer to a Pager object +** (cast as a void*). The pager is always 'purgeable' (not an in-memory +** database). The second argument is a reference to a page that is +** currently dirty but has no outstanding references. The page +** is always associated with the Pager object passed as the first +** argument. +** +** The job of this function is to make pPg clean by writing its contents +** out to the database file, if possible. This may involve syncing the +** journal file. +** +** If successful, sqlite3PcacheMakeClean() is called on the page and +** SQLITE_OK returned. If an IO error occurs while trying to make the +** page clean, the IO error code is returned. If the page cannot be +** made clean for some other reason, but no error occurs, then SQLITE_OK +** is returned by sqlite3PcacheMakeClean() is not called. +*/ +static int pagerStress(void *p, PgHdr *pPg){ + Pager *pPager = (Pager *)p; + int rc = SQLITE_OK; + + assert( pPg->pPager==pPager ); + assert( pPg->flags&PGHDR_DIRTY ); + + /* The doNotSpill NOSYNC bit is set during times when doing a sync of + ** journal (and adding a new header) is not allowed. This occurs + ** during calls to sqlite3PagerWrite() while trying to journal multiple + ** pages belonging to the same sector. + ** + ** The doNotSpill ROLLBACK and OFF bits inhibits all cache spilling + ** regardless of whether or not a sync is required. This is set during + ** a rollback or by user request, respectively. + ** + ** Spilling is also prohibited when in an error state since that could + ** lead to database corruption. In the current implementation it + ** is impossible for sqlite3PcacheFetch() to be called with createFlag==3 + ** while in the error state, hence it is impossible for this routine to + ** be called in the error state. Nevertheless, we include a NEVER() + ** test for the error state as a safeguard against future changes. + */ + if( NEVER(pPager->errCode) ) return SQLITE_OK; + testcase( pPager->doNotSpill & SPILLFLAG_ROLLBACK ); + testcase( pPager->doNotSpill & SPILLFLAG_OFF ); + testcase( pPager->doNotSpill & SPILLFLAG_NOSYNC ); + if( pPager->doNotSpill + && ((pPager->doNotSpill & (SPILLFLAG_ROLLBACK|SPILLFLAG_OFF))!=0 + || (pPg->flags & PGHDR_NEED_SYNC)!=0) + ){ + return SQLITE_OK; + } + + pPg->pDirty = 0; + if( pagerUseWal(pPager) ){ + /* Write a single frame for this page to the log. */ + rc = subjournalPageIfRequired(pPg); + if( rc==SQLITE_OK ){ + rc = pagerWalFrames(pPager, pPg, 0, 0); + } + }else{ + + /* Sync the journal file if required. */ + if( pPg->flags&PGHDR_NEED_SYNC + || pPager->eState==PAGER_WRITER_CACHEMOD + ){ + rc = syncJournal(pPager, 1); + } + + /* Write the contents of the page out to the database file. */ + if( rc==SQLITE_OK ){ + assert( (pPg->flags&PGHDR_NEED_SYNC)==0 ); + rc = pager_write_pagelist(pPager, pPg); + } + } + + /* Mark the page as clean. */ + if( rc==SQLITE_OK ){ + PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno)); + sqlite3PcacheMakeClean(pPg); + } + + return pager_error(pPager, rc); +} + + +/* +** Allocate and initialize a new Pager object and put a pointer to it +** in *ppPager. The pager should eventually be freed by passing it +** to sqlite3PagerClose(). +** +** The zFilename argument is the path to the database file to open. +** If zFilename is NULL then a randomly-named temporary file is created +** and used as the file to be cached. Temporary files are be deleted +** automatically when they are closed. If zFilename is ":memory:" then +** all information is held in cache. It is never written to disk. +** This can be used to implement an in-memory database. +** +** The nExtra parameter specifies the number of bytes of space allocated +** along with each page reference. This space is available to the user +** via the sqlite3PagerGetExtra() API. +** +** The flags argument is used to specify properties that affect the +** operation of the pager. It should be passed some bitwise combination +** of the PAGER_* flags. +** +** The vfsFlags parameter is a bitmask to pass to the flags parameter +** of the xOpen() method of the supplied VFS when opening files. +** +** If the pager object is allocated and the specified file opened +** successfully, SQLITE_OK is returned and *ppPager set to point to +** the new pager object. If an error occurs, *ppPager is set to NULL +** and error code returned. This function may return SQLITE_NOMEM +** (sqlite3Malloc() is used to allocate memory), SQLITE_CANTOPEN or +** various SQLITE_IO_XXX errors. +*/ +SQLITE_PRIVATE int sqlite3PagerOpen( + sqlite3_vfs *pVfs, /* The virtual file system to use */ + Pager **ppPager, /* OUT: Return the Pager structure here */ + const char *zFilename, /* Name of the database file to open */ + int nExtra, /* Extra bytes append to each in-memory page */ + int flags, /* flags controlling this file */ + int vfsFlags, /* flags passed through to sqlite3_vfs.xOpen() */ + void (*xReinit)(DbPage*) /* Function to reinitialize pages */ +){ + u8 *pPtr; + Pager *pPager = 0; /* Pager object to allocate and return */ + int rc = SQLITE_OK; /* Return code */ + int tempFile = 0; /* True for temp files (incl. in-memory files) */ + int memDb = 0; /* True if this is an in-memory file */ + int readOnly = 0; /* True if this is a read-only file */ + int journalFileSize; /* Bytes to allocate for each journal fd */ + char *zPathname = 0; /* Full path to database file */ + int nPathname = 0; /* Number of bytes in zPathname */ + int useJournal = (flags & PAGER_OMIT_JOURNAL)==0; /* False to omit journal */ + int pcacheSize = sqlite3PcacheSize(); /* Bytes to allocate for PCache */ + u32 szPageDflt = SQLITE_DEFAULT_PAGE_SIZE; /* Default page size */ + const char *zUri = 0; /* URI args to copy */ + int nUri = 0; /* Number of bytes of URI args at *zUri */ + + /* Figure out how much space is required for each journal file-handle + ** (there are two of them, the main journal and the sub-journal). This + ** is the maximum space required for an in-memory journal file handle + ** and a regular journal file-handle. Note that a "regular journal-handle" + ** may be a wrapper capable of caching the first portion of the journal + ** file in memory to implement the atomic-write optimization (see + ** source file journal.c). + */ + if( sqlite3JournalSize(pVfs)>sqlite3MemJournalSize() ){ + journalFileSize = ROUND8(sqlite3JournalSize(pVfs)); + }else{ + journalFileSize = ROUND8(sqlite3MemJournalSize()); + } + + /* Set the output variable to NULL in case an error occurs. */ + *ppPager = 0; + + /* Compute and store the full pathname in an allocated buffer pointed + ** to by zPathname, length nPathname. Or, if this is a temporary file, + ** leave both nPathname and zPathname set to 0. + */ + if( zFilename && zFilename[0] ){ + const char *z; + nPathname = pVfs->mxPathname+1; + zPathname = sqlite3DbMallocRaw(0, nPathname*2); + if( zPathname==0 ){ + return SQLITE_NOMEM; + } + zPathname[0] = 0; /* Make sure initialized even if FullPathname() fails */ + rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname); + nPathname = sqlite3Strlen30(zPathname); + z = zUri = &zFilename[sqlite3Strlen30(zFilename)+1]; + while( *z ){ + z += sqlite3Strlen30(z)+1; + z += sqlite3Strlen30(z)+1; + } + nUri = (int)(&z[1] - zUri); + assert( nUri>=0 ); + if( rc==SQLITE_OK && nPathname+8>pVfs->mxPathname ){ + /* This branch is taken when the journal path required by + ** the database being opened will be more than pVfs->mxPathname + ** bytes in length. This means the database cannot be opened, + ** as it will not be possible to open the journal file or even + ** check for a hot-journal before reading. + */ + rc = SQLITE_CANTOPEN_BKPT; + } + if( rc!=SQLITE_OK ){ + sqlite3DbFree(0, zPathname); + return rc; + } + } + + /* Allocate memory for the Pager structure, PCache object, the + ** three file descriptors, the database file name and the journal + ** file name. The layout in memory is as follows: + ** + ** Pager object (sizeof(Pager) bytes) + ** PCache object (sqlite3PcacheSize() bytes) + ** Database file handle (pVfs->szOsFile bytes) + ** Sub-journal file handle (journalFileSize bytes) + ** Main journal file handle (journalFileSize bytes) + ** Database file name (nPathname+1 bytes) + ** Journal file name (nPathname+8+1 bytes) + */ + pPtr = (u8 *)sqlite3MallocZero( + ROUND8(sizeof(*pPager)) + /* Pager structure */ + ROUND8(pcacheSize) + /* PCache object */ + ROUND8(pVfs->szOsFile) + /* The main db file */ + journalFileSize * 2 + /* The two journal files */ + nPathname + 1 + nUri + /* zFilename */ + nPathname + 8 + 2 /* zJournal */ +#ifndef SQLITE_OMIT_WAL + + nPathname + 4 + 2 /* zWal */ +#endif + ); + assert( EIGHT_BYTE_ALIGNMENT(SQLITE_INT_TO_PTR(journalFileSize)) ); + if( !pPtr ){ + sqlite3DbFree(0, zPathname); + return SQLITE_NOMEM; + } + pPager = (Pager*)(pPtr); + pPager->pPCache = (PCache*)(pPtr += ROUND8(sizeof(*pPager))); + pPager->fd = (sqlite3_file*)(pPtr += ROUND8(pcacheSize)); + pPager->sjfd = (sqlite3_file*)(pPtr += ROUND8(pVfs->szOsFile)); + pPager->jfd = (sqlite3_file*)(pPtr += journalFileSize); + pPager->zFilename = (char*)(pPtr += journalFileSize); + assert( EIGHT_BYTE_ALIGNMENT(pPager->jfd) ); + + /* Fill in the Pager.zFilename and Pager.zJournal buffers, if required. */ + if( zPathname ){ + assert( nPathname>0 ); + pPager->zJournal = (char*)(pPtr += nPathname + 1 + nUri); + memcpy(pPager->zFilename, zPathname, nPathname); + if( nUri ) memcpy(&pPager->zFilename[nPathname+1], zUri, nUri); + memcpy(pPager->zJournal, zPathname, nPathname); + memcpy(&pPager->zJournal[nPathname], "-journal\000", 8+2); +#ifndef SQLITE_OMIT_WAL + pPager->zWal = &pPager->zJournal[nPathname+8+1]; + memcpy(pPager->zWal, zPathname, nPathname); + memcpy(&pPager->zWal[nPathname], "-wal\000", 4+1); +#endif + sqlite3DbFree(0, zPathname); + } + pPager->pVfs = pVfs; + pPager->vfsFlags = vfsFlags; + + /* Open the pager file. + */ + if( zFilename && zFilename[0] ){ + int fout = 0; /* VFS flags returned by xOpen() */ + rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd, vfsFlags, &fout); + assert( !memDb ); + readOnly = (fout&SQLITE_OPEN_READONLY); + + /* If the file was successfully opened for read/write access, + ** choose a default page size in case we have to create the + ** database file. The default page size is the maximum of: + ** + ** + SQLITE_DEFAULT_PAGE_SIZE, + ** + The value returned by sqlite3OsSectorSize() + ** + The largest page size that can be written atomically. + */ + if( rc==SQLITE_OK ){ + int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); + if( !readOnly ){ + setSectorSize(pPager); + assert(SQLITE_DEFAULT_PAGE_SIZE<=SQLITE_MAX_DEFAULT_PAGE_SIZE); + if( szPageDflt<pPager->sectorSize ){ + if( pPager->sectorSize>SQLITE_MAX_DEFAULT_PAGE_SIZE ){ + szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE; + }else{ + szPageDflt = (u32)pPager->sectorSize; + } + } +#ifdef SQLITE_ENABLE_ATOMIC_WRITE + { + int ii; + assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); + assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); + assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536); + for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){ + if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ){ + szPageDflt = ii; + } + } + } +#endif + } + pPager->noLock = 0; + if( (iDc & SQLITE_IOCAP_IMMUTABLE)!=0 ){ + vfsFlags |= SQLITE_OPEN_READONLY; + goto act_like_temp_file; + } + } + }else{ + /* If a temporary file is requested, it is not opened immediately. + ** In this case we accept the default page size and delay actually + ** opening the file until the first call to OsWrite(). + ** + ** This branch is also run for an in-memory database. An in-memory + ** database is the same as a temp-file that is never written out to + ** disk and uses an in-memory rollback journal. + ** + ** This branch also runs for files marked as immutable. + */ +act_like_temp_file: + tempFile = 1; + pPager->eState = PAGER_READER; /* Pretend we already have a lock */ + pPager->eLock = EXCLUSIVE_LOCK; /* Pretend we are in EXCLUSIVE mode */ + pPager->noLock = 1; /* Do no locking */ + readOnly = (vfsFlags&SQLITE_OPEN_READONLY); + } + + /* The following call to PagerSetPagesize() serves to set the value of + ** Pager.pageSize and to allocate the Pager.pTmpSpace buffer. + */ + if( rc==SQLITE_OK ){ + assert( pPager->memDb==0 ); + rc = sqlite3PagerSetPagesize(pPager, &szPageDflt, -1); + testcase( rc!=SQLITE_OK ); + } + + /* Initialize the PCache object. */ + if( rc==SQLITE_OK ){ + assert( nExtra<1000 ); + nExtra = ROUND8(nExtra); + rc = sqlite3PcacheOpen(szPageDflt, nExtra, !memDb, + !memDb?pagerStress:0, (void *)pPager, pPager->pPCache); + } + + /* If an error occurred above, free the Pager structure and close the file. + */ + if( rc!=SQLITE_OK ){ + sqlite3OsClose(pPager->fd); + sqlite3PageFree(pPager->pTmpSpace); + sqlite3_free(pPager); + return rc; + } + + PAGERTRACE(("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename)); + IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename)) + + pPager->useJournal = (u8)useJournal; + /* pPager->stmtOpen = 0; */ + /* pPager->stmtInUse = 0; */ + /* pPager->nRef = 0; */ + /* pPager->stmtSize = 0; */ + /* pPager->stmtJSize = 0; */ + /* pPager->nPage = 0; */ + pPager->mxPgno = SQLITE_MAX_PAGE_COUNT; + /* pPager->state = PAGER_UNLOCK; */ + /* pPager->errMask = 0; */ + pPager->tempFile = (u8)tempFile; + assert( tempFile==PAGER_LOCKINGMODE_NORMAL + || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE ); + assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 ); + pPager->exclusiveMode = (u8)tempFile; + pPager->changeCountDone = pPager->tempFile; + pPager->memDb = (u8)memDb; + pPager->readOnly = (u8)readOnly; + assert( useJournal || pPager->tempFile ); + pPager->noSync = pPager->tempFile; + if( pPager->noSync ){ + assert( pPager->fullSync==0 ); + assert( pPager->syncFlags==0 ); + assert( pPager->walSyncFlags==0 ); + assert( pPager->ckptSyncFlags==0 ); + }else{ + pPager->fullSync = 1; + pPager->syncFlags = SQLITE_SYNC_NORMAL; + pPager->walSyncFlags = SQLITE_SYNC_NORMAL | WAL_SYNC_TRANSACTIONS; + pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL; + } + /* pPager->pFirst = 0; */ + /* pPager->pFirstSynced = 0; */ + /* pPager->pLast = 0; */ + pPager->nExtra = (u16)nExtra; + pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT; + assert( isOpen(pPager->fd) || tempFile ); + setSectorSize(pPager); + if( !useJournal ){ + pPager->journalMode = PAGER_JOURNALMODE_OFF; + }else if( memDb ){ + pPager->journalMode = PAGER_JOURNALMODE_MEMORY; + } + /* pPager->xBusyHandler = 0; */ + /* pPager->pBusyHandlerArg = 0; */ + pPager->xReiniter = xReinit; + /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */ + /* pPager->szMmap = SQLITE_DEFAULT_MMAP_SIZE // will be set by btree.c */ + + *ppPager = pPager; + return SQLITE_OK; +} + + +/* Verify that the database file has not be deleted or renamed out from +** under the pager. Return SQLITE_OK if the database is still were it ought +** to be on disk. Return non-zero (SQLITE_READONLY_DBMOVED or some other error +** code from sqlite3OsAccess()) if the database has gone missing. +*/ +static int databaseIsUnmoved(Pager *pPager){ + int bHasMoved = 0; + int rc; + + if( pPager->tempFile ) return SQLITE_OK; + if( pPager->dbSize==0 ) return SQLITE_OK; + assert( pPager->zFilename && pPager->zFilename[0] ); + rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_HAS_MOVED, &bHasMoved); + if( rc==SQLITE_NOTFOUND ){ + /* If the HAS_MOVED file-control is unimplemented, assume that the file + ** has not been moved. That is the historical behavior of SQLite: prior to + ** version 3.8.3, it never checked */ + rc = SQLITE_OK; + }else if( rc==SQLITE_OK && bHasMoved ){ + rc = SQLITE_READONLY_DBMOVED; + } + return rc; +} + + +/* +** This function is called after transitioning from PAGER_UNLOCK to +** PAGER_SHARED state. It tests if there is a hot journal present in +** the file-system for the given pager. A hot journal is one that +** needs to be played back. According to this function, a hot-journal +** file exists if the following criteria are met: +** +** * The journal file exists in the file system, and +** * No process holds a RESERVED or greater lock on the database file, and +** * The database file itself is greater than 0 bytes in size, and +** * The first byte of the journal file exists and is not 0x00. +** +** If the current size of the database file is 0 but a journal file +** exists, that is probably an old journal left over from a prior +** database with the same name. In this case the journal file is +** just deleted using OsDelete, *pExists is set to 0 and SQLITE_OK +** is returned. +** +** This routine does not check if there is a master journal filename +** at the end of the file. If there is, and that master journal file +** does not exist, then the journal file is not really hot. In this +** case this routine will return a false-positive. The pager_playback() +** routine will discover that the journal file is not really hot and +** will not roll it back. +** +** If a hot-journal file is found to exist, *pExists is set to 1 and +** SQLITE_OK returned. If no hot-journal file is present, *pExists is +** set to 0 and SQLITE_OK returned. If an IO error occurs while trying +** to determine whether or not a hot-journal file exists, the IO error +** code is returned and the value of *pExists is undefined. +*/ +static int hasHotJournal(Pager *pPager, int *pExists){ + sqlite3_vfs * const pVfs = pPager->pVfs; + int rc = SQLITE_OK; /* Return code */ + int exists = 1; /* True if a journal file is present */ + int jrnlOpen = !!isOpen(pPager->jfd); + + assert( pPager->useJournal ); + assert( isOpen(pPager->fd) ); + assert( pPager->eState==PAGER_OPEN ); + + assert( jrnlOpen==0 || ( sqlite3OsDeviceCharacteristics(pPager->jfd) & + SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN + )); + + *pExists = 0; + if( !jrnlOpen ){ + rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists); + } + if( rc==SQLITE_OK && exists ){ + int locked = 0; /* True if some process holds a RESERVED lock */ + + /* Race condition here: Another process might have been holding the + ** the RESERVED lock and have a journal open at the sqlite3OsAccess() + ** call above, but then delete the journal and drop the lock before + ** we get to the following sqlite3OsCheckReservedLock() call. If that + ** is the case, this routine might think there is a hot journal when + ** in fact there is none. This results in a false-positive which will + ** be dealt with by the playback routine. Ticket #3883. + */ + rc = sqlite3OsCheckReservedLock(pPager->fd, &locked); + if( rc==SQLITE_OK && !locked ){ + Pgno nPage; /* Number of pages in database file */ + + rc = pagerPagecount(pPager, &nPage); + if( rc==SQLITE_OK ){ + /* If the database is zero pages in size, that means that either (1) the + ** journal is a remnant from a prior database with the same name where + ** the database file but not the journal was deleted, or (2) the initial + ** transaction that populates a new database is being rolled back. + ** In either case, the journal file can be deleted. However, take care + ** not to delete the journal file if it is already open due to + ** journal_mode=PERSIST. + */ + if( nPage==0 && !jrnlOpen ){ + if( pagerLockDb(pPager, RESERVED_LOCK)==SQLITE_OK ){ + sqlite3OsDelete(pVfs, pPager->zJournal, 0); + if( !pPager->exclusiveMode ) pagerUnlockDb(pPager, SHARED_LOCK); + } + }else{ + /* The journal file exists and no other connection has a reserved + ** or greater lock on the database file. Now check that there is + ** at least one non-zero bytes at the start of the journal file. + ** If there is, then we consider this journal to be hot. If not, + ** it can be ignored. + */ + if( !jrnlOpen ){ + int f = SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL; + rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &f); + } + if( rc==SQLITE_OK ){ + u8 first = 0; + rc = sqlite3OsRead(pPager->jfd, (void *)&first, 1, 0); + if( rc==SQLITE_IOERR_SHORT_READ ){ + rc = SQLITE_OK; + } + if( !jrnlOpen ){ + sqlite3OsClose(pPager->jfd); + } + *pExists = (first!=0); + }else if( rc==SQLITE_CANTOPEN ){ + /* If we cannot open the rollback journal file in order to see if + ** it has a zero header, that might be due to an I/O error, or + ** it might be due to the race condition described above and in + ** ticket #3883. Either way, assume that the journal is hot. + ** This might be a false positive. But if it is, then the + ** automatic journal playback and recovery mechanism will deal + ** with it under an EXCLUSIVE lock where we do not need to + ** worry so much with race conditions. + */ + *pExists = 1; + rc = SQLITE_OK; + } + } + } + } + } + + return rc; +} + +/* +** This function is called to obtain a shared lock on the database file. +** It is illegal to call sqlite3PagerAcquire() until after this function +** has been successfully called. If a shared-lock is already held when +** this function is called, it is a no-op. +** +** The following operations are also performed by this function. +** +** 1) If the pager is currently in PAGER_OPEN state (no lock held +** on the database file), then an attempt is made to obtain a +** SHARED lock on the database file. Immediately after obtaining +** the SHARED lock, the file-system is checked for a hot-journal, +** which is played back if present. Following any hot-journal +** rollback, the contents of the cache are validated by checking +** the 'change-counter' field of the database file header and +** discarded if they are found to be invalid. +** +** 2) If the pager is running in exclusive-mode, and there are currently +** no outstanding references to any pages, and is in the error state, +** then an attempt is made to clear the error state by discarding +** the contents of the page cache and rolling back any open journal +** file. +** +** If everything is successful, SQLITE_OK is returned. If an IO error +** occurs while locking the database, checking for a hot-journal file or +** rolling back a journal file, the IO error code is returned. +*/ +SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ + int rc = SQLITE_OK; /* Return code */ + + /* This routine is only called from b-tree and only when there are no + ** outstanding pages. This implies that the pager state should either + ** be OPEN or READER. READER is only possible if the pager is or was in + ** exclusive access mode. + */ + assert( sqlite3PcacheRefCount(pPager->pPCache)==0 ); + assert( assert_pager_state(pPager) ); + assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER ); + + if( !pagerUseWal(pPager) && pPager->eState==PAGER_OPEN ){ + int bHotJournal = 1; /* True if there exists a hot journal-file */ + + rc = pager_wait_on_lock(pPager, SHARED_LOCK); + if( rc!=SQLITE_OK ){ + assert( pPager->eLock==NO_LOCK || pPager->eLock==UNKNOWN_LOCK ); + goto failed; + } + + /* If a journal file exists, and there is no RESERVED lock on the + ** database file, then it either needs to be played back or deleted. + */ + if( pPager->eLock<=SHARED_LOCK ){ + rc = hasHotJournal(pPager, &bHotJournal); + } + if( rc!=SQLITE_OK ){ + goto failed; + } + if( bHotJournal ){ + if( pPager->readOnly ){ + rc = SQLITE_READONLY_ROLLBACK; + goto failed; + } + + /* Get an EXCLUSIVE lock on the database file. At this point it is + ** important that a RESERVED lock is not obtained on the way to the + ** EXCLUSIVE lock. If it were, another process might open the + ** database file, detect the RESERVED lock, and conclude that the + ** database is safe to read while this process is still rolling the + ** hot-journal back. + ** + ** Because the intermediate RESERVED lock is not requested, any + ** other process attempting to access the database file will get to + ** this point in the code and fail to obtain its own EXCLUSIVE lock + ** on the database file. + ** + ** Unless the pager is in locking_mode=exclusive mode, the lock is + ** downgraded to SHARED_LOCK before this function returns. + */ + rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); + if( rc!=SQLITE_OK ){ + goto failed; + } + + /* If it is not already open and the file exists on disk, open the + ** journal for read/write access. Write access is required because + ** in exclusive-access mode the file descriptor will be kept open + ** and possibly used for a transaction later on. Also, write-access + ** is usually required to finalize the journal in journal_mode=persist + ** mode (and also for journal_mode=truncate on some systems). + ** + ** If the journal does not exist, it usually means that some + ** other connection managed to get in and roll it back before + ** this connection obtained the exclusive lock above. Or, it + ** may mean that the pager was in the error-state when this + ** function was called and the journal file does not exist. + */ + if( !isOpen(pPager->jfd) ){ + sqlite3_vfs * const pVfs = pPager->pVfs; + int bExists; /* True if journal file exists */ + rc = sqlite3OsAccess( + pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &bExists); + if( rc==SQLITE_OK && bExists ){ + int fout = 0; + int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL; + assert( !pPager->tempFile ); + rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout); + assert( rc!=SQLITE_OK || isOpen(pPager->jfd) ); + if( rc==SQLITE_OK && fout&SQLITE_OPEN_READONLY ){ + rc = SQLITE_CANTOPEN_BKPT; + sqlite3OsClose(pPager->jfd); + } + } + } + + /* Playback and delete the journal. Drop the database write + ** lock and reacquire the read lock. Purge the cache before + ** playing back the hot-journal so that we don't end up with + ** an inconsistent cache. Sync the hot journal before playing + ** it back since the process that crashed and left the hot journal + ** probably did not sync it and we are required to always sync + ** the journal before playing it back. + */ + if( isOpen(pPager->jfd) ){ + assert( rc==SQLITE_OK ); + rc = pagerSyncHotJournal(pPager); + if( rc==SQLITE_OK ){ + rc = pager_playback(pPager, 1); + pPager->eState = PAGER_OPEN; + } + }else if( !pPager->exclusiveMode ){ + pagerUnlockDb(pPager, SHARED_LOCK); + } + + if( rc!=SQLITE_OK ){ + /* This branch is taken if an error occurs while trying to open + ** or roll back a hot-journal while holding an EXCLUSIVE lock. The + ** pager_unlock() routine will be called before returning to unlock + ** the file. If the unlock attempt fails, then Pager.eLock must be + ** set to UNKNOWN_LOCK (see the comment above the #define for + ** UNKNOWN_LOCK above for an explanation). + ** + ** In order to get pager_unlock() to do this, set Pager.eState to + ** PAGER_ERROR now. This is not actually counted as a transition + ** to ERROR state in the state diagram at the top of this file, + ** since we know that the same call to pager_unlock() will very + ** shortly transition the pager object to the OPEN state. Calling + ** assert_pager_state() would fail now, as it should not be possible + ** to be in ERROR state when there are zero outstanding page + ** references. + */ + pager_error(pPager, rc); + goto failed; + } + + assert( pPager->eState==PAGER_OPEN ); + assert( (pPager->eLock==SHARED_LOCK) + || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK) + ); + } + + if( !pPager->tempFile && pPager->hasHeldSharedLock ){ + /* The shared-lock has just been acquired then check to + ** see if the database has been modified. If the database has changed, + ** flush the cache. The hasHeldSharedLock flag prevents this from + ** occurring on the very first access to a file, in order to save a + ** single unnecessary sqlite3OsRead() call at the start-up. + ** + ** Database changes are detected by looking at 15 bytes beginning + ** at offset 24 into the file. The first 4 of these 16 bytes are + ** a 32-bit counter that is incremented with each change. The + ** other bytes change randomly with each file change when + ** a codec is in use. + ** + ** There is a vanishingly small chance that a change will not be + ** detected. The chance of an undetected change is so small that + ** it can be neglected. + */ + Pgno nPage = 0; + char dbFileVers[sizeof(pPager->dbFileVers)]; + + rc = pagerPagecount(pPager, &nPage); + if( rc ) goto failed; + + if( nPage>0 ){ + IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers))); + rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24); + if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ + goto failed; + } + }else{ + memset(dbFileVers, 0, sizeof(dbFileVers)); + } + + if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ + pager_reset(pPager); + + /* Unmap the database file. It is possible that external processes + ** may have truncated the database file and then extended it back + ** to its original size while this process was not holding a lock. + ** In this case there may exist a Pager.pMap mapping that appears + ** to be the right size but is not actually valid. Avoid this + ** possibility by unmapping the db here. */ + if( USEFETCH(pPager) ){ + sqlite3OsUnfetch(pPager->fd, 0, 0); + } + } + } + + /* If there is a WAL file in the file-system, open this database in WAL + ** mode. Otherwise, the following function call is a no-op. + */ + rc = pagerOpenWalIfPresent(pPager); +#ifndef SQLITE_OMIT_WAL + assert( pPager->pWal==0 || rc==SQLITE_OK ); +#endif + } + + if( pagerUseWal(pPager) ){ + assert( rc==SQLITE_OK ); + rc = pagerBeginReadTransaction(pPager); + } + + if( pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){ + rc = pagerPagecount(pPager, &pPager->dbSize); + } + + failed: + if( rc!=SQLITE_OK ){ + pager_unlock(pPager); + assert( pPager->eState==PAGER_OPEN ); + }else{ + pPager->eState = PAGER_READER; + pPager->hasHeldSharedLock = 1; + } + return rc; +} + +/* +** If the reference count has reached zero, rollback any active +** transaction and unlock the pager. +** +** Except, in locking_mode=EXCLUSIVE when there is nothing to in +** the rollback journal, the unlock is not performed and there is +** nothing to rollback, so this routine is a no-op. +*/ +static void pagerUnlockIfUnused(Pager *pPager){ + if( pPager->nMmapOut==0 && (sqlite3PcacheRefCount(pPager->pPCache)==0) ){ + pagerUnlockAndRollback(pPager); + } +} + +/* +** Acquire a reference to page number pgno in pager pPager (a page +** reference has type DbPage*). If the requested reference is +** successfully obtained, it is copied to *ppPage and SQLITE_OK returned. +** +** If the requested page is already in the cache, it is returned. +** Otherwise, a new page object is allocated and populated with data +** read from the database file. In some cases, the pcache module may +** choose not to allocate a new page object and may reuse an existing +** object with no outstanding references. +** +** The extra data appended to a page is always initialized to zeros the +** first time a page is loaded into memory. If the page requested is +** already in the cache when this function is called, then the extra +** data is left as it was when the page object was last used. +** +** If the database image is smaller than the requested page or if a +** non-zero value is passed as the noContent parameter and the +** requested page is not already stored in the cache, then no +** actual disk read occurs. In this case the memory image of the +** page is initialized to all zeros. +** +** If noContent is true, it means that we do not care about the contents +** of the page. This occurs in two scenarios: +** +** a) When reading a free-list leaf page from the database, and +** +** b) When a savepoint is being rolled back and we need to load +** a new page into the cache to be filled with the data read +** from the savepoint journal. +** +** If noContent is true, then the data returned is zeroed instead of +** being read from the database. Additionally, the bits corresponding +** to pgno in Pager.pInJournal (bitvec of pages already written to the +** journal file) and the PagerSavepoint.pInSavepoint bitvecs of any open +** savepoints are set. This means if the page is made writable at any +** point in the future, using a call to sqlite3PagerWrite(), its contents +** will not be journaled. This saves IO. +** +** The acquisition might fail for several reasons. In all cases, +** an appropriate error code is returned and *ppPage is set to NULL. +** +** See also sqlite3PagerLookup(). Both this routine and Lookup() attempt +** to find a page in the in-memory cache first. If the page is not already +** in memory, this routine goes to disk to read it in whereas Lookup() +** just returns 0. This routine acquires a read-lock the first time it +** has to go to disk, and could also playback an old journal if necessary. +** Since Lookup() never goes to disk, it never has to deal with locks +** or journal files. +*/ +SQLITE_PRIVATE int sqlite3PagerAcquire( + Pager *pPager, /* The pager open on the database file */ + Pgno pgno, /* Page number to fetch */ + DbPage **ppPage, /* Write a pointer to the page here */ + int flags /* PAGER_GET_XXX flags */ +){ + int rc = SQLITE_OK; + PgHdr *pPg = 0; + u32 iFrame = 0; /* Frame to read from WAL file */ + const int noContent = (flags & PAGER_GET_NOCONTENT); + + /* It is acceptable to use a read-only (mmap) page for any page except + ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY + ** flag was specified by the caller. And so long as the db is not a + ** temporary or in-memory database. */ + const int bMmapOk = (pgno>1 && USEFETCH(pPager) + && (pPager->eState==PAGER_READER || (flags & PAGER_GET_READONLY)) +#ifdef SQLITE_HAS_CODEC + && pPager->xCodec==0 +#endif + ); + + /* Optimization note: Adding the "pgno<=1" term before "pgno==0" here + ** allows the compiler optimizer to reuse the results of the "pgno>1" + ** test in the previous statement, and avoid testing pgno==0 in the + ** common case where pgno is large. */ + if( pgno<=1 && pgno==0 ){ + return SQLITE_CORRUPT_BKPT; + } + assert( pPager->eState>=PAGER_READER ); + assert( assert_pager_state(pPager) ); + assert( noContent==0 || bMmapOk==0 ); + + assert( pPager->hasHeldSharedLock==1 ); + + /* If the pager is in the error state, return an error immediately. + ** Otherwise, request the page from the PCache layer. */ + if( pPager->errCode!=SQLITE_OK ){ + rc = pPager->errCode; + }else{ + if( bMmapOk && pagerUseWal(pPager) ){ + rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame); + if( rc!=SQLITE_OK ) goto pager_acquire_err; + } + + if( bMmapOk && iFrame==0 ){ + void *pData = 0; + + rc = sqlite3OsFetch(pPager->fd, + (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData + ); + + if( rc==SQLITE_OK && pData ){ + if( pPager->eState>PAGER_READER ){ + pPg = sqlite3PagerLookup(pPager, pgno); + } + if( pPg==0 ){ + rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg); + }else{ + sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1)*pPager->pageSize, pData); + } + if( pPg ){ + assert( rc==SQLITE_OK ); + *ppPage = pPg; + return SQLITE_OK; + } + } + if( rc!=SQLITE_OK ){ + goto pager_acquire_err; + } + } + + { + sqlite3_pcache_page *pBase; + pBase = sqlite3PcacheFetch(pPager->pPCache, pgno, 3); + if( pBase==0 ){ + rc = sqlite3PcacheFetchStress(pPager->pPCache, pgno, &pBase); + if( rc!=SQLITE_OK ) goto pager_acquire_err; + if( pBase==0 ){ + pPg = *ppPage = 0; + rc = SQLITE_NOMEM; + goto pager_acquire_err; + } + } + pPg = *ppPage = sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pBase); + assert( pPg!=0 ); + } + } + + if( rc!=SQLITE_OK ){ + /* Either the call to sqlite3PcacheFetch() returned an error or the + ** pager was already in the error-state when this function was called. + ** Set pPg to 0 and jump to the exception handler. */ + pPg = 0; + goto pager_acquire_err; + } + assert( pPg==(*ppPage) ); + assert( pPg->pgno==pgno ); + assert( pPg->pPager==pPager || pPg->pPager==0 ); + + if( pPg->pPager && !noContent ){ + /* In this case the pcache already contains an initialized copy of + ** the page. Return without further ado. */ + assert( pgno<=PAGER_MAX_PGNO && pgno!=PAGER_MJ_PGNO(pPager) ); + pPager->aStat[PAGER_STAT_HIT]++; + return SQLITE_OK; + + }else{ + /* The pager cache has created a new page. Its content needs to + ** be initialized. */ + + pPg->pPager = pPager; + + /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page + ** number greater than this, or the unused locking-page, is requested. */ + if( pgno>PAGER_MAX_PGNO || pgno==PAGER_MJ_PGNO(pPager) ){ + rc = SQLITE_CORRUPT_BKPT; + goto pager_acquire_err; + } + + if( pPager->dbSize<pgno || noContent || !isOpen(pPager->fd) ){ + if( pgno>pPager->mxPgno ){ + rc = SQLITE_FULL; + goto pager_acquire_err; + } + if( noContent ){ + /* Failure to set the bits in the InJournal bit-vectors is benign. + ** It merely means that we might do some extra work to journal a + ** page that does not need to be journaled. Nevertheless, be sure + ** to test the case where a malloc error occurs while trying to set + ** a bit in a bit vector. + */ + if( pgno<=pPager->dbOrigSize ){ + TESTONLY( rc = ) sqlite3BitvecSet(pPager->pInJournal, pgno); + testcase( rc==SQLITE_NOMEM ); + } + TESTONLY( rc = ) addToSavepointBitvecs(pPager, pgno); + testcase( rc==SQLITE_NOMEM ); + } + memset(pPg->pData, 0, pPager->pageSize); + IOTRACE(("ZERO %p %d\n", pPager, pgno)); + }else{ + if( pagerUseWal(pPager) && bMmapOk==0 ){ + rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame); + if( rc!=SQLITE_OK ) goto pager_acquire_err; + } + assert( pPg->pPager==pPager ); + pPager->aStat[PAGER_STAT_MISS]++; + rc = readDbPage(pPg, iFrame); + if( rc!=SQLITE_OK ){ + goto pager_acquire_err; + } + } + pager_set_pagehash(pPg); + } + + return SQLITE_OK; + +pager_acquire_err: + assert( rc!=SQLITE_OK ); + if( pPg ){ + sqlite3PcacheDrop(pPg); + } + pagerUnlockIfUnused(pPager); + + *ppPage = 0; + return rc; +} + +/* +** Acquire a page if it is already in the in-memory cache. Do +** not read the page from disk. Return a pointer to the page, +** or 0 if the page is not in cache. +** +** See also sqlite3PagerGet(). The difference between this routine +** and sqlite3PagerGet() is that _get() will go to the disk and read +** in the page if the page is not already in cache. This routine +** returns NULL if the page is not in cache or if a disk I/O error +** has ever happened. +*/ +SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ + sqlite3_pcache_page *pPage; + assert( pPager!=0 ); + assert( pgno!=0 ); + assert( pPager->pPCache!=0 ); + pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0); + assert( pPage==0 || pPager->hasHeldSharedLock ); + if( pPage==0 ) return 0; + return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage); +} + +/* +** Release a page reference. +** +** If the number of references to the page drop to zero, then the +** page is added to the LRU list. When all references to all pages +** are released, a rollback occurs and the lock on the database is +** removed. +*/ +SQLITE_PRIVATE void sqlite3PagerUnrefNotNull(DbPage *pPg){ + Pager *pPager; + assert( pPg!=0 ); + pPager = pPg->pPager; + if( pPg->flags & PGHDR_MMAP ){ + pagerReleaseMapPage(pPg); + }else{ + sqlite3PcacheRelease(pPg); + } + pagerUnlockIfUnused(pPager); +} +SQLITE_PRIVATE void sqlite3PagerUnref(DbPage *pPg){ + if( pPg ) sqlite3PagerUnrefNotNull(pPg); +} + +/* +** This function is called at the start of every write transaction. +** There must already be a RESERVED or EXCLUSIVE lock on the database +** file when this routine is called. +** +** Open the journal file for pager pPager and write a journal header +** to the start of it. If there are active savepoints, open the sub-journal +** as well. This function is only used when the journal file is being +** opened to write a rollback log for a transaction. It is not used +** when opening a hot journal file to roll it back. +** +** If the journal file is already open (as it may be in exclusive mode), +** then this function just writes a journal header to the start of the +** already open file. +** +** Whether or not the journal file is opened by this function, the +** Pager.pInJournal bitvec structure is allocated. +** +** Return SQLITE_OK if everything is successful. Otherwise, return +** SQLITE_NOMEM if the attempt to allocate Pager.pInJournal fails, or +** an IO error code if opening or writing the journal file fails. +*/ +static int pager_open_journal(Pager *pPager){ + int rc = SQLITE_OK; /* Return code */ + sqlite3_vfs * const pVfs = pPager->pVfs; /* Local cache of vfs pointer */ + + assert( pPager->eState==PAGER_WRITER_LOCKED ); + assert( assert_pager_state(pPager) ); + assert( pPager->pInJournal==0 ); + + /* If already in the error state, this function is a no-op. But on + ** the other hand, this routine is never called if we are already in + ** an error state. */ + if( NEVER(pPager->errCode) ) return pPager->errCode; + + if( !pagerUseWal(pPager) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ + pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize); + if( pPager->pInJournal==0 ){ + return SQLITE_NOMEM; + } + + /* Open the journal file if it is not already open. */ + if( !isOpen(pPager->jfd) ){ + if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){ + sqlite3MemJournalOpen(pPager->jfd); + }else{ + const int flags = /* VFS flags to open journal file */ + SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE| + (pPager->tempFile ? + (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL): + (SQLITE_OPEN_MAIN_JOURNAL) + ); + + /* Verify that the database still has the same name as it did when + ** it was originally opened. */ + rc = databaseIsUnmoved(pPager); + if( rc==SQLITE_OK ){ +#ifdef SQLITE_ENABLE_ATOMIC_WRITE + rc = sqlite3JournalOpen( + pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager) + ); +#else + rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0); +#endif + } + } + assert( rc!=SQLITE_OK || isOpen(pPager->jfd) ); + } + + + /* Write the first journal header to the journal file and open + ** the sub-journal if necessary. + */ + if( rc==SQLITE_OK ){ + /* TODO: Check if all of these are really required. */ + pPager->nRec = 0; + pPager->journalOff = 0; + pPager->setMaster = 0; + pPager->journalHdr = 0; + rc = writeJournalHdr(pPager); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3BitvecDestroy(pPager->pInJournal); + pPager->pInJournal = 0; + }else{ + assert( pPager->eState==PAGER_WRITER_LOCKED ); + pPager->eState = PAGER_WRITER_CACHEMOD; + } + + return rc; +} + +/* +** Begin a write-transaction on the specified pager object. If a +** write-transaction has already been opened, this function is a no-op. +** +** If the exFlag argument is false, then acquire at least a RESERVED +** lock on the database file. If exFlag is true, then acquire at least +** an EXCLUSIVE lock. If such a lock is already held, no locking +** functions need be called. +** +** If the subjInMemory argument is non-zero, then any sub-journal opened +** within this transaction will be opened as an in-memory file. This +** has no effect if the sub-journal is already opened (as it may be when +** running in exclusive mode) or if the transaction does not require a +** sub-journal. If the subjInMemory argument is zero, then any required +** sub-journal is implemented in-memory if pPager is an in-memory database, +** or using a temporary file otherwise. +*/ +SQLITE_PRIVATE int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ + int rc = SQLITE_OK; + + if( pPager->errCode ) return pPager->errCode; + assert( pPager->eState>=PAGER_READER && pPager->eState<PAGER_ERROR ); + pPager->subjInMemory = (u8)subjInMemory; + + if( ALWAYS(pPager->eState==PAGER_READER) ){ + assert( pPager->pInJournal==0 ); + + if( pagerUseWal(pPager) ){ + /* If the pager is configured to use locking_mode=exclusive, and an + ** exclusive lock on the database is not already held, obtain it now. + */ + if( pPager->exclusiveMode && sqlite3WalExclusiveMode(pPager->pWal, -1) ){ + rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); + if( rc!=SQLITE_OK ){ + return rc; + } + sqlite3WalExclusiveMode(pPager->pWal, 1); + } + + /* Grab the write lock on the log file. If successful, upgrade to + ** PAGER_RESERVED state. Otherwise, return an error code to the caller. + ** The busy-handler is not invoked if another connection already + ** holds the write-lock. If possible, the upper layer will call it. + */ + rc = sqlite3WalBeginWriteTransaction(pPager->pWal); + }else{ + /* Obtain a RESERVED lock on the database file. If the exFlag parameter + ** is true, then immediately upgrade this to an EXCLUSIVE lock. The + ** busy-handler callback can be used when upgrading to the EXCLUSIVE + ** lock, but not when obtaining the RESERVED lock. + */ + rc = pagerLockDb(pPager, RESERVED_LOCK); + if( rc==SQLITE_OK && exFlag ){ + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); + } + } + + if( rc==SQLITE_OK ){ + /* Change to WRITER_LOCKED state. + ** + ** WAL mode sets Pager.eState to PAGER_WRITER_LOCKED or CACHEMOD + ** when it has an open transaction, but never to DBMOD or FINISHED. + ** This is because in those states the code to roll back savepoint + ** transactions may copy data from the sub-journal into the database + ** file as well as into the page cache. Which would be incorrect in + ** WAL mode. + */ + pPager->eState = PAGER_WRITER_LOCKED; + pPager->dbHintSize = pPager->dbSize; + pPager->dbFileSize = pPager->dbSize; + pPager->dbOrigSize = pPager->dbSize; + pPager->journalOff = 0; + } + + assert( rc==SQLITE_OK || pPager->eState==PAGER_READER ); + assert( rc!=SQLITE_OK || pPager->eState==PAGER_WRITER_LOCKED ); + assert( assert_pager_state(pPager) ); + } + + PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager))); + return rc; +} + +/* +** Write page pPg onto the end of the rollback journal. +*/ +static SQLITE_NOINLINE int pagerAddPageToRollbackJournal(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + int rc; + u32 cksum; + char *pData2; + i64 iOff = pPager->journalOff; + + /* We should never write to the journal file the page that + ** contains the database locks. The following assert verifies + ** that we do not. */ + assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); + + assert( pPager->journalHdr<=pPager->journalOff ); + CODEC2(pPager, pPg->pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); + cksum = pager_cksum(pPager, (u8*)pData2); + + /* Even if an IO or diskfull error occurs while journalling the + ** page in the block above, set the need-sync flag for the page. + ** Otherwise, when the transaction is rolled back, the logic in + ** playback_one_page() will think that the page needs to be restored + ** in the database file. And if an IO error occurs while doing so, + ** then corruption may follow. + */ + pPg->flags |= PGHDR_NEED_SYNC; + + rc = write32bits(pPager->jfd, iOff, pPg->pgno); + if( rc!=SQLITE_OK ) return rc; + rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4); + if( rc!=SQLITE_OK ) return rc; + rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum); + if( rc!=SQLITE_OK ) return rc; + + IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, + pPager->journalOff, pPager->pageSize)); + PAGER_INCR(sqlite3_pager_writej_count); + PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n", + PAGERID(pPager), pPg->pgno, + ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg))); + + pPager->journalOff += 8 + pPager->pageSize; + pPager->nRec++; + assert( pPager->pInJournal!=0 ); + rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); + testcase( rc==SQLITE_NOMEM ); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + rc |= addToSavepointBitvecs(pPager, pPg->pgno); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + return rc; +} + +/* +** Mark a single data page as writeable. The page is written into the +** main journal or sub-journal as required. If the page is written into +** one of the journals, the corresponding bit is set in the +** Pager.pInJournal bitvec and the PagerSavepoint.pInSavepoint bitvecs +** of any open savepoints as appropriate. +*/ +static int pager_write(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + int rc = SQLITE_OK; + + /* This routine is not called unless a write-transaction has already + ** been started. The journal file may or may not be open at this point. + ** It is never called in the ERROR state. + */ + assert( pPager->eState==PAGER_WRITER_LOCKED + || pPager->eState==PAGER_WRITER_CACHEMOD + || pPager->eState==PAGER_WRITER_DBMOD + ); + assert( assert_pager_state(pPager) ); + assert( pPager->errCode==0 ); + assert( pPager->readOnly==0 ); + CHECK_PAGE(pPg); + + /* The journal file needs to be opened. Higher level routines have already + ** obtained the necessary locks to begin the write-transaction, but the + ** rollback journal might not yet be open. Open it now if this is the case. + ** + ** This is done before calling sqlite3PcacheMakeDirty() on the page. + ** Otherwise, if it were done after calling sqlite3PcacheMakeDirty(), then + ** an error might occur and the pager would end up in WRITER_LOCKED state + ** with pages marked as dirty in the cache. + */ + if( pPager->eState==PAGER_WRITER_LOCKED ){ + rc = pager_open_journal(pPager); + if( rc!=SQLITE_OK ) return rc; + } + assert( pPager->eState>=PAGER_WRITER_CACHEMOD ); + assert( assert_pager_state(pPager) ); + + /* Mark the page that is about to be modified as dirty. */ + sqlite3PcacheMakeDirty(pPg); + + /* If a rollback journal is in use, them make sure the page that is about + ** to change is in the rollback journal, or if the page is a new page off + ** then end of the file, make sure it is marked as PGHDR_NEED_SYNC. + */ + assert( (pPager->pInJournal!=0) == isOpen(pPager->jfd) ); + if( pPager->pInJournal!=0 + && sqlite3BitvecTestNotNull(pPager->pInJournal, pPg->pgno)==0 + ){ + assert( pagerUseWal(pPager)==0 ); + if( pPg->pgno<=pPager->dbOrigSize ){ + rc = pagerAddPageToRollbackJournal(pPg); + if( rc!=SQLITE_OK ){ + return rc; + } + }else{ + if( pPager->eState!=PAGER_WRITER_DBMOD ){ + pPg->flags |= PGHDR_NEED_SYNC; + } + PAGERTRACE(("APPEND %d page %d needSync=%d\n", + PAGERID(pPager), pPg->pgno, + ((pPg->flags&PGHDR_NEED_SYNC)?1:0))); + } + } + + /* The PGHDR_DIRTY bit is set above when the page was added to the dirty-list + ** and before writing the page into the rollback journal. Wait until now, + ** after the page has been successfully journalled, before setting the + ** PGHDR_WRITEABLE bit that indicates that the page can be safely modified. + */ + pPg->flags |= PGHDR_WRITEABLE; + + /* If the statement journal is open and the page is not in it, + ** then write the page into the statement journal. + */ + if( pPager->nSavepoint>0 ){ + rc = subjournalPageIfRequired(pPg); + } + + /* Update the database size and return. */ + if( pPager->dbSize<pPg->pgno ){ + pPager->dbSize = pPg->pgno; + } + return rc; +} + +/* +** This is a variant of sqlite3PagerWrite() that runs when the sector size +** is larger than the page size. SQLite makes the (reasonable) assumption that +** all bytes of a sector are written together by hardware. Hence, all bytes of +** a sector need to be journalled in case of a power loss in the middle of +** a write. +** +** Usually, the sector size is less than or equal to the page size, in which +** case pages can be individually written. This routine only runs in the +** exceptional case where the page size is smaller than the sector size. +*/ +static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){ + int rc = SQLITE_OK; /* Return code */ + Pgno nPageCount; /* Total number of pages in database file */ + Pgno pg1; /* First page of the sector pPg is located on. */ + int nPage = 0; /* Number of pages starting at pg1 to journal */ + int ii; /* Loop counter */ + int needSync = 0; /* True if any page has PGHDR_NEED_SYNC */ + Pager *pPager = pPg->pPager; /* The pager that owns pPg */ + Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); + + /* Set the doNotSpill NOSYNC bit to 1. This is because we cannot allow + ** a journal header to be written between the pages journaled by + ** this function. + */ + assert( (pPager->doNotSpill & SPILLFLAG_NOSYNC)==0 ); + pPager->doNotSpill |= SPILLFLAG_NOSYNC; + + /* This trick assumes that both the page-size and sector-size are + ** an integer power of 2. It sets variable pg1 to the identifier + ** of the first page of the sector pPg is located on. + */ + pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1; + + nPageCount = pPager->dbSize; + if( pPg->pgno>nPageCount ){ + nPage = (pPg->pgno - pg1)+1; + }else if( (pg1+nPagePerSector-1)>nPageCount ){ + nPage = nPageCount+1-pg1; + }else{ + nPage = nPagePerSector; + } + assert(nPage>0); + assert(pg1<=pPg->pgno); + assert((pg1+nPage)>pPg->pgno); + + for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){ + Pgno pg = pg1+ii; + PgHdr *pPage; + if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){ + if( pg!=PAGER_MJ_PGNO(pPager) ){ + rc = sqlite3PagerGet(pPager, pg, &pPage); + if( rc==SQLITE_OK ){ + rc = pager_write(pPage); + if( pPage->flags&PGHDR_NEED_SYNC ){ + needSync = 1; + } + sqlite3PagerUnrefNotNull(pPage); + } + } + }else if( (pPage = sqlite3PagerLookup(pPager, pg))!=0 ){ + if( pPage->flags&PGHDR_NEED_SYNC ){ + needSync = 1; + } + sqlite3PagerUnrefNotNull(pPage); + } + } + + /* If the PGHDR_NEED_SYNC flag is set for any of the nPage pages + ** starting at pg1, then it needs to be set for all of them. Because + ** writing to any of these nPage pages may damage the others, the + ** journal file must contain sync()ed copies of all of them + ** before any of them can be written out to the database file. + */ + if( rc==SQLITE_OK && needSync ){ + for(ii=0; ii<nPage; ii++){ + PgHdr *pPage = sqlite3PagerLookup(pPager, pg1+ii); + if( pPage ){ + pPage->flags |= PGHDR_NEED_SYNC; + sqlite3PagerUnrefNotNull(pPage); + } + } + } + + assert( (pPager->doNotSpill & SPILLFLAG_NOSYNC)!=0 ); + pPager->doNotSpill &= ~SPILLFLAG_NOSYNC; + return rc; +} + +/* +** Mark a data page as writeable. This routine must be called before +** making changes to a page. The caller must check the return value +** of this function and be careful not to change any page data unless +** this routine returns SQLITE_OK. +** +** The difference between this function and pager_write() is that this +** function also deals with the special case where 2 or more pages +** fit on a single disk sector. In this case all co-resident pages +** must have been written to the journal file before returning. +** +** If an error occurs, SQLITE_NOMEM or an IO error code is returned +** as appropriate. Otherwise, SQLITE_OK. +*/ +SQLITE_PRIVATE int sqlite3PagerWrite(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + assert( (pPg->flags & PGHDR_MMAP)==0 ); + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + assert( pPager->eState!=PAGER_ERROR ); + assert( assert_pager_state(pPager) ); + if( (pPg->flags & PGHDR_WRITEABLE)!=0 && pPager->dbSize>=pPg->pgno ){ + if( pPager->nSavepoint ) return subjournalPageIfRequired(pPg); + return SQLITE_OK; + }else if( pPager->sectorSize > (u32)pPager->pageSize ){ + return pagerWriteLargeSector(pPg); + }else{ + return pager_write(pPg); + } +} + +/* +** Return TRUE if the page given in the argument was previously passed +** to sqlite3PagerWrite(). In other words, return TRUE if it is ok +** to change the content of the page. +*/ +#ifndef NDEBUG +SQLITE_PRIVATE int sqlite3PagerIswriteable(DbPage *pPg){ + return pPg->flags & PGHDR_WRITEABLE; +} +#endif + +/* +** A call to this routine tells the pager that it is not necessary to +** write the information on page pPg back to the disk, even though +** that page might be marked as dirty. This happens, for example, when +** the page has been added as a leaf of the freelist and so its +** content no longer matters. +** +** The overlying software layer calls this routine when all of the data +** on the given page is unused. The pager marks the page as clean so +** that it does not get written to disk. +** +** Tests show that this optimization can quadruple the speed of large +** DELETE operations. +*/ +SQLITE_PRIVATE void sqlite3PagerDontWrite(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + if( (pPg->flags&PGHDR_DIRTY) && pPager->nSavepoint==0 ){ + PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager))); + IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno)) + pPg->flags |= PGHDR_DONT_WRITE; + pPg->flags &= ~PGHDR_WRITEABLE; + pager_set_pagehash(pPg); + } +} + +/* +** This routine is called to increment the value of the database file +** change-counter, stored as a 4-byte big-endian integer starting at +** byte offset 24 of the pager file. The secondary change counter at +** 92 is also updated, as is the SQLite version number at offset 96. +** +** But this only happens if the pPager->changeCountDone flag is false. +** To avoid excess churning of page 1, the update only happens once. +** See also the pager_write_changecounter() routine that does an +** unconditional update of the change counters. +** +** If the isDirectMode flag is zero, then this is done by calling +** sqlite3PagerWrite() on page 1, then modifying the contents of the +** page data. In this case the file will be updated when the current +** transaction is committed. +** +** The isDirectMode flag may only be non-zero if the library was compiled +** with the SQLITE_ENABLE_ATOMIC_WRITE macro defined. In this case, +** if isDirect is non-zero, then the database file is updated directly +** by writing an updated version of page 1 using a call to the +** sqlite3OsWrite() function. +*/ +static int pager_incr_changecounter(Pager *pPager, int isDirectMode){ + int rc = SQLITE_OK; + + assert( pPager->eState==PAGER_WRITER_CACHEMOD + || pPager->eState==PAGER_WRITER_DBMOD + ); + assert( assert_pager_state(pPager) ); + + /* Declare and initialize constant integer 'isDirect'. If the + ** atomic-write optimization is enabled in this build, then isDirect + ** is initialized to the value passed as the isDirectMode parameter + ** to this function. Otherwise, it is always set to zero. + ** + ** The idea is that if the atomic-write optimization is not + ** enabled at compile time, the compiler can omit the tests of + ** 'isDirect' below, as well as the block enclosed in the + ** "if( isDirect )" condition. + */ +#ifndef SQLITE_ENABLE_ATOMIC_WRITE +# define DIRECT_MODE 0 + assert( isDirectMode==0 ); + UNUSED_PARAMETER(isDirectMode); +#else +# define DIRECT_MODE isDirectMode +#endif + + if( !pPager->changeCountDone && ALWAYS(pPager->dbSize>0) ){ + PgHdr *pPgHdr; /* Reference to page 1 */ + + assert( !pPager->tempFile && isOpen(pPager->fd) ); + + /* Open page 1 of the file for writing. */ + rc = sqlite3PagerGet(pPager, 1, &pPgHdr); + assert( pPgHdr==0 || rc==SQLITE_OK ); + + /* If page one was fetched successfully, and this function is not + ** operating in direct-mode, make page 1 writable. When not in + ** direct mode, page 1 is always held in cache and hence the PagerGet() + ** above is always successful - hence the ALWAYS on rc==SQLITE_OK. + */ + if( !DIRECT_MODE && ALWAYS(rc==SQLITE_OK) ){ + rc = sqlite3PagerWrite(pPgHdr); + } + + if( rc==SQLITE_OK ){ + /* Actually do the update of the change counter */ + pager_write_changecounter(pPgHdr); + + /* If running in direct mode, write the contents of page 1 to the file. */ + if( DIRECT_MODE ){ + const void *zBuf; + assert( pPager->dbFileSize>0 ); + CODEC2(pPager, pPgHdr->pData, 1, 6, rc=SQLITE_NOMEM, zBuf); + if( rc==SQLITE_OK ){ + rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0); + pPager->aStat[PAGER_STAT_WRITE]++; + } + if( rc==SQLITE_OK ){ + /* Update the pager's copy of the change-counter. Otherwise, the + ** next time a read transaction is opened the cache will be + ** flushed (as the change-counter values will not match). */ + const void *pCopy = (const void *)&((const char *)zBuf)[24]; + memcpy(&pPager->dbFileVers, pCopy, sizeof(pPager->dbFileVers)); + pPager->changeCountDone = 1; + } + }else{ + pPager->changeCountDone = 1; + } + } + + /* Release the page reference. */ + sqlite3PagerUnref(pPgHdr); + } + return rc; +} + +/* +** Sync the database file to disk. This is a no-op for in-memory databases +** or pages with the Pager.noSync flag set. +** +** If successful, or if called on a pager for which it is a no-op, this +** function returns SQLITE_OK. Otherwise, an IO error code is returned. +*/ +SQLITE_PRIVATE int sqlite3PagerSync(Pager *pPager, const char *zMaster){ + int rc = SQLITE_OK; + + if( isOpen(pPager->fd) ){ + void *pArg = (void*)zMaster; + rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_SYNC, pArg); + if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; + } + if( rc==SQLITE_OK && !pPager->noSync ){ + rc = sqlite3OsSync(pPager->fd, pPager->syncFlags); + } + return rc; +} + +/* +** This function may only be called while a write-transaction is active in +** rollback. If the connection is in WAL mode, this call is a no-op. +** Otherwise, if the connection does not already have an EXCLUSIVE lock on +** the database file, an attempt is made to obtain one. +** +** If the EXCLUSIVE lock is already held or the attempt to obtain it is +** successful, or the connection is in WAL mode, SQLITE_OK is returned. +** Otherwise, either SQLITE_BUSY or an SQLITE_IOERR_XXX error code is +** returned. +*/ +SQLITE_PRIVATE int sqlite3PagerExclusiveLock(Pager *pPager){ + int rc = SQLITE_OK; + assert( pPager->eState==PAGER_WRITER_CACHEMOD + || pPager->eState==PAGER_WRITER_DBMOD + || pPager->eState==PAGER_WRITER_LOCKED + ); + assert( assert_pager_state(pPager) ); + if( 0==pagerUseWal(pPager) ){ + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); + } + return rc; +} + +/* +** Sync the database file for the pager pPager. zMaster points to the name +** of a master journal file that should be written into the individual +** journal file. zMaster may be NULL, which is interpreted as no master +** journal (a single database transaction). +** +** This routine ensures that: +** +** * The database file change-counter is updated, +** * the journal is synced (unless the atomic-write optimization is used), +** * all dirty pages are written to the database file, +** * the database file is truncated (if required), and +** * the database file synced. +** +** The only thing that remains to commit the transaction is to finalize +** (delete, truncate or zero the first part of) the journal file (or +** delete the master journal file if specified). +** +** Note that if zMaster==NULL, this does not overwrite a previous value +** passed to an sqlite3PagerCommitPhaseOne() call. +** +** If the final parameter - noSync - is true, then the database file itself +** is not synced. The caller must call sqlite3PagerSync() directly to +** sync the database file before calling CommitPhaseTwo() to delete the +** journal file in this case. +*/ +SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne( + Pager *pPager, /* Pager object */ + const char *zMaster, /* If not NULL, the master journal name */ + int noSync /* True to omit the xSync on the db file */ +){ + int rc = SQLITE_OK; /* Return code */ + + assert( pPager->eState==PAGER_WRITER_LOCKED + || pPager->eState==PAGER_WRITER_CACHEMOD + || pPager->eState==PAGER_WRITER_DBMOD + || pPager->eState==PAGER_ERROR + ); + assert( assert_pager_state(pPager) ); + + /* If a prior error occurred, report that error again. */ + if( NEVER(pPager->errCode) ) return pPager->errCode; + + PAGERTRACE(("DATABASE SYNC: File=%s zMaster=%s nSize=%d\n", + pPager->zFilename, zMaster, pPager->dbSize)); + + /* If no database changes have been made, return early. */ + if( pPager->eState<PAGER_WRITER_CACHEMOD ) return SQLITE_OK; + + { + if( pagerUseWal(pPager) ){ + PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache); + PgHdr *pPageOne = 0; + if( pList==0 ){ + /* Must have at least one page for the WAL commit flag. + ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */ + rc = sqlite3PagerGet(pPager, 1, &pPageOne); + pList = pPageOne; + pList->pDirty = 0; + } + assert( rc==SQLITE_OK ); + if( ALWAYS(pList) ){ + rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1); + } + sqlite3PagerUnref(pPageOne); + if( rc==SQLITE_OK ){ + sqlite3PcacheCleanAll(pPager->pPCache); + } + }else{ + /* The following block updates the change-counter. Exactly how it + ** does this depends on whether or not the atomic-update optimization + ** was enabled at compile time, and if this transaction meets the + ** runtime criteria to use the operation: + ** + ** * The file-system supports the atomic-write property for + ** blocks of size page-size, and + ** * This commit is not part of a multi-file transaction, and + ** * Exactly one page has been modified and store in the journal file. + ** + ** If the optimization was not enabled at compile time, then the + ** pager_incr_changecounter() function is called to update the change + ** counter in 'indirect-mode'. If the optimization is compiled in but + ** is not applicable to this transaction, call sqlite3JournalCreate() + ** to make sure the journal file has actually been created, then call + ** pager_incr_changecounter() to update the change-counter in indirect + ** mode. + ** + ** Otherwise, if the optimization is both enabled and applicable, + ** then call pager_incr_changecounter() to update the change-counter + ** in 'direct' mode. In this case the journal file will never be + ** created for this transaction. + */ + #ifdef SQLITE_ENABLE_ATOMIC_WRITE + PgHdr *pPg; + assert( isOpen(pPager->jfd) + || pPager->journalMode==PAGER_JOURNALMODE_OFF + || pPager->journalMode==PAGER_JOURNALMODE_WAL + ); + if( !zMaster && isOpen(pPager->jfd) + && pPager->journalOff==jrnlBufferSize(pPager) + && pPager->dbSize>=pPager->dbOrigSize + && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) + ){ + /* Update the db file change counter via the direct-write method. The + ** following call will modify the in-memory representation of page 1 + ** to include the updated change counter and then write page 1 + ** directly to the database file. Because of the atomic-write + ** property of the host file-system, this is safe. + */ + rc = pager_incr_changecounter(pPager, 1); + }else{ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc==SQLITE_OK ){ + rc = pager_incr_changecounter(pPager, 0); + } + } + #else + rc = pager_incr_changecounter(pPager, 0); + #endif + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* Write the master journal name into the journal file. If a master + ** journal file name has already been written to the journal file, + ** or if zMaster is NULL (no master journal), then this call is a no-op. + */ + rc = writeMasterJournal(pPager, zMaster); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* Sync the journal file and write all dirty pages to the database. + ** If the atomic-update optimization is being used, this sync will not + ** create the journal file or perform any real IO. + ** + ** Because the change-counter page was just modified, unless the + ** atomic-update optimization is used it is almost certain that the + ** journal requires a sync here. However, in locking_mode=exclusive + ** on a system under memory pressure it is just possible that this is + ** not the case. In this case it is likely enough that the redundant + ** xSync() call will be changed to a no-op by the OS anyhow. + */ + rc = syncJournal(pPager, 0); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache)); + if( rc!=SQLITE_OK ){ + assert( rc!=SQLITE_IOERR_BLOCKED ); + goto commit_phase_one_exit; + } + sqlite3PcacheCleanAll(pPager->pPCache); + + /* If the file on disk is smaller than the database image, use + ** pager_truncate to grow the file here. This can happen if the database + ** image was extended as part of the current transaction and then the + ** last page in the db image moved to the free-list. In this case the + ** last page is never written out to disk, leaving the database file + ** undersized. Fix this now if it is the case. */ + if( pPager->dbSize>pPager->dbFileSize ){ + Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager)); + assert( pPager->eState==PAGER_WRITER_DBMOD ); + rc = pager_truncate(pPager, nNew); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + } + + /* Finally, sync the database file. */ + if( !noSync ){ + rc = sqlite3PagerSync(pPager, zMaster); + } + IOTRACE(("DBSYNC %p\n", pPager)) + } + } + +commit_phase_one_exit: + if( rc==SQLITE_OK && !pagerUseWal(pPager) ){ + pPager->eState = PAGER_WRITER_FINISHED; + } + return rc; +} + + +/* +** When this function is called, the database file has been completely +** updated to reflect the changes made by the current transaction and +** synced to disk. The journal file still exists in the file-system +** though, and if a failure occurs at this point it will eventually +** be used as a hot-journal and the current transaction rolled back. +** +** This function finalizes the journal file, either by deleting, +** truncating or partially zeroing it, so that it cannot be used +** for hot-journal rollback. Once this is done the transaction is +** irrevocably committed. +** +** If an error occurs, an IO error code is returned and the pager +** moves into the error state. Otherwise, SQLITE_OK is returned. +*/ +SQLITE_PRIVATE int sqlite3PagerCommitPhaseTwo(Pager *pPager){ + int rc = SQLITE_OK; /* Return code */ + + /* This routine should not be called if a prior error has occurred. + ** But if (due to a coding error elsewhere in the system) it does get + ** called, just return the same error code without doing anything. */ + if( NEVER(pPager->errCode) ) return pPager->errCode; + + assert( pPager->eState==PAGER_WRITER_LOCKED + || pPager->eState==PAGER_WRITER_FINISHED + || (pagerUseWal(pPager) && pPager->eState==PAGER_WRITER_CACHEMOD) + ); + assert( assert_pager_state(pPager) ); + + /* An optimization. If the database was not actually modified during + ** this transaction, the pager is running in exclusive-mode and is + ** using persistent journals, then this function is a no-op. + ** + ** The start of the journal file currently contains a single journal + ** header with the nRec field set to 0. If such a journal is used as + ** a hot-journal during hot-journal rollback, 0 changes will be made + ** to the database file. So there is no need to zero the journal + ** header. Since the pager is in exclusive mode, there is no need + ** to drop any locks either. + */ + if( pPager->eState==PAGER_WRITER_LOCKED + && pPager->exclusiveMode + && pPager->journalMode==PAGER_JOURNALMODE_PERSIST + ){ + assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) || !pPager->journalOff ); + pPager->eState = PAGER_READER; + return SQLITE_OK; + } + + PAGERTRACE(("COMMIT %d\n", PAGERID(pPager))); + pPager->iDataVersion++; + rc = pager_end_transaction(pPager, pPager->setMaster, 1); + return pager_error(pPager, rc); +} + +/* +** If a write transaction is open, then all changes made within the +** transaction are reverted and the current write-transaction is closed. +** The pager falls back to PAGER_READER state if successful, or PAGER_ERROR +** state if an error occurs. +** +** If the pager is already in PAGER_ERROR state when this function is called, +** it returns Pager.errCode immediately. No work is performed in this case. +** +** Otherwise, in rollback mode, this function performs two functions: +** +** 1) It rolls back the journal file, restoring all database file and +** in-memory cache pages to the state they were in when the transaction +** was opened, and +** +** 2) It finalizes the journal file, so that it is not used for hot +** rollback at any point in the future. +** +** Finalization of the journal file (task 2) is only performed if the +** rollback is successful. +** +** In WAL mode, all cache-entries containing data modified within the +** current transaction are either expelled from the cache or reverted to +** their pre-transaction state by re-reading data from the database or +** WAL files. The WAL transaction is then closed. +*/ +SQLITE_PRIVATE int sqlite3PagerRollback(Pager *pPager){ + int rc = SQLITE_OK; /* Return code */ + PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager))); + + /* PagerRollback() is a no-op if called in READER or OPEN state. If + ** the pager is already in the ERROR state, the rollback is not + ** attempted here. Instead, the error code is returned to the caller. + */ + assert( assert_pager_state(pPager) ); + if( pPager->eState==PAGER_ERROR ) return pPager->errCode; + if( pPager->eState<=PAGER_READER ) return SQLITE_OK; + + if( pagerUseWal(pPager) ){ + int rc2; + rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1); + rc2 = pager_end_transaction(pPager, pPager->setMaster, 0); + if( rc==SQLITE_OK ) rc = rc2; + }else if( !isOpen(pPager->jfd) || pPager->eState==PAGER_WRITER_LOCKED ){ + int eState = pPager->eState; + rc = pager_end_transaction(pPager, 0, 0); + if( eState>PAGER_WRITER_LOCKED ){ + /* This can happen using journal_mode=off. Move the pager to the error + ** state to indicate that the contents of the cache may not be trusted. + ** Any active readers will get SQLITE_ABORT. + */ + pPager->errCode = SQLITE_ABORT; + pPager->eState = PAGER_ERROR; + return rc; + } + }else{ + rc = pager_playback(pPager, 0); + } + + assert( pPager->eState==PAGER_READER || rc!=SQLITE_OK ); + assert( rc==SQLITE_OK || rc==SQLITE_FULL || rc==SQLITE_CORRUPT + || rc==SQLITE_NOMEM || (rc&0xFF)==SQLITE_IOERR + || rc==SQLITE_CANTOPEN + ); + + /* If an error occurs during a ROLLBACK, we can no longer trust the pager + ** cache. So call pager_error() on the way out to make any error persistent. + */ + return pager_error(pPager, rc); +} + +/* +** Return TRUE if the database file is opened read-only. Return FALSE +** if the database is (in theory) writable. +*/ +SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager *pPager){ + return pPager->readOnly; +} + +#ifdef SQLITE_DEBUG +/* +** Return the sum of the reference counts for all pages held by pPager. +*/ +SQLITE_PRIVATE int sqlite3PagerRefcount(Pager *pPager){ + return sqlite3PcacheRefCount(pPager->pPCache); +} +#endif + +#if 0 +/* +** Return the approximate number of bytes of memory currently +** used by the pager and its associated cache. +*/ +SQLITE_PRIVATE int sqlite3PagerMemUsed(Pager *pPager){ + int perPageSize = pPager->pageSize + pPager->nExtra + sizeof(PgHdr) + + 5*sizeof(void*); + return perPageSize*sqlite3PcachePagecount(pPager->pPCache) + + sqlite3MallocSize(pPager) + + pPager->pageSize; +} +#endif + +/* +** Return the number of references to the specified page. +*/ +SQLITE_PRIVATE int sqlite3PagerPageRefcount(DbPage *pPage){ + return sqlite3PcachePageRefcount(pPage); +} + +#ifdef SQLITE_TEST +/* +** This routine is used for testing and analysis only. +*/ +SQLITE_PRIVATE int *sqlite3PagerStats(Pager *pPager){ + static int a[11]; + a[0] = sqlite3PcacheRefCount(pPager->pPCache); + a[1] = sqlite3PcachePagecount(pPager->pPCache); + a[2] = sqlite3PcacheGetCachesize(pPager->pPCache); + a[3] = pPager->eState==PAGER_OPEN ? -1 : (int) pPager->dbSize; + a[4] = pPager->eState; + a[5] = pPager->errCode; + a[6] = pPager->aStat[PAGER_STAT_HIT]; + a[7] = pPager->aStat[PAGER_STAT_MISS]; + a[8] = 0; /* Used to be pPager->nOvfl */ + a[9] = pPager->nRead; + a[10] = pPager->aStat[PAGER_STAT_WRITE]; + return a; +} +#endif + +#if 0 +/* +** Parameter eStat must be either SQLITE_DBSTATUS_CACHE_HIT or +** SQLITE_DBSTATUS_CACHE_MISS. Before returning, *pnVal is incremented by the +** current cache hit or miss count, according to the value of eStat. If the +** reset parameter is non-zero, the cache hit or miss count is zeroed before +** returning. +*/ +SQLITE_PRIVATE void sqlite3PagerCacheStat(Pager *pPager, int eStat, int reset, int *pnVal){ + + assert( eStat==SQLITE_DBSTATUS_CACHE_HIT + || eStat==SQLITE_DBSTATUS_CACHE_MISS + || eStat==SQLITE_DBSTATUS_CACHE_WRITE + ); + + assert( SQLITE_DBSTATUS_CACHE_HIT+1==SQLITE_DBSTATUS_CACHE_MISS ); + assert( SQLITE_DBSTATUS_CACHE_HIT+2==SQLITE_DBSTATUS_CACHE_WRITE ); + assert( PAGER_STAT_HIT==0 && PAGER_STAT_MISS==1 && PAGER_STAT_WRITE==2 ); + + *pnVal += pPager->aStat[eStat - SQLITE_DBSTATUS_CACHE_HIT]; + if( reset ){ + pPager->aStat[eStat - SQLITE_DBSTATUS_CACHE_HIT] = 0; + } +} +#endif + +/* +** Check that there are at least nSavepoint savepoints open. If there are +** currently less than nSavepoints open, then open one or more savepoints +** to make up the difference. If the number of savepoints is already +** equal to nSavepoint, then this function is a no-op. +** +** If a memory allocation fails, SQLITE_NOMEM is returned. If an error +** occurs while opening the sub-journal file, then an IO error code is +** returned. Otherwise, SQLITE_OK. +*/ +static SQLITE_NOINLINE int pagerOpenSavepoint(Pager *pPager, int nSavepoint){ + int rc = SQLITE_OK; /* Return code */ + int nCurrent = pPager->nSavepoint; /* Current number of savepoints */ + int ii; /* Iterator variable */ + PagerSavepoint *aNew; /* New Pager.aSavepoint array */ + + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + assert( assert_pager_state(pPager) ); + assert( nSavepoint>nCurrent && pPager->useJournal ); + + /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM + ** if the allocation fails. Otherwise, zero the new portion in case a + ** malloc failure occurs while populating it in the for(...) loop below. + */ + aNew = (PagerSavepoint *)sqlite3Realloc( + pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint + ); + if( !aNew ){ + return SQLITE_NOMEM; + } + memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint)); + pPager->aSavepoint = aNew; + + /* Populate the PagerSavepoint structures just allocated. */ + for(ii=nCurrent; ii<nSavepoint; ii++){ + aNew[ii].nOrig = pPager->dbSize; + if( isOpen(pPager->jfd) && pPager->journalOff>0 ){ + aNew[ii].iOffset = pPager->journalOff; + }else{ + aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager); + } + aNew[ii].iSubRec = pPager->nSubRec; + aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize); + if( !aNew[ii].pInSavepoint ){ + return SQLITE_NOMEM; + } + if( pagerUseWal(pPager) ){ + sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData); + } + pPager->nSavepoint = ii+1; + } + assert( pPager->nSavepoint==nSavepoint ); + assertTruncateConstraint(pPager); + return rc; +} +SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){ + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + assert( assert_pager_state(pPager) ); + + if( nSavepoint>pPager->nSavepoint && pPager->useJournal ){ + return pagerOpenSavepoint(pPager, nSavepoint); + }else{ + return SQLITE_OK; + } +} + + +/* +** This function is called to rollback or release (commit) a savepoint. +** The savepoint to release or rollback need not be the most recently +** created savepoint. +** +** Parameter op is always either SAVEPOINT_ROLLBACK or SAVEPOINT_RELEASE. +** If it is SAVEPOINT_RELEASE, then release and destroy the savepoint with +** index iSavepoint. If it is SAVEPOINT_ROLLBACK, then rollback all changes +** that have occurred since the specified savepoint was created. +** +** The savepoint to rollback or release is identified by parameter +** iSavepoint. A value of 0 means to operate on the outermost savepoint +** (the first created). A value of (Pager.nSavepoint-1) means operate +** on the most recently created savepoint. If iSavepoint is greater than +** (Pager.nSavepoint-1), then this function is a no-op. +** +** If a negative value is passed to this function, then the current +** transaction is rolled back. This is different to calling +** sqlite3PagerRollback() because this function does not terminate +** the transaction or unlock the database, it just restores the +** contents of the database to its original state. +** +** In any case, all savepoints with an index greater than iSavepoint +** are destroyed. If this is a release operation (op==SAVEPOINT_RELEASE), +** then savepoint iSavepoint is also destroyed. +** +** This function may return SQLITE_NOMEM if a memory allocation fails, +** or an IO error code if an IO error occurs while rolling back a +** savepoint. If no errors occur, SQLITE_OK is returned. +*/ +SQLITE_PRIVATE int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){ + int rc = pPager->errCode; /* Return code */ + + assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK ); + assert( iSavepoint>=0 || op==SAVEPOINT_ROLLBACK ); + + if( rc==SQLITE_OK && iSavepoint<pPager->nSavepoint ){ + int ii; /* Iterator variable */ + int nNew; /* Number of remaining savepoints after this op. */ + + /* Figure out how many savepoints will still be active after this + ** operation. Store this value in nNew. Then free resources associated + ** with any savepoints that are destroyed by this operation. + */ + nNew = iSavepoint + (( op==SAVEPOINT_RELEASE ) ? 0 : 1); + for(ii=nNew; ii<pPager->nSavepoint; ii++){ + sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint); + } + pPager->nSavepoint = nNew; + + /* If this is a release of the outermost savepoint, truncate + ** the sub-journal to zero bytes in size. */ + if( op==SAVEPOINT_RELEASE ){ + if( nNew==0 && isOpen(pPager->sjfd) ){ + /* Only truncate if it is an in-memory sub-journal. */ + if( sqlite3IsMemJournal(pPager->sjfd) ){ + rc = sqlite3OsTruncate(pPager->sjfd, 0); + assert( rc==SQLITE_OK ); + } + pPager->nSubRec = 0; + } + } + /* Else this is a rollback operation, playback the specified savepoint. + ** If this is a temp-file, it is possible that the journal file has + ** not yet been opened. In this case there have been no changes to + ** the database file, so the playback operation can be skipped. + */ + else if( pagerUseWal(pPager) || isOpen(pPager->jfd) ){ + PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1]; + rc = pagerPlaybackSavepoint(pPager, pSavepoint); + assert(rc!=SQLITE_DONE); + } + } + + return rc; +} + +/* +** Return the full pathname of the database file. +** +** Except, if the pager is in-memory only, then return an empty string if +** nullIfMemDb is true. This routine is called with nullIfMemDb==1 when +** used to report the filename to the user, for compatibility with legacy +** behavior. But when the Btree needs to know the filename for matching to +** shared cache, it uses nullIfMemDb==0 so that in-memory databases can +** participate in shared-cache. +*/ +SQLITE_PRIVATE const char *sqlite3PagerFilename(Pager *pPager, int nullIfMemDb){ + return (nullIfMemDb && pPager->memDb) ? "" : pPager->zFilename; +} + +/* +** Return the VFS structure for the pager. +*/ +SQLITE_PRIVATE const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){ + return pPager->pVfs; +} + +#ifdef SQLITE_DIRECT_OVERFLOW_READ +/* +** Return the file handle for the database file associated +** with the pager. This might return NULL if the file has +** not yet been opened. +*/ +SQLITE_PRIVATE sqlite3_file *sqlite3PagerFile(Pager *pPager){ + return pPager->fd; +} +#endif + +/* +** Return the full pathname of the journal file. +*/ +SQLITE_PRIVATE const char *sqlite3PagerJournalname(Pager *pPager){ + return pPager->zJournal; +} + +/* +** Return true if fsync() calls are disabled for this pager. Return FALSE +** if fsync()s are executed normally. +*/ +SQLITE_PRIVATE int sqlite3PagerNosync(Pager *pPager){ + return pPager->noSync; +} + +#ifdef SQLITE_HAS_CODEC +/* +** Set or retrieve the codec for this pager +*/ +SQLITE_PRIVATE void sqlite3PagerSetCodec( + Pager *pPager, + void *(*xCodec)(void*,void*,Pgno,int), + void (*xCodecSizeChng)(void*,int,int), + void (*xCodecFree)(void*), + void *pCodec +){ + if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec); + pPager->xCodec = pPager->memDb ? 0 : xCodec; + pPager->xCodecSizeChng = xCodecSizeChng; + pPager->xCodecFree = xCodecFree; + pPager->pCodec = pCodec; + pagerReportSize(pPager); +} +SQLITE_PRIVATE void *sqlite3PagerGetCodec(Pager *pPager){ + return pPager->pCodec; +} + +/* +** This function is called by the wal module when writing page content +** into the log file. +** +** This function returns a pointer to a buffer containing the encrypted +** page content. If a malloc fails, this function may return NULL. +*/ +SQLITE_PRIVATE void *sqlite3PagerCodec(PgHdr *pPg){ + void *aData = 0; + CODEC2(pPg->pPager, pPg->pData, pPg->pgno, 6, return 0, aData); + return aData; +} + +/* +** Return the current pager state +*/ +SQLITE_PRIVATE int sqlite3PagerState(Pager *pPager){ + return pPager->eState; +} +#endif /* SQLITE_HAS_CODEC */ + +#ifndef SQLITE_OMIT_AUTOVACUUM +/* +** Move the page pPg to location pgno in the file. +** +** There must be no references to the page previously located at +** pgno (which we call pPgOld) though that page is allowed to be +** in cache. If the page previously located at pgno is not already +** in the rollback journal, it is not put there by by this routine. +** +** References to the page pPg remain valid. Updating any +** meta-data associated with pPg (i.e. data stored in the nExtra bytes +** allocated along with the page) is the responsibility of the caller. +** +** A transaction must be active when this routine is called. It used to be +** required that a statement transaction was not active, but this restriction +** has been removed (CREATE INDEX needs to move a page when a statement +** transaction is active). +** +** If the fourth argument, isCommit, is non-zero, then this page is being +** moved as part of a database reorganization just before the transaction +** is being committed. In this case, it is guaranteed that the database page +** pPg refers to will not be written to again within this transaction. +** +** This function may return SQLITE_NOMEM or an IO error code if an error +** occurs. Otherwise, it returns SQLITE_OK. +*/ +SQLITE_PRIVATE int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){ + PgHdr *pPgOld; /* The page being overwritten. */ + Pgno needSyncPgno = 0; /* Old value of pPg->pgno, if sync is required */ + int rc; /* Return code */ + + assert( pPg->nRef>0 ); + assert( pPager->eState==PAGER_WRITER_CACHEMOD + || pPager->eState==PAGER_WRITER_DBMOD + ); + assert( assert_pager_state(pPager) ); + + /* If the page being moved is dirty and has not been saved by the latest + ** savepoint, then save the current contents of the page into the + ** sub-journal now. This is required to handle the following scenario: + ** + ** BEGIN; + ** <journal page X, then modify it in memory> + ** SAVEPOINT one; + ** <Move page X to location Y> + ** ROLLBACK TO one; + ** + ** If page X were not written to the sub-journal here, it would not + ** be possible to restore its contents when the "ROLLBACK TO one" + ** statement were is processed. + ** + ** subjournalPage() may need to allocate space to store pPg->pgno into + ** one or more savepoint bitvecs. This is the reason this function + ** may return SQLITE_NOMEM. + */ + if( (pPg->flags & PGHDR_DIRTY)!=0 + && SQLITE_OK!=(rc = subjournalPageIfRequired(pPg)) + ){ + return rc; + } + + PAGERTRACE(("MOVE %d page %d (needSync=%d) moves to %d\n", + PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno)); + IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno)) + + /* If the journal needs to be sync()ed before page pPg->pgno can + ** be written to, store pPg->pgno in local variable needSyncPgno. + ** + ** If the isCommit flag is set, there is no need to remember that + ** the journal needs to be sync()ed before database page pPg->pgno + ** can be written to. The caller has already promised not to write to it. + */ + if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){ + needSyncPgno = pPg->pgno; + assert( pPager->journalMode==PAGER_JOURNALMODE_OFF || + pageInJournal(pPager, pPg) || pPg->pgno>pPager->dbOrigSize ); + assert( pPg->flags&PGHDR_DIRTY ); + } + + /* If the cache contains a page with page-number pgno, remove it + ** from its hash chain. Also, if the PGHDR_NEED_SYNC flag was set for + ** page pgno before the 'move' operation, it needs to be retained + ** for the page moved there. + */ + pPg->flags &= ~PGHDR_NEED_SYNC; + pPgOld = sqlite3PagerLookup(pPager, pgno); + assert( !pPgOld || pPgOld->nRef==1 ); + if( pPgOld ){ + pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC); + sqlite3PcacheDrop(pPgOld); + } + + sqlite3PcacheMove(pPg, pgno); + sqlite3PcacheMakeDirty(pPg); + + if( needSyncPgno ){ + /* If needSyncPgno is non-zero, then the journal file needs to be + ** sync()ed before any data is written to database file page needSyncPgno. + ** Currently, no such page exists in the page-cache and the + ** "is journaled" bitvec flag has been set. This needs to be remedied by + ** loading the page into the pager-cache and setting the PGHDR_NEED_SYNC + ** flag. + ** + ** If the attempt to load the page into the page-cache fails, (due + ** to a malloc() or IO failure), clear the bit in the pInJournal[] + ** array. Otherwise, if the page is loaded and written again in + ** this transaction, it may be written to the database file before + ** it is synced into the journal file. This way, it may end up in + ** the journal file twice, but that is not a problem. + */ + PgHdr *pPgHdr; + rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr); + if( rc!=SQLITE_OK ){ + if( needSyncPgno<=pPager->dbOrigSize ){ + assert( pPager->pTmpSpace!=0 ); + sqlite3BitvecClear(pPager->pInJournal, needSyncPgno, pPager->pTmpSpace); + } + return rc; + } + pPgHdr->flags |= PGHDR_NEED_SYNC; + sqlite3PcacheMakeDirty(pPgHdr); + sqlite3PagerUnrefNotNull(pPgHdr); + } + + return SQLITE_OK; +} +#endif + +/* +** The page handle passed as the first argument refers to a dirty page +** with a page number other than iNew. This function changes the page's +** page number to iNew and sets the value of the PgHdr.flags field to +** the value passed as the third parameter. +*/ +SQLITE_PRIVATE void sqlite3PagerRekey(DbPage *pPg, Pgno iNew, u16 flags){ + assert( pPg->pgno!=iNew ); + pPg->flags = flags; + sqlite3PcacheMove(pPg, iNew); +} + +/* +** Return a pointer to the data for the specified page. +*/ +SQLITE_PRIVATE void *sqlite3PagerGetData(DbPage *pPg){ + assert( pPg->nRef>0 || pPg->pPager->memDb ); + return pPg->pData; +} + +/* +** Return a pointer to the Pager.nExtra bytes of "extra" space +** allocated along with the specified page. +*/ +SQLITE_PRIVATE void *sqlite3PagerGetExtra(DbPage *pPg){ + return pPg->pExtra; +} + +#if 0 +/* +** Get/set the locking-mode for this pager. Parameter eMode must be one +** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or +** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then +** the locking-mode is set to the value specified. +** +** The returned value is either PAGER_LOCKINGMODE_NORMAL or +** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated) +** locking-mode. +*/ +SQLITE_PRIVATE int sqlite3PagerLockingMode(Pager *pPager, int eMode){ + assert( eMode==PAGER_LOCKINGMODE_QUERY + || eMode==PAGER_LOCKINGMODE_NORMAL + || eMode==PAGER_LOCKINGMODE_EXCLUSIVE ); + assert( PAGER_LOCKINGMODE_QUERY<0 ); + assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 ); + assert( pPager->exclusiveMode || 0==sqlite3WalHeapMemory(pPager->pWal) ); + if( eMode>=0 && !pPager->tempFile && !sqlite3WalHeapMemory(pPager->pWal) ){ + pPager->exclusiveMode = (u8)eMode; + } + return (int)pPager->exclusiveMode; +} + +/* +** Set the journal-mode for this pager. Parameter eMode must be one of: +** +** PAGER_JOURNALMODE_DELETE +** PAGER_JOURNALMODE_TRUNCATE +** PAGER_JOURNALMODE_PERSIST +** PAGER_JOURNALMODE_OFF +** PAGER_JOURNALMODE_MEMORY +** PAGER_JOURNALMODE_WAL +** +** The journalmode is set to the value specified if the change is allowed. +** The change may be disallowed for the following reasons: +** +** * An in-memory database can only have its journal_mode set to _OFF +** or _MEMORY. +** +** * Temporary databases cannot have _WAL journalmode. +** +** The returned indicate the current (possibly updated) journal-mode. +*/ +SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){ + u8 eOld = pPager->journalMode; /* Prior journalmode */ + +#ifdef SQLITE_DEBUG + /* The print_pager_state() routine is intended to be used by the debugger + ** only. We invoke it once here to suppress a compiler warning. */ + print_pager_state(pPager); +#endif + + + /* The eMode parameter is always valid */ + assert( eMode==PAGER_JOURNALMODE_DELETE + || eMode==PAGER_JOURNALMODE_TRUNCATE + || eMode==PAGER_JOURNALMODE_PERSIST + || eMode==PAGER_JOURNALMODE_OFF + || eMode==PAGER_JOURNALMODE_WAL + || eMode==PAGER_JOURNALMODE_MEMORY ); + + /* This routine is only called from the OP_JournalMode opcode, and + ** the logic there will never allow a temporary file to be changed + ** to WAL mode. + */ + assert( pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL ); + + if( eMode!=eOld ){ + + /* Change the journal mode. */ + assert( pPager->eState!=PAGER_ERROR ); + pPager->journalMode = (u8)eMode; + + /* When transistioning from TRUNCATE or PERSIST to any other journal + ** mode except WAL, unless the pager is in locking_mode=exclusive mode, + ** delete the journal file. + */ + assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 ); + assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 ); + assert( (PAGER_JOURNALMODE_DELETE & 5)==0 ); + assert( (PAGER_JOURNALMODE_MEMORY & 5)==4 ); + assert( (PAGER_JOURNALMODE_OFF & 5)==0 ); + assert( (PAGER_JOURNALMODE_WAL & 5)==5 ); + + assert( isOpen(pPager->fd) || pPager->exclusiveMode ); + if( !pPager->exclusiveMode && (eOld & 5)==1 && (eMode & 1)==0 ){ + + /* In this case we would like to delete the journal file. If it is + ** not possible, then that is not a problem. Deleting the journal file + ** here is an optimization only. + ** + ** Before deleting the journal file, obtain a RESERVED lock on the + ** database file. This ensures that the journal file is not deleted + ** while it is in use by some other client. + */ + sqlite3OsClose(pPager->jfd); + if( pPager->eLock>=RESERVED_LOCK ){ + sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); + }else{ + int rc = SQLITE_OK; + int state = pPager->eState; + assert( state==PAGER_OPEN || state==PAGER_READER ); + if( state==PAGER_OPEN ){ + rc = sqlite3PagerSharedLock(pPager); + } + if( pPager->eState==PAGER_READER ){ + assert( rc==SQLITE_OK ); + rc = pagerLockDb(pPager, RESERVED_LOCK); + } + if( rc==SQLITE_OK ){ + sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); + } + if( rc==SQLITE_OK && state==PAGER_READER ){ + pagerUnlockDb(pPager, SHARED_LOCK); + }else if( state==PAGER_OPEN ){ + pager_unlock(pPager); + } + assert( state==pPager->eState ); + } + }else if( eMode==PAGER_JOURNALMODE_OFF ){ + sqlite3OsClose(pPager->jfd); + } + } + + /* Return the new journal mode */ + return (int)pPager->journalMode; +} + +/* +** Return the current journal mode. +*/ +SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager *pPager){ + return (int)pPager->journalMode; +} + +/* +** Return TRUE if the pager is in a state where it is OK to change the +** journalmode. Journalmode changes can only happen when the database +** is unmodified. +*/ +SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager *pPager){ + assert( assert_pager_state(pPager) ); + if( pPager->eState>=PAGER_WRITER_CACHEMOD ) return 0; + if( NEVER(isOpen(pPager->jfd) && pPager->journalOff>0) ) return 0; + return 1; +} + +/* +** Get/set the size-limit used for persistent journal files. +** +** Setting the size limit to -1 means no limit is enforced. +** An attempt to set a limit smaller than -1 is a no-op. +*/ +SQLITE_PRIVATE i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){ + if( iLimit>=-1 ){ + pPager->journalSizeLimit = iLimit; + sqlite3WalLimit(pPager->pWal, iLimit); + } + return pPager->journalSizeLimit; +} +#endif + +#ifndef SQLITE_OMIT_VACUUM +#if 0 +/* +** Clear the pager cache. +*/ +SQLITE_PRIVATE void sqlite3PagerClearCache(Pager *pPager){ + if( pPager->tempFile==0 ) pager_reset(pPager); +} +#endif +#endif + +#ifndef SQLITE_OMIT_WAL +/* +** This function is called when the user invokes "PRAGMA wal_checkpoint", +** "PRAGMA wal_blocking_checkpoint" or calls the sqlite3_wal_checkpoint() +** or wal_blocking_checkpoint() API functions. +** +** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART. +*/ +SQLITE_PRIVATE int sqlite3PagerCheckpoint(Pager *pPager, int eMode, int *pnLog, int *pnCkpt){ + int rc = SQLITE_OK; + if( pPager->pWal ){ + rc = sqlite3WalCheckpoint(pPager->pWal, eMode, + (eMode==SQLITE_CHECKPOINT_PASSIVE ? 0 : pPager->xBusyHandler), + pPager->pBusyHandlerArg, + pPager->ckptSyncFlags, pPager->pageSize, (u8 *)pPager->pTmpSpace, + pnLog, pnCkpt + ); + } + return rc; +} + +#if 0 +SQLITE_PRIVATE int sqlite3PagerWalCallback(Pager *pPager){ + return sqlite3WalCallback(pPager->pWal); +} +#endif + +/* +** Return true if the underlying VFS for the given pager supports the +** primitives necessary for write-ahead logging. +*/ +SQLITE_PRIVATE int sqlite3PagerWalSupported(Pager *pPager){ + const sqlite3_io_methods *pMethods = pPager->fd->pMethods; + return pPager->exclusiveMode || (pMethods->iVersion>=2 && pMethods->xShmMap); +} + +/* +** Attempt to take an exclusive lock on the database file. If a PENDING lock +** is obtained instead, immediately release it. +*/ +static int pagerExclusiveLock(Pager *pPager){ + int rc; /* Return code */ + + assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK ); + rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); + if( rc!=SQLITE_OK ){ + /* If the attempt to grab the exclusive lock failed, release the + ** pending lock that may have been obtained instead. */ + pagerUnlockDb(pPager, SHARED_LOCK); + } + + return rc; +} + +/* +** Call sqlite3WalOpen() to open the WAL handle. If the pager is in +** exclusive-locking mode when this function is called, take an EXCLUSIVE +** lock on the database file and use heap-memory to store the wal-index +** in. Otherwise, use the normal shared-memory. +*/ +static int pagerOpenWal(Pager *pPager){ + int rc = SQLITE_OK; + + assert( pPager->pWal==0 && pPager->tempFile==0 ); + assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK ); + + /* If the pager is already in exclusive-mode, the WAL module will use + ** heap-memory for the wal-index instead of the VFS shared-memory + ** implementation. Take the exclusive lock now, before opening the WAL + ** file, to make sure this is safe. + */ + if( pPager->exclusiveMode ){ + rc = pagerExclusiveLock(pPager); + } + + /* Open the connection to the log file. If this operation fails, + ** (e.g. due to malloc() failure), return an error code. + */ + if( rc==SQLITE_OK ){ + rc = sqlite3WalOpen(pPager->pVfs, + pPager->fd, pPager->zWal, pPager->exclusiveMode, + pPager->journalSizeLimit, &pPager->pWal + ); + } + pagerFixMaplimit(pPager); + + return rc; +} + + +/* +** The caller must be holding a SHARED lock on the database file to call +** this function. +** +** If the pager passed as the first argument is open on a real database +** file (not a temp file or an in-memory database), and the WAL file +** is not already open, make an attempt to open it now. If successful, +** return SQLITE_OK. If an error occurs or the VFS used by the pager does +** not support the xShmXXX() methods, return an error code. *pbOpen is +** not modified in either case. +** +** If the pager is open on a temp-file (or in-memory database), or if +** the WAL file is already open, set *pbOpen to 1 and return SQLITE_OK +** without doing anything. +*/ +SQLITE_PRIVATE int sqlite3PagerOpenWal( + Pager *pPager, /* Pager object */ + int *pbOpen /* OUT: Set to true if call is a no-op */ +){ + int rc = SQLITE_OK; /* Return code */ + + assert( assert_pager_state(pPager) ); + assert( pPager->eState==PAGER_OPEN || pbOpen ); + assert( pPager->eState==PAGER_READER || !pbOpen ); + assert( pbOpen==0 || *pbOpen==0 ); + assert( pbOpen!=0 || (!pPager->tempFile && !pPager->pWal) ); + + if( !pPager->tempFile && !pPager->pWal ){ + if( !sqlite3PagerWalSupported(pPager) ) return SQLITE_CANTOPEN; + + /* Close any rollback journal previously open */ + sqlite3OsClose(pPager->jfd); + + rc = pagerOpenWal(pPager); + if( rc==SQLITE_OK ){ + pPager->journalMode = PAGER_JOURNALMODE_WAL; + pPager->eState = PAGER_OPEN; + } + }else{ + *pbOpen = 1; + } + + return rc; +} + +/* +** This function is called to close the connection to the log file prior +** to switching from WAL to rollback mode. +** +** Before closing the log file, this function attempts to take an +** EXCLUSIVE lock on the database file. If this cannot be obtained, an +** error (SQLITE_BUSY) is returned and the log connection is not closed. +** If successful, the EXCLUSIVE lock is not released before returning. +*/ +#if 0 +SQLITE_PRIVATE int sqlite3PagerCloseWal(Pager *pPager){ + int rc = SQLITE_OK; + + assert( pPager->journalMode==PAGER_JOURNALMODE_WAL ); + + /* If the log file is not already open, but does exist in the file-system, + ** it may need to be checkpointed before the connection can switch to + ** rollback mode. Open it now so this can happen. + */ + if( !pPager->pWal ){ + int logexists = 0; + rc = pagerLockDb(pPager, SHARED_LOCK); + if( rc==SQLITE_OK ){ + rc = sqlite3OsAccess( + pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &logexists + ); + } + if( rc==SQLITE_OK && logexists ){ + rc = pagerOpenWal(pPager); + } + } + + /* Checkpoint and close the log. Because an EXCLUSIVE lock is held on + ** the database file, the log and log-summary files will be deleted. + */ + if( rc==SQLITE_OK && pPager->pWal ){ + rc = pagerExclusiveLock(pPager); + if( rc==SQLITE_OK ){ + rc = sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, + pPager->pageSize, (u8*)pPager->pTmpSpace); + pPager->pWal = 0; + pagerFixMaplimit(pPager); + } + } + return rc; +} +#endif +#endif /* !SQLITE_OMIT_WAL */ + +#ifdef SQLITE_ENABLE_ZIPVFS +/* +** A read-lock must be held on the pager when this function is called. If +** the pager is in WAL mode and the WAL file currently contains one or more +** frames, return the size in bytes of the page images stored within the +** WAL frames. Otherwise, if this is not a WAL database or the WAL file +** is empty, return 0. +*/ +SQLITE_PRIVATE int sqlite3PagerWalFramesize(Pager *pPager){ + assert( pPager->eState>=PAGER_READER ); + return sqlite3WalFramesize(pPager->pWal); +} +#endif + + +#endif /* SQLITE_OMIT_DISKIO */ + +/************** End of pager.c ***********************************************/ +/************** Begin file wal.c *********************************************/ +/* +** 2010 February 1 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains the implementation of a write-ahead log (WAL) used in +** "journal_mode=WAL" mode. +** +** WRITE-AHEAD LOG (WAL) FILE FORMAT +** +** A WAL file consists of a header followed by zero or more "frames". +** Each frame records the revised content of a single page from the +** database file. All changes to the database are recorded by writing +** frames into the WAL. Transactions commit when a frame is written that +** contains a commit marker. A single WAL can and usually does record +** multiple transactions. Periodically, the content of the WAL is +** transferred back into the database file in an operation called a +** "checkpoint". +** +** A single WAL file can be used multiple times. In other words, the +** WAL can fill up with frames and then be checkpointed and then new +** frames can overwrite the old ones. A WAL always grows from beginning +** toward the end. Checksums and counters attached to each frame are +** used to determine which frames within the WAL are valid and which +** are leftovers from prior checkpoints. +** +** The WAL header is 32 bytes in size and consists of the following eight +** big-endian 32-bit unsigned integer values: +** +** 0: Magic number. 0x377f0682 or 0x377f0683 +** 4: File format version. Currently 3007000 +** 8: Database page size. Example: 1024 +** 12: Checkpoint sequence number +** 16: Salt-1, random integer incremented with each checkpoint +** 20: Salt-2, a different random integer changing with each ckpt +** 24: Checksum-1 (first part of checksum for first 24 bytes of header). +** 28: Checksum-2 (second part of checksum for first 24 bytes of header). +** +** Immediately following the wal-header are zero or more frames. Each +** frame consists of a 24-byte frame-header followed by a <page-size> bytes +** of page data. The frame-header is six big-endian 32-bit unsigned +** integer values, as follows: +** +** 0: Page number. +** 4: For commit records, the size of the database image in pages +** after the commit. For all other records, zero. +** 8: Salt-1 (copied from the header) +** 12: Salt-2 (copied from the header) +** 16: Checksum-1. +** 20: Checksum-2. +** +** A frame is considered valid if and only if the following conditions are +** true: +** +** (1) The salt-1 and salt-2 values in the frame-header match +** salt values in the wal-header +** +** (2) The checksum values in the final 8 bytes of the frame-header +** exactly match the checksum computed consecutively on the +** WAL header and the first 8 bytes and the content of all frames +** up to and including the current frame. +** +** The checksum is computed using 32-bit big-endian integers if the +** magic number in the first 4 bytes of the WAL is 0x377f0683 and it +** is computed using little-endian if the magic number is 0x377f0682. +** The checksum values are always stored in the frame header in a +** big-endian format regardless of which byte order is used to compute +** the checksum. The checksum is computed by interpreting the input as +** an even number of unsigned 32-bit integers: x[0] through x[N]. The +** algorithm used for the checksum is as follows: +** +** for i from 0 to n-1 step 2: +** s0 += x[i] + s1; +** s1 += x[i+1] + s0; +** endfor +** +** Note that s0 and s1 are both weighted checksums using fibonacci weights +** in reverse order (the largest fibonacci weight occurs on the first element +** of the sequence being summed.) The s1 value spans all 32-bit +** terms of the sequence whereas s0 omits the final term. +** +** On a checkpoint, the WAL is first VFS.xSync-ed, then valid content of the +** WAL is transferred into the database, then the database is VFS.xSync-ed. +** The VFS.xSync operations serve as write barriers - all writes launched +** before the xSync must complete before any write that launches after the +** xSync begins. +** +** After each checkpoint, the salt-1 value is incremented and the salt-2 +** value is randomized. This prevents old and new frames in the WAL from +** being considered valid at the same time and being checkpointing together +** following a crash. +** +** READER ALGORITHM +** +** To read a page from the database (call it page number P), a reader +** first checks the WAL to see if it contains page P. If so, then the +** last valid instance of page P that is a followed by a commit frame +** or is a commit frame itself becomes the value read. If the WAL +** contains no copies of page P that are valid and which are a commit +** frame or are followed by a commit frame, then page P is read from +** the database file. +** +** To start a read transaction, the reader records the index of the last +** valid frame in the WAL. The reader uses this recorded "mxFrame" value +** for all subsequent read operations. New transactions can be appended +** to the WAL, but as long as the reader uses its original mxFrame value +** and ignores the newly appended content, it will see a consistent snapshot +** of the database from a single point in time. This technique allows +** multiple concurrent readers to view different versions of the database +** content simultaneously. +** +** The reader algorithm in the previous paragraphs works correctly, but +** because frames for page P can appear anywhere within the WAL, the +** reader has to scan the entire WAL looking for page P frames. If the +** WAL is large (multiple megabytes is typical) that scan can be slow, +** and read performance suffers. To overcome this problem, a separate +** data structure called the wal-index is maintained to expedite the +** search for frames of a particular page. +** +** WAL-INDEX FORMAT +** +** Conceptually, the wal-index is shared memory, though VFS implementations +** might choose to implement the wal-index using a mmapped file. Because +** the wal-index is shared memory, SQLite does not support journal_mode=WAL +** on a network filesystem. All users of the database must be able to +** share memory. +** +** The wal-index is transient. After a crash, the wal-index can (and should +** be) reconstructed from the original WAL file. In fact, the VFS is required +** to either truncate or zero the header of the wal-index when the last +** connection to it closes. Because the wal-index is transient, it can +** use an architecture-specific format; it does not have to be cross-platform. +** Hence, unlike the database and WAL file formats which store all values +** as big endian, the wal-index can store multi-byte values in the native +** byte order of the host computer. +** +** The purpose of the wal-index is to answer this question quickly: Given +** a page number P and a maximum frame index M, return the index of the +** last frame in the wal before frame M for page P in the WAL, or return +** NULL if there are no frames for page P in the WAL prior to M. +** +** The wal-index consists of a header region, followed by an one or +** more index blocks. +** +** The wal-index header contains the total number of frames within the WAL +** in the mxFrame field. +** +** Each index block except for the first contains information on +** HASHTABLE_NPAGE frames. The first index block contains information on +** HASHTABLE_NPAGE_ONE frames. The values of HASHTABLE_NPAGE_ONE and +** HASHTABLE_NPAGE are selected so that together the wal-index header and +** first index block are the same size as all other index blocks in the +** wal-index. +** +** Each index block contains two sections, a page-mapping that contains the +** database page number associated with each wal frame, and a hash-table +** that allows readers to query an index block for a specific page number. +** The page-mapping is an array of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE +** for the first index block) 32-bit page numbers. The first entry in the +** first index-block contains the database page number corresponding to the +** first frame in the WAL file. The first entry in the second index block +** in the WAL file corresponds to the (HASHTABLE_NPAGE_ONE+1)th frame in +** the log, and so on. +** +** The last index block in a wal-index usually contains less than the full +** complement of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE) page-numbers, +** depending on the contents of the WAL file. This does not change the +** allocated size of the page-mapping array - the page-mapping array merely +** contains unused entries. +** +** Even without using the hash table, the last frame for page P +** can be found by scanning the page-mapping sections of each index block +** starting with the last index block and moving toward the first, and +** within each index block, starting at the end and moving toward the +** beginning. The first entry that equals P corresponds to the frame +** holding the content for that page. +** +** The hash table consists of HASHTABLE_NSLOT 16-bit unsigned integers. +** HASHTABLE_NSLOT = 2*HASHTABLE_NPAGE, and there is one entry in the +** hash table for each page number in the mapping section, so the hash +** table is never more than half full. The expected number of collisions +** prior to finding a match is 1. Each entry of the hash table is an +** 1-based index of an entry in the mapping section of the same +** index block. Let K be the 1-based index of the largest entry in +** the mapping section. (For index blocks other than the last, K will +** always be exactly HASHTABLE_NPAGE (4096) and for the last index block +** K will be (mxFrame%HASHTABLE_NPAGE).) Unused slots of the hash table +** contain a value of 0. +** +** To look for page P in the hash table, first compute a hash iKey on +** P as follows: +** +** iKey = (P * 383) % HASHTABLE_NSLOT +** +** Then start scanning entries of the hash table, starting with iKey +** (wrapping around to the beginning when the end of the hash table is +** reached) until an unused hash slot is found. Let the first unused slot +** be at index iUnused. (iUnused might be less than iKey if there was +** wrap-around.) Because the hash table is never more than half full, +** the search is guaranteed to eventually hit an unused entry. Let +** iMax be the value between iKey and iUnused, closest to iUnused, +** where aHash[iMax]==P. If there is no iMax entry (if there exists +** no hash slot such that aHash[i]==p) then page P is not in the +** current index block. Otherwise the iMax-th mapping entry of the +** current index block corresponds to the last entry that references +** page P. +** +** A hash search begins with the last index block and moves toward the +** first index block, looking for entries corresponding to page P. On +** average, only two or three slots in each index block need to be +** examined in order to either find the last entry for page P, or to +** establish that no such entry exists in the block. Each index block +** holds over 4000 entries. So two or three index blocks are sufficient +** to cover a typical 10 megabyte WAL file, assuming 1K pages. 8 or 10 +** comparisons (on average) suffice to either locate a frame in the +** WAL or to establish that the frame does not exist in the WAL. This +** is much faster than scanning the entire 10MB WAL. +** +** Note that entries are added in order of increasing K. Hence, one +** reader might be using some value K0 and a second reader that started +** at a later time (after additional transactions were added to the WAL +** and to the wal-index) might be using a different value K1, where K1>K0. +** Both readers can use the same hash table and mapping section to get +** the correct result. There may be entries in the hash table with +** K>K0 but to the first reader, those entries will appear to be unused +** slots in the hash table and so the first reader will get an answer as +** if no values greater than K0 had ever been inserted into the hash table +** in the first place - which is what reader one wants. Meanwhile, the +** second reader using K1 will see additional values that were inserted +** later, which is exactly what reader two wants. +** +** When a rollback occurs, the value of K is decreased. Hash table entries +** that correspond to frames greater than the new K value are removed +** from the hash table at this point. +*/ +#ifndef SQLITE_OMIT_WAL + +/* #include "wal.h" */ + +/* +** Trace output macros +*/ +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) +SQLITE_PRIVATE int sqlite3WalTrace = 0; +# define WALTRACE(X) if(sqlite3WalTrace) sqlite3DebugPrintf X +#else +# define WALTRACE(X) +#endif + +/* +** The maximum (and only) versions of the wal and wal-index formats +** that may be interpreted by this version of SQLite. +** +** If a client begins recovering a WAL file and finds that (a) the checksum +** values in the wal-header are correct and (b) the version field is not +** WAL_MAX_VERSION, recovery fails and SQLite returns SQLITE_CANTOPEN. +** +** Similarly, if a client successfully reads a wal-index header (i.e. the +** checksum test is successful) and finds that the version field is not +** WALINDEX_MAX_VERSION, then no read-transaction is opened and SQLite +** returns SQLITE_CANTOPEN. +*/ +#define WAL_MAX_VERSION 3007000 +#define WALINDEX_MAX_VERSION 3007000 + +/* +** Indices of various locking bytes. WAL_NREADER is the number +** of available reader locks and should be at least 3. +*/ +#define WAL_WRITE_LOCK 0 +#define WAL_ALL_BUT_WRITE 1 +#define WAL_CKPT_LOCK 1 +#define WAL_RECOVER_LOCK 2 +#define WAL_READ_LOCK(I) (3+(I)) +#define WAL_NREADER (SQLITE_SHM_NLOCK-3) + + +/* Object declarations */ +typedef struct WalIndexHdr WalIndexHdr; +typedef struct WalIterator WalIterator; +typedef struct WalCkptInfo WalCkptInfo; + + +/* +** The following object holds a copy of the wal-index header content. +** +** The actual header in the wal-index consists of two copies of this +** object. +** +** The szPage value can be any power of 2 between 512 and 32768, inclusive. +** Or it can be 1 to represent a 65536-byte page. The latter case was +** added in 3.7.1 when support for 64K pages was added. +*/ +struct WalIndexHdr { + u32 iVersion; /* Wal-index version */ + u32 unused; /* Unused (padding) field */ + u32 iChange; /* Counter incremented each transaction */ + u8 isInit; /* 1 when initialized */ + u8 bigEndCksum; /* True if checksums in WAL are big-endian */ + u16 szPage; /* Database page size in bytes. 1==64K */ + u32 mxFrame; /* Index of last valid frame in the WAL */ + u32 nPage; /* Size of database in pages */ + u32 aFrameCksum[2]; /* Checksum of last frame in log */ + u32 aSalt[2]; /* Two salt values copied from WAL header */ + u32 aCksum[2]; /* Checksum over all prior fields */ +}; + +/* +** A copy of the following object occurs in the wal-index immediately +** following the second copy of the WalIndexHdr. This object stores +** information used by checkpoint. +** +** nBackfill is the number of frames in the WAL that have been written +** back into the database. (We call the act of moving content from WAL to +** database "backfilling".) The nBackfill number is never greater than +** WalIndexHdr.mxFrame. nBackfill can only be increased by threads +** holding the WAL_CKPT_LOCK lock (which includes a recovery thread). +** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from +** mxFrame back to zero when the WAL is reset. +** +** There is one entry in aReadMark[] for each reader lock. If a reader +** holds read-lock K, then the value in aReadMark[K] is no greater than +** the mxFrame for that reader. The value READMARK_NOT_USED (0xffffffff) +** for any aReadMark[] means that entry is unused. aReadMark[0] is +** a special case; its value is never used and it exists as a place-holder +** to avoid having to offset aReadMark[] indexs by one. Readers holding +** WAL_READ_LOCK(0) always ignore the entire WAL and read all content +** directly from the database. +** +** The value of aReadMark[K] may only be changed by a thread that +** is holding an exclusive lock on WAL_READ_LOCK(K). Thus, the value of +** aReadMark[K] cannot changed while there is a reader is using that mark +** since the reader will be holding a shared lock on WAL_READ_LOCK(K). +** +** The checkpointer may only transfer frames from WAL to database where +** the frame numbers are less than or equal to every aReadMark[] that is +** in use (that is, every aReadMark[j] for which there is a corresponding +** WAL_READ_LOCK(j)). New readers (usually) pick the aReadMark[] with the +** largest value and will increase an unused aReadMark[] to mxFrame if there +** is not already an aReadMark[] equal to mxFrame. The exception to the +** previous sentence is when nBackfill equals mxFrame (meaning that everything +** in the WAL has been backfilled into the database) then new readers +** will choose aReadMark[0] which has value 0 and hence such reader will +** get all their all content directly from the database file and ignore +** the WAL. +** +** Writers normally append new frames to the end of the WAL. However, +** if nBackfill equals mxFrame (meaning that all WAL content has been +** written back into the database) and if no readers are using the WAL +** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then +** the writer will first "reset" the WAL back to the beginning and start +** writing new content beginning at frame 1. +** +** We assume that 32-bit loads are atomic and so no locks are needed in +** order to read from any aReadMark[] entries. +*/ +struct WalCkptInfo { + u32 nBackfill; /* Number of WAL frames backfilled into DB */ + u32 aReadMark[WAL_NREADER]; /* Reader marks */ +}; +#define READMARK_NOT_USED 0xffffffff + + +/* A block of WALINDEX_LOCK_RESERVED bytes beginning at +** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems +** only support mandatory file-locks, we do not read or write data +** from the region of the file on which locks are applied. +*/ +#define WALINDEX_LOCK_OFFSET (sizeof(WalIndexHdr)*2 + sizeof(WalCkptInfo)) +#define WALINDEX_LOCK_RESERVED 16 +#define WALINDEX_HDR_SIZE (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED) + +/* Size of header before each frame in wal */ +#define WAL_FRAME_HDRSIZE 24 + +/* Size of write ahead log header, including checksum. */ +/* #define WAL_HDRSIZE 24 */ +#define WAL_HDRSIZE 32 + +/* WAL magic value. Either this value, or the same value with the least +** significant bit also set (WAL_MAGIC | 0x00000001) is stored in 32-bit +** big-endian format in the first 4 bytes of a WAL file. +** +** If the LSB is set, then the checksums for each frame within the WAL +** file are calculated by treating all data as an array of 32-bit +** big-endian words. Otherwise, they are calculated by interpreting +** all data as 32-bit little-endian words. +*/ +#define WAL_MAGIC 0x377f0682 + +/* +** Return the offset of frame iFrame in the write-ahead log file, +** assuming a database page size of szPage bytes. The offset returned +** is to the start of the write-ahead log frame-header. +*/ +#define walFrameOffset(iFrame, szPage) ( \ + WAL_HDRSIZE + ((iFrame)-1)*(i64)((szPage)+WAL_FRAME_HDRSIZE) \ +) + +/* +** An open write-ahead log file is represented by an instance of the +** following object. +*/ +struct Wal { + sqlite3_vfs *pVfs; /* The VFS used to create pDbFd */ + sqlite3_file *pDbFd; /* File handle for the database file */ + sqlite3_file *pWalFd; /* File handle for WAL file */ + u32 iCallback; /* Value to pass to log callback (or 0) */ + i64 mxWalSize; /* Truncate WAL to this size upon reset */ + int nWiData; /* Size of array apWiData */ + int szFirstBlock; /* Size of first block written to WAL file */ + volatile u32 **apWiData; /* Pointer to wal-index content in memory */ + u32 szPage; /* Database page size */ + i16 readLock; /* Which read lock is being held. -1 for none */ + u8 syncFlags; /* Flags to use to sync header writes */ + u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ + u8 writeLock; /* True if in a write transaction */ + u8 ckptLock; /* True if holding a checkpoint lock */ + u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ + u8 truncateOnCommit; /* True to truncate WAL file on commit */ + u8 syncHeader; /* Fsync the WAL header if true */ + u8 padToSectorBoundary; /* Pad transactions out to the next sector */ + WalIndexHdr hdr; /* Wal-index header for current transaction */ + u32 minFrame; /* Ignore wal frames before this one */ + const char *zWalName; /* Name of WAL file */ + u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ +#ifdef SQLITE_DEBUG + u8 lockError; /* True if a locking error has occurred */ +#endif +}; + +/* +** Candidate values for Wal.exclusiveMode. +*/ +#define WAL_NORMAL_MODE 0 +#define WAL_EXCLUSIVE_MODE 1 +#define WAL_HEAPMEMORY_MODE 2 + +/* +** Possible values for WAL.readOnly +*/ +#define WAL_RDWR 0 /* Normal read/write connection */ +#define WAL_RDONLY 1 /* The WAL file is readonly */ +#define WAL_SHM_RDONLY 2 /* The SHM file is readonly */ + +/* +** Each page of the wal-index mapping contains a hash-table made up of +** an array of HASHTABLE_NSLOT elements of the following type. +*/ +typedef u16 ht_slot; + +/* +** This structure is used to implement an iterator that loops through +** all frames in the WAL in database page order. Where two or more frames +** correspond to the same database page, the iterator visits only the +** frame most recently written to the WAL (in other words, the frame with +** the largest index). +** +** The internals of this structure are only accessed by: +** +** walIteratorInit() - Create a new iterator, +** walIteratorNext() - Step an iterator, +** walIteratorFree() - Free an iterator. +** +** This functionality is used by the checkpoint code (see walCheckpoint()). +*/ +struct WalIterator { + int iPrior; /* Last result returned from the iterator */ + int nSegment; /* Number of entries in aSegment[] */ + struct WalSegment { + int iNext; /* Next slot in aIndex[] not yet returned */ + ht_slot *aIndex; /* i0, i1, i2... such that aPgno[iN] ascend */ + u32 *aPgno; /* Array of page numbers. */ + int nEntry; /* Nr. of entries in aPgno[] and aIndex[] */ + int iZero; /* Frame number associated with aPgno[0] */ + } aSegment[1]; /* One for every 32KB page in the wal-index */ +}; + +/* +** Define the parameters of the hash tables in the wal-index file. There +** is a hash-table following every HASHTABLE_NPAGE page numbers in the +** wal-index. +** +** Changing any of these constants will alter the wal-index format and +** create incompatibilities. +*/ +#define HASHTABLE_NPAGE 4096 /* Must be power of 2 */ +#define HASHTABLE_HASH_1 383 /* Should be prime */ +#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ + +/* +** The block of page numbers associated with the first hash-table in a +** wal-index is smaller than usual. This is so that there is a complete +** hash-table on each aligned 32KB page of the wal-index. +*/ +#define HASHTABLE_NPAGE_ONE (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32))) + +/* The wal-index is divided into pages of WALINDEX_PGSZ bytes each. */ +#define WALINDEX_PGSZ ( \ + sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \ +) + +/* +** Obtain a pointer to the iPage'th page of the wal-index. The wal-index +** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are +** numbered from zero. +** +** If this call is successful, *ppPage is set to point to the wal-index +** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs, +** then an SQLite error code is returned and *ppPage is set to 0. +*/ +static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){ + int rc = SQLITE_OK; + + /* Enlarge the pWal->apWiData[] array if required */ + if( pWal->nWiData<=iPage ){ + int nByte = sizeof(u32*)*(iPage+1); + volatile u32 **apNew; + apNew = (volatile u32 **)sqlite3_realloc64((void *)pWal->apWiData, nByte); + if( !apNew ){ + *ppPage = 0; + return SQLITE_NOMEM; + } + memset((void*)&apNew[pWal->nWiData], 0, + sizeof(u32*)*(iPage+1-pWal->nWiData)); + pWal->apWiData = apNew; + pWal->nWiData = iPage+1; + } + + /* Request a pointer to the required page from the VFS */ + if( pWal->apWiData[iPage]==0 ){ + if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){ + pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ); + if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM; + }else{ + rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, + pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] + ); + if( rc==SQLITE_READONLY ){ + pWal->readOnly |= WAL_SHM_RDONLY; + rc = SQLITE_OK; + } + } + } + + *ppPage = pWal->apWiData[iPage]; + assert( iPage==0 || *ppPage || rc!=SQLITE_OK ); + return rc; +} + +/* +** Return a pointer to the WalCkptInfo structure in the wal-index. +*/ +static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]); +} + +/* +** Return a pointer to the WalIndexHdr structure in the wal-index. +*/ +static volatile WalIndexHdr *walIndexHdr(Wal *pWal){ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + return (volatile WalIndexHdr*)pWal->apWiData[0]; +} + +/* +** The argument to this macro must be of type u32. On a little-endian +** architecture, it returns the u32 value that results from interpreting +** the 4 bytes as a big-endian value. On a big-endian architecture, it +** returns the value that would be produced by interpreting the 4 bytes +** of the input value as a little-endian integer. +*/ +#define BYTESWAP32(x) ( \ + (((x)&0x000000FF)<<24) + (((x)&0x0000FF00)<<8) \ + + (((x)&0x00FF0000)>>8) + (((x)&0xFF000000)>>24) \ +) + +/* +** Generate or extend an 8 byte checksum based on the data in +** array aByte[] and the initial values of aIn[0] and aIn[1] (or +** initial values of 0 and 0 if aIn==NULL). +** +** The checksum is written back into aOut[] before returning. +** +** nByte must be a positive multiple of 8. +*/ +static void walChecksumBytes( + int nativeCksum, /* True for native byte-order, false for non-native */ + u8 *a, /* Content to be checksummed */ + int nByte, /* Bytes of content in a[]. Must be a multiple of 8. */ + const u32 *aIn, /* Initial checksum value input */ + u32 *aOut /* OUT: Final checksum value output */ +){ + u32 s1, s2; + u32 *aData = (u32 *)a; + u32 *aEnd = (u32 *)&a[nByte]; + + if( aIn ){ + s1 = aIn[0]; + s2 = aIn[1]; + }else{ + s1 = s2 = 0; + } + + assert( nByte>=8 ); + assert( (nByte&0x00000007)==0 ); + + if( nativeCksum ){ + do { + s1 += *aData++ + s2; + s2 += *aData++ + s1; + }while( aData<aEnd ); + }else{ + do { + s1 += BYTESWAP32(aData[0]) + s2; + s2 += BYTESWAP32(aData[1]) + s1; + aData += 2; + }while( aData<aEnd ); + } + + aOut[0] = s1; + aOut[1] = s2; +} + +static void walShmBarrier(Wal *pWal){ + if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){ + sqlite3OsShmBarrier(pWal->pDbFd); + } +} + +/* +** Write the header information in pWal->hdr into the wal-index. +** +** The checksum on pWal->hdr is updated before it is written. +*/ +static void walIndexWriteHdr(Wal *pWal){ + volatile WalIndexHdr *aHdr = walIndexHdr(pWal); + const int nCksum = offsetof(WalIndexHdr, aCksum); + + assert( pWal->writeLock ); + pWal->hdr.isInit = 1; + pWal->hdr.iVersion = WALINDEX_MAX_VERSION; + walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum); + memcpy((void*)&aHdr[1], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); + walShmBarrier(pWal); + memcpy((void*)&aHdr[0], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); +} + +/* +** This function encodes a single frame header and writes it to a buffer +** supplied by the caller. A frame-header is made up of a series of +** 4-byte big-endian integers, as follows: +** +** 0: Page number. +** 4: For commit records, the size of the database image in pages +** after the commit. For all other records, zero. +** 8: Salt-1 (copied from the wal-header) +** 12: Salt-2 (copied from the wal-header) +** 16: Checksum-1. +** 20: Checksum-2. +*/ +static void walEncodeFrame( + Wal *pWal, /* The write-ahead log */ + u32 iPage, /* Database page number for frame */ + u32 nTruncate, /* New db size (or 0 for non-commit frames) */ + u8 *aData, /* Pointer to page data */ + u8 *aFrame /* OUT: Write encoded frame here */ +){ + int nativeCksum; /* True for native byte-order checksums */ + u32 *aCksum = pWal->hdr.aFrameCksum; + assert( WAL_FRAME_HDRSIZE==24 ); + sqlite3Put4byte(&aFrame[0], iPage); + sqlite3Put4byte(&aFrame[4], nTruncate); + memcpy(&aFrame[8], pWal->hdr.aSalt, 8); + + nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); + walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); + walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + + sqlite3Put4byte(&aFrame[16], aCksum[0]); + sqlite3Put4byte(&aFrame[20], aCksum[1]); +} + +/* +** Check to see if the frame with header in aFrame[] and content +** in aData[] is valid. If it is a valid frame, fill *piPage and +** *pnTruncate and return true. Return if the frame is not valid. +*/ +static int walDecodeFrame( + Wal *pWal, /* The write-ahead log */ + u32 *piPage, /* OUT: Database page number for frame */ + u32 *pnTruncate, /* OUT: New db size (or 0 if not commit) */ + u8 *aData, /* Pointer to page data (for checksum) */ + u8 *aFrame /* Frame data */ +){ + int nativeCksum; /* True for native byte-order checksums */ + u32 *aCksum = pWal->hdr.aFrameCksum; + u32 pgno; /* Page number of the frame */ + assert( WAL_FRAME_HDRSIZE==24 ); + + /* A frame is only valid if the salt values in the frame-header + ** match the salt values in the wal-header. + */ + if( memcmp(&pWal->hdr.aSalt, &aFrame[8], 8)!=0 ){ + return 0; + } + + /* A frame is only valid if the page number is creater than zero. + */ + pgno = sqlite3Get4byte(&aFrame[0]); + if( pgno==0 ){ + return 0; + } + + /* A frame is only valid if a checksum of the WAL header, + ** all prior frams, the first 16 bytes of this frame-header, + ** and the frame-data matches the checksum in the last 8 + ** bytes of this frame-header. + */ + nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); + walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); + walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) + || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) + ){ + /* Checksum failed. */ + return 0; + } + + /* If we reach this point, the frame is valid. Return the page number + ** and the new database size. + */ + *piPage = pgno; + *pnTruncate = sqlite3Get4byte(&aFrame[4]); + return 1; +} + + +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) +/* +** Names of locks. This routine is used to provide debugging output and is not +** a part of an ordinary build. +*/ +static const char *walLockName(int lockIdx){ + if( lockIdx==WAL_WRITE_LOCK ){ + return "WRITE-LOCK"; + }else if( lockIdx==WAL_CKPT_LOCK ){ + return "CKPT-LOCK"; + }else if( lockIdx==WAL_RECOVER_LOCK ){ + return "RECOVER-LOCK"; + }else{ + static char zName[15]; + sqlite3_snprintf(sizeof(zName), zName, "READ-LOCK[%d]", + lockIdx-WAL_READ_LOCK(0)); + return zName; + } +} +#endif /*defined(SQLITE_TEST) || defined(SQLITE_DEBUG) */ + + +/* +** Set or release locks on the WAL. Locks are either shared or exclusive. +** A lock cannot be moved directly between shared and exclusive - it must go +** through the unlocked state first. +** +** In locking_mode=EXCLUSIVE, all of these routines become no-ops. +*/ +static int walLockShared(Wal *pWal, int lockIdx){ + int rc; + if( pWal->exclusiveMode ) return SQLITE_OK; + rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, + SQLITE_SHM_LOCK | SQLITE_SHM_SHARED); + WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal, + walLockName(lockIdx), rc ? "failed" : "ok")); + VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); ) + return rc; +} +static void walUnlockShared(Wal *pWal, int lockIdx){ + if( pWal->exclusiveMode ) return; + (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, + SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED); + WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx))); +} +static int walLockExclusive(Wal *pWal, int lockIdx, int n, int fBlock){ + int rc; + if( pWal->exclusiveMode ) return SQLITE_OK; + if( fBlock ) sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_WAL_BLOCK, 0); + rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, + SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE); + WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal, + walLockName(lockIdx), n, rc ? "failed" : "ok")); + VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); ) + return rc; +} +static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){ + if( pWal->exclusiveMode ) return; + (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, + SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE); + WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal, + walLockName(lockIdx), n)); +} + +/* +** Compute a hash on a page number. The resulting hash value must land +** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances +** the hash to the next value in the event of a collision. +*/ +static int walHash(u32 iPage){ + assert( iPage>0 ); + assert( (HASHTABLE_NSLOT & (HASHTABLE_NSLOT-1))==0 ); + return (iPage*HASHTABLE_HASH_1) & (HASHTABLE_NSLOT-1); +} +static int walNextHash(int iPriorHash){ + return (iPriorHash+1)&(HASHTABLE_NSLOT-1); +} + +/* +** Return pointers to the hash table and page number array stored on +** page iHash of the wal-index. The wal-index is broken into 32KB pages +** numbered starting from 0. +** +** Set output variable *paHash to point to the start of the hash table +** in the wal-index file. Set *piZero to one less than the frame +** number of the first frame indexed by this hash table. If a +** slot in the hash table is set to N, it refers to frame number +** (*piZero+N) in the log. +** +** Finally, set *paPgno so that *paPgno[1] is the page number of the +** first frame indexed by the hash table, frame (*piZero+1). +*/ +static int walHashGet( + Wal *pWal, /* WAL handle */ + int iHash, /* Find the iHash'th table */ + volatile ht_slot **paHash, /* OUT: Pointer to hash index */ + volatile u32 **paPgno, /* OUT: Pointer to page number array */ + u32 *piZero /* OUT: Frame associated with *paPgno[0] */ +){ + int rc; /* Return code */ + volatile u32 *aPgno; + + rc = walIndexPage(pWal, iHash, &aPgno); + assert( rc==SQLITE_OK || iHash>0 ); + + if( rc==SQLITE_OK ){ + u32 iZero; + volatile ht_slot *aHash; + + aHash = (volatile ht_slot *)&aPgno[HASHTABLE_NPAGE]; + if( iHash==0 ){ + aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)]; + iZero = 0; + }else{ + iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE; + } + + *paPgno = &aPgno[-1]; + *paHash = aHash; + *piZero = iZero; + } + return rc; +} + +/* +** Return the number of the wal-index page that contains the hash-table +** and page-number array that contain entries corresponding to WAL frame +** iFrame. The wal-index is broken up into 32KB pages. Wal-index pages +** are numbered starting from 0. +*/ +static int walFramePage(u32 iFrame){ + int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE; + assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE) + && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE) + && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)) + && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE) + && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE)) + ); + return iHash; +} + +/* +** Return the page number associated with frame iFrame in this WAL. +*/ +static u32 walFramePgno(Wal *pWal, u32 iFrame){ + int iHash = walFramePage(iFrame); + if( iHash==0 ){ + return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1]; + } + return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE]; +} + +/* +** Remove entries from the hash table that point to WAL slots greater +** than pWal->hdr.mxFrame. +** +** This function is called whenever pWal->hdr.mxFrame is decreased due +** to a rollback or savepoint. +** +** At most only the hash table containing pWal->hdr.mxFrame needs to be +** updated. Any later hash tables will be automatically cleared when +** pWal->hdr.mxFrame advances to the point where those hash tables are +** actually needed. +*/ +static void walCleanupHash(Wal *pWal){ + volatile ht_slot *aHash = 0; /* Pointer to hash table to clear */ + volatile u32 *aPgno = 0; /* Page number array for hash table */ + u32 iZero = 0; /* frame == (aHash[x]+iZero) */ + int iLimit = 0; /* Zero values greater than this */ + int nByte; /* Number of bytes to zero in aPgno[] */ + int i; /* Used to iterate through aHash[] */ + + assert( pWal->writeLock ); + testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE-1 ); + testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE ); + testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE+1 ); + + if( pWal->hdr.mxFrame==0 ) return; + + /* Obtain pointers to the hash-table and page-number array containing + ** the entry that corresponds to frame pWal->hdr.mxFrame. It is guaranteed + ** that the page said hash-table and array reside on is already mapped. + */ + assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) ); + assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] ); + walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &aHash, &aPgno, &iZero); + + /* Zero all hash-table entries that correspond to frame numbers greater + ** than pWal->hdr.mxFrame. + */ + iLimit = pWal->hdr.mxFrame - iZero; + assert( iLimit>0 ); + for(i=0; i<HASHTABLE_NSLOT; i++){ + if( aHash[i]>iLimit ){ + aHash[i] = 0; + } + } + + /* Zero the entries in the aPgno array that correspond to frames with + ** frame numbers greater than pWal->hdr.mxFrame. + */ + nByte = (int)((char *)aHash - (char *)&aPgno[iLimit+1]); + memset((void *)&aPgno[iLimit+1], 0, nByte); + +#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT + /* Verify that the every entry in the mapping region is still reachable + ** via the hash table even after the cleanup. + */ + if( iLimit ){ + int j; /* Loop counter */ + int iKey; /* Hash key */ + for(j=1; j<=iLimit; j++){ + for(iKey=walHash(aPgno[j]); aHash[iKey]; iKey=walNextHash(iKey)){ + if( aHash[iKey]==j ) break; + } + assert( aHash[iKey]==j ); + } + } +#endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ +} + + +/* +** Set an entry in the wal-index that will map database page number +** pPage into WAL frame iFrame. +*/ +static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ + int rc; /* Return code */ + u32 iZero = 0; /* One less than frame number of aPgno[1] */ + volatile u32 *aPgno = 0; /* Page number array */ + volatile ht_slot *aHash = 0; /* Hash table */ + + rc = walHashGet(pWal, walFramePage(iFrame), &aHash, &aPgno, &iZero); + + /* Assuming the wal-index file was successfully mapped, populate the + ** page number array and hash table entry. + */ + if( rc==SQLITE_OK ){ + int iKey; /* Hash table key */ + int idx; /* Value to write to hash-table slot */ + int nCollide; /* Number of hash collisions */ + + idx = iFrame - iZero; + assert( idx <= HASHTABLE_NSLOT/2 + 1 ); + + /* If this is the first entry to be added to this hash-table, zero the + ** entire hash table and aPgno[] array before proceeding. + */ + if( idx==1 ){ + int nByte = (int)((u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1]); + memset((void*)&aPgno[1], 0, nByte); + } + + /* If the entry in aPgno[] is already set, then the previous writer + ** must have exited unexpectedly in the middle of a transaction (after + ** writing one or more dirty pages to the WAL to free up memory). + ** Remove the remnants of that writers uncommitted transaction from + ** the hash-table before writing any new entries. + */ + if( aPgno[idx] ){ + walCleanupHash(pWal); + assert( !aPgno[idx] ); + } + + /* Write the aPgno[] array entry and the hash-table slot. */ + nCollide = idx; + for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){ + if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT; + } + aPgno[idx] = iPage; + aHash[iKey] = (ht_slot)idx; + +#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT + /* Verify that the number of entries in the hash table exactly equals + ** the number of entries in the mapping region. + */ + { + int i; /* Loop counter */ + int nEntry = 0; /* Number of entries in the hash table */ + for(i=0; i<HASHTABLE_NSLOT; i++){ if( aHash[i] ) nEntry++; } + assert( nEntry==idx ); + } + + /* Verify that the every entry in the mapping region is reachable + ** via the hash table. This turns out to be a really, really expensive + ** thing to check, so only do this occasionally - not on every + ** iteration. + */ + if( (idx&0x3ff)==0 ){ + int i; /* Loop counter */ + for(i=1; i<=idx; i++){ + for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){ + if( aHash[iKey]==i ) break; + } + assert( aHash[iKey]==i ); + } + } +#endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ + } + + + return rc; +} + + +/* +** Recover the wal-index by reading the write-ahead log file. +** +** This routine first tries to establish an exclusive lock on the +** wal-index to prevent other threads/processes from doing anything +** with the WAL or wal-index while recovery is running. The +** WAL_RECOVER_LOCK is also held so that other threads will know +** that this thread is running recovery. If unable to establish +** the necessary locks, this routine returns SQLITE_BUSY. +*/ +static int walIndexRecover(Wal *pWal){ + int rc; /* Return Code */ + i64 nSize; /* Size of log file */ + u32 aFrameCksum[2] = {0, 0}; + int iLock; /* Lock offset to lock for checkpoint */ + int nLock; /* Number of locks to hold */ + + /* Obtain an exclusive lock on all byte in the locking range not already + ** locked by the caller. The caller is guaranteed to have locked the + ** WAL_WRITE_LOCK byte, and may have also locked the WAL_CKPT_LOCK byte. + ** If successful, the same bytes that are locked here are unlocked before + ** this function returns. + */ + assert( pWal->ckptLock==1 || pWal->ckptLock==0 ); + assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 ); + assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE ); + assert( pWal->writeLock ); + iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock; + nLock = SQLITE_SHM_NLOCK - iLock; + rc = walLockExclusive(pWal, iLock, nLock, 0); + if( rc ){ + return rc; + } + WALTRACE(("WAL%p: recovery begin...\n", pWal)); + + memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); + + rc = sqlite3OsFileSize(pWal->pWalFd, &nSize); + if( rc!=SQLITE_OK ){ + goto recovery_error; + } + + if( nSize>WAL_HDRSIZE ){ + u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ + u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ + int szFrame; /* Number of bytes in buffer aFrame[] */ + u8 *aData; /* Pointer to data part of aFrame buffer */ + int iFrame; /* Index of last frame read */ + i64 iOffset; /* Next offset to read from log file */ + int szPage; /* Page size according to the log */ + u32 magic; /* Magic value read from WAL header */ + u32 version; /* Magic value read from WAL header */ + int isValid; /* True if this frame is valid */ + + /* Read in the WAL header. */ + rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); + if( rc!=SQLITE_OK ){ + goto recovery_error; + } + + /* If the database page size is not a power of two, or is greater than + ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid + ** data. Similarly, if the 'magic' value is invalid, ignore the whole + ** WAL file. + */ + magic = sqlite3Get4byte(&aBuf[0]); + szPage = sqlite3Get4byte(&aBuf[8]); + if( (magic&0xFFFFFFFE)!=WAL_MAGIC + || szPage&(szPage-1) + || szPage>SQLITE_MAX_PAGE_SIZE + || szPage<512 + ){ + goto finished; + } + pWal->hdr.bigEndCksum = (u8)(magic&0x00000001); + pWal->szPage = szPage; + pWal->nCkpt = sqlite3Get4byte(&aBuf[12]); + memcpy(&pWal->hdr.aSalt, &aBuf[16], 8); + + /* Verify that the WAL header checksum is correct */ + walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN, + aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum + ); + if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24]) + || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28]) + ){ + goto finished; + } + + /* Verify that the version number on the WAL format is one that + ** are able to understand */ + version = sqlite3Get4byte(&aBuf[4]); + if( version!=WAL_MAX_VERSION ){ + rc = SQLITE_CANTOPEN_BKPT; + goto finished; + } + + /* Malloc a buffer to read frames into. */ + szFrame = szPage + WAL_FRAME_HDRSIZE; + aFrame = (u8 *)sqlite3_malloc64(szFrame); + if( !aFrame ){ + rc = SQLITE_NOMEM; + goto recovery_error; + } + aData = &aFrame[WAL_FRAME_HDRSIZE]; + + /* Read all frames from the log file. */ + iFrame = 0; + for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ + u32 pgno; /* Database page number for frame */ + u32 nTruncate; /* dbsize field from frame header */ + + /* Read and decode the next log frame. */ + iFrame++; + rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); + if( rc!=SQLITE_OK ) break; + isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); + if( !isValid ) break; + rc = walIndexAppend(pWal, iFrame, pgno); + if( rc!=SQLITE_OK ) break; + + /* If nTruncate is non-zero, this is a commit record. */ + if( nTruncate ){ + pWal->hdr.mxFrame = iFrame; + pWal->hdr.nPage = nTruncate; + pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); + testcase( szPage<=32768 ); + testcase( szPage>=65536 ); + aFrameCksum[0] = pWal->hdr.aFrameCksum[0]; + aFrameCksum[1] = pWal->hdr.aFrameCksum[1]; + } + } + + sqlite3_free(aFrame); + } + +finished: + if( rc==SQLITE_OK ){ + volatile WalCkptInfo *pInfo; + int i; + pWal->hdr.aFrameCksum[0] = aFrameCksum[0]; + pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; + walIndexWriteHdr(pWal); + + /* Reset the checkpoint-header. This is safe because this thread is + ** currently holding locks that exclude all other readers, writers and + ** checkpointers. + */ + pInfo = walCkptInfo(pWal); + pInfo->nBackfill = 0; + pInfo->aReadMark[0] = 0; + for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; + if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame; + + /* If more than one frame was recovered from the log file, report an + ** event via sqlite3_log(). This is to help with identifying performance + ** problems caused by applications routinely shutting down without + ** checkpointing the log file. + */ + if( pWal->hdr.nPage ){ + sqlite3_log(SQLITE_NOTICE_RECOVER_WAL, + "recovered %d frames from WAL file %s", + pWal->hdr.mxFrame, pWal->zWalName + ); + } + } + +recovery_error: + WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok")); + walUnlockExclusive(pWal, iLock, nLock); + return rc; +} + +/* +** Close an open wal-index. +*/ +static void walIndexClose(Wal *pWal, int isDelete){ + if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){ + int i; + for(i=0; i<pWal->nWiData; i++){ + sqlite3_free((void *)pWal->apWiData[i]); + pWal->apWiData[i] = 0; + } + }else{ + sqlite3OsShmUnmap(pWal->pDbFd, isDelete); + } +} + +/* +** Open a connection to the WAL file zWalName. The database file must +** already be opened on connection pDbFd. The buffer that zWalName points +** to must remain valid for the lifetime of the returned Wal* handle. +** +** A SHARED lock should be held on the database file when this function +** is called. The purpose of this SHARED lock is to prevent any other +** client from unlinking the WAL or wal-index file. If another process +** were to do this just after this client opened one of these files, the +** system would be badly broken. +** +** If the log file is successfully opened, SQLITE_OK is returned and +** *ppWal is set to point to a new WAL handle. If an error occurs, +** an SQLite error code is returned and *ppWal is left unmodified. +*/ +SQLITE_PRIVATE int sqlite3WalOpen( + sqlite3_vfs *pVfs, /* vfs module to open wal and wal-index */ + sqlite3_file *pDbFd, /* The open database file */ + const char *zWalName, /* Name of the WAL file */ + int bNoShm, /* True to run in heap-memory mode */ + i64 mxWalSize, /* Truncate WAL to this size on reset */ + Wal **ppWal /* OUT: Allocated Wal handle */ +){ + int rc; /* Return Code */ + Wal *pRet; /* Object to allocate and return */ + int flags; /* Flags passed to OsOpen() */ + + assert( zWalName && zWalName[0] ); + assert( pDbFd ); + + /* In the amalgamation, the os_unix.c and os_win.c source files come before + ** this source file. Verify that the #defines of the locking byte offsets + ** in os_unix.c and os_win.c agree with the WALINDEX_LOCK_OFFSET value. + */ +#ifdef WIN_SHM_BASE + assert( WIN_SHM_BASE==WALINDEX_LOCK_OFFSET ); +#endif +#ifdef UNIX_SHM_BASE + assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET ); +#endif + + + /* Allocate an instance of struct Wal to return. */ + *ppWal = 0; + pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile); + if( !pRet ){ + return SQLITE_NOMEM; + } + + pRet->pVfs = pVfs; + pRet->pWalFd = (sqlite3_file *)&pRet[1]; + pRet->pDbFd = pDbFd; + pRet->readLock = -1; + pRet->mxWalSize = mxWalSize; + pRet->zWalName = zWalName; + pRet->syncHeader = 1; + pRet->padToSectorBoundary = 1; + pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); + + /* Open file handle on the write-ahead log file. */ + flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); + rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); + if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){ + pRet->readOnly = WAL_RDONLY; + } + + if( rc!=SQLITE_OK ){ + walIndexClose(pRet, 0); + sqlite3OsClose(pRet->pWalFd); + sqlite3_free(pRet); + }else{ + int iDC = sqlite3OsDeviceCharacteristics(pDbFd); + if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->syncHeader = 0; } + if( iDC & SQLITE_IOCAP_POWERSAFE_OVERWRITE ){ + pRet->padToSectorBoundary = 0; + } + *ppWal = pRet; + WALTRACE(("WAL%d: opened\n", pRet)); + } + return rc; +} + +#if 0 +/* +** Change the size to which the WAL file is trucated on each reset. +*/ +SQLITE_PRIVATE void sqlite3WalLimit(Wal *pWal, i64 iLimit){ + if( pWal ) pWal->mxWalSize = iLimit; +} +#endif + +/* +** Find the smallest page number out of all pages held in the WAL that +** has not been returned by any prior invocation of this method on the +** same WalIterator object. Write into *piFrame the frame index where +** that page was last written into the WAL. Write into *piPage the page +** number. +** +** Return 0 on success. If there are no pages in the WAL with a page +** number larger than *piPage, then return 1. +*/ +static int walIteratorNext( + WalIterator *p, /* Iterator */ + u32 *piPage, /* OUT: The page number of the next page */ + u32 *piFrame /* OUT: Wal frame index of next page */ +){ + u32 iMin; /* Result pgno must be greater than iMin */ + u32 iRet = 0xFFFFFFFF; /* 0xffffffff is never a valid page number */ + int i; /* For looping through segments */ + + iMin = p->iPrior; + assert( iMin<0xffffffff ); + for(i=p->nSegment-1; i>=0; i--){ + struct WalSegment *pSegment = &p->aSegment[i]; + while( pSegment->iNext<pSegment->nEntry ){ + u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]]; + if( iPg>iMin ){ + if( iPg<iRet ){ + iRet = iPg; + *piFrame = pSegment->iZero + pSegment->aIndex[pSegment->iNext]; + } + break; + } + pSegment->iNext++; + } + } + + *piPage = p->iPrior = iRet; + return (iRet==0xFFFFFFFF); +} + +/* +** This function merges two sorted lists into a single sorted list. +** +** aLeft[] and aRight[] are arrays of indices. The sort key is +** aContent[aLeft[]] and aContent[aRight[]]. Upon entry, the following +** is guaranteed for all J<K: +** +** aContent[aLeft[J]] < aContent[aLeft[K]] +** aContent[aRight[J]] < aContent[aRight[K]] +** +** This routine overwrites aRight[] with a new (probably longer) sequence +** of indices such that the aRight[] contains every index that appears in +** either aLeft[] or the old aRight[] and such that the second condition +** above is still met. +** +** The aContent[aLeft[X]] values will be unique for all X. And the +** aContent[aRight[X]] values will be unique too. But there might be +** one or more combinations of X and Y such that +** +** aLeft[X]!=aRight[Y] && aContent[aLeft[X]] == aContent[aRight[Y]] +** +** When that happens, omit the aLeft[X] and use the aRight[Y] index. +*/ +static void walMerge( + const u32 *aContent, /* Pages in wal - keys for the sort */ + ht_slot *aLeft, /* IN: Left hand input list */ + int nLeft, /* IN: Elements in array *paLeft */ + ht_slot **paRight, /* IN/OUT: Right hand input list */ + int *pnRight, /* IN/OUT: Elements in *paRight */ + ht_slot *aTmp /* Temporary buffer */ +){ + int iLeft = 0; /* Current index in aLeft */ + int iRight = 0; /* Current index in aRight */ + int iOut = 0; /* Current index in output buffer */ + int nRight = *pnRight; + ht_slot *aRight = *paRight; + + assert( nLeft>0 && nRight>0 ); + while( iRight<nRight || iLeft<nLeft ){ + ht_slot logpage; + Pgno dbpage; + + if( (iLeft<nLeft) + && (iRight>=nRight || aContent[aLeft[iLeft]]<aContent[aRight[iRight]]) + ){ + logpage = aLeft[iLeft++]; + }else{ + logpage = aRight[iRight++]; + } + dbpage = aContent[logpage]; + + aTmp[iOut++] = logpage; + if( iLeft<nLeft && aContent[aLeft[iLeft]]==dbpage ) iLeft++; + + assert( iLeft>=nLeft || aContent[aLeft[iLeft]]>dbpage ); + assert( iRight>=nRight || aContent[aRight[iRight]]>dbpage ); + } + + *paRight = aLeft; + *pnRight = iOut; + memcpy(aLeft, aTmp, sizeof(aTmp[0])*iOut); +} + +/* +** Sort the elements in list aList using aContent[] as the sort key. +** Remove elements with duplicate keys, preferring to keep the +** larger aList[] values. +** +** The aList[] entries are indices into aContent[]. The values in +** aList[] are to be sorted so that for all J<K: +** +** aContent[aList[J]] < aContent[aList[K]] +** +** For any X and Y such that +** +** aContent[aList[X]] == aContent[aList[Y]] +** +** Keep the larger of the two values aList[X] and aList[Y] and discard +** the smaller. +*/ +static void walMergesort( + const u32 *aContent, /* Pages in wal */ + ht_slot *aBuffer, /* Buffer of at least *pnList items to use */ + ht_slot *aList, /* IN/OUT: List to sort */ + int *pnList /* IN/OUT: Number of elements in aList[] */ +){ + struct Sublist { + int nList; /* Number of elements in aList */ + ht_slot *aList; /* Pointer to sub-list content */ + }; + + const int nList = *pnList; /* Size of input list */ + int nMerge = 0; /* Number of elements in list aMerge */ + ht_slot *aMerge = 0; /* List to be merged */ + int iList; /* Index into input list */ + u32 iSub = 0; /* Index into aSub array */ + struct Sublist aSub[13]; /* Array of sub-lists */ + + memset(aSub, 0, sizeof(aSub)); + assert( nList<=HASHTABLE_NPAGE && nList>0 ); + assert( HASHTABLE_NPAGE==(1<<(ArraySize(aSub)-1)) ); + + for(iList=0; iList<nList; iList++){ + nMerge = 1; + aMerge = &aList[iList]; + for(iSub=0; iList & (1<<iSub); iSub++){ + struct Sublist *p; + assert( iSub<ArraySize(aSub) ); + p = &aSub[iSub]; + assert( p->aList && p->nList<=(1<<iSub) ); + assert( p->aList==&aList[iList&~((2<<iSub)-1)] ); + walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer); + } + aSub[iSub].aList = aMerge; + aSub[iSub].nList = nMerge; + } + + for(iSub++; iSub<ArraySize(aSub); iSub++){ + if( nList & (1<<iSub) ){ + struct Sublist *p; + assert( iSub<ArraySize(aSub) ); + p = &aSub[iSub]; + assert( p->nList<=(1<<iSub) ); + assert( p->aList==&aList[nList&~((2<<iSub)-1)] ); + walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer); + } + } + assert( aMerge==aList ); + *pnList = nMerge; + +#ifdef SQLITE_DEBUG + { + int i; + for(i=1; i<*pnList; i++){ + assert( aContent[aList[i]] > aContent[aList[i-1]] ); + } + } +#endif +} + +/* +** Free an iterator allocated by walIteratorInit(). +*/ +static void walIteratorFree(WalIterator *p){ + sqlite3_free(p); +} + +/* +** Construct a WalInterator object that can be used to loop over all +** pages in the WAL in ascending order. The caller must hold the checkpoint +** lock. +** +** On success, make *pp point to the newly allocated WalInterator object +** return SQLITE_OK. Otherwise, return an error code. If this routine +** returns an error, the value of *pp is undefined. +** +** The calling routine should invoke walIteratorFree() to destroy the +** WalIterator object when it has finished with it. +*/ +static int walIteratorInit(Wal *pWal, WalIterator **pp){ + WalIterator *p; /* Return value */ + int nSegment; /* Number of segments to merge */ + u32 iLast; /* Last frame in log */ + int nByte; /* Number of bytes to allocate */ + int i; /* Iterator variable */ + ht_slot *aTmp; /* Temp space used by merge-sort */ + int rc = SQLITE_OK; /* Return Code */ + + /* This routine only runs while holding the checkpoint lock. And + ** it only runs if there is actually content in the log (mxFrame>0). + */ + assert( pWal->ckptLock && pWal->hdr.mxFrame>0 ); + iLast = pWal->hdr.mxFrame; + + /* Allocate space for the WalIterator object. */ + nSegment = walFramePage(iLast) + 1; + nByte = sizeof(WalIterator) + + (nSegment-1)*sizeof(struct WalSegment) + + iLast*sizeof(ht_slot); + p = (WalIterator *)sqlite3_malloc64(nByte); + if( !p ){ + return SQLITE_NOMEM; + } + memset(p, 0, nByte); + p->nSegment = nSegment; + + /* Allocate temporary space used by the merge-sort routine. This block + ** of memory will be freed before this function returns. + */ + aTmp = (ht_slot *)sqlite3_malloc64( + sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) + ); + if( !aTmp ){ + rc = SQLITE_NOMEM; + } + + for(i=0; rc==SQLITE_OK && i<nSegment; i++){ + volatile ht_slot *aHash; + u32 iZero; + volatile u32 *aPgno; + + rc = walHashGet(pWal, i, &aHash, &aPgno, &iZero); + if( rc==SQLITE_OK ){ + int j; /* Counter variable */ + int nEntry; /* Number of entries in this segment */ + ht_slot *aIndex; /* Sorted index for this segment */ + + aPgno++; + if( (i+1)==nSegment ){ + nEntry = (int)(iLast - iZero); + }else{ + nEntry = (int)((u32*)aHash - (u32*)aPgno); + } + aIndex = &((ht_slot *)&p->aSegment[p->nSegment])[iZero]; + iZero++; + + for(j=0; j<nEntry; j++){ + aIndex[j] = (ht_slot)j; + } + walMergesort((u32 *)aPgno, aTmp, aIndex, &nEntry); + p->aSegment[i].iZero = iZero; + p->aSegment[i].nEntry = nEntry; + p->aSegment[i].aIndex = aIndex; + p->aSegment[i].aPgno = (u32 *)aPgno; + } + } + sqlite3_free(aTmp); + + if( rc!=SQLITE_OK ){ + walIteratorFree(p); + } + *pp = p; + return rc; +} + +/* +** Attempt to obtain the exclusive WAL lock defined by parameters lockIdx and +** n. If the attempt fails and parameter xBusy is not NULL, then it is a +** busy-handler function. Invoke it and retry the lock until either the +** lock is successfully obtained or the busy-handler returns 0. +*/ +static int walBusyLock( + Wal *pWal, /* WAL connection */ + int (*xBusy)(void*), /* Function to call when busy */ + void *pBusyArg, /* Context argument for xBusyHandler */ + int lockIdx, /* Offset of first byte to lock */ + int n /* Number of bytes to lock */ +){ + int rc; + do { + rc = walLockExclusive(pWal, lockIdx, n, 0); + }while( xBusy && rc==SQLITE_BUSY && xBusy(pBusyArg) ); + return rc; +} + +/* +** The cache of the wal-index header must be valid to call this function. +** Return the page-size in bytes used by the database. +*/ +static int walPagesize(Wal *pWal){ + return (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); +} + +/* +** The following is guaranteed when this function is called: +** +** a) the WRITER lock is held, +** b) the entire log file has been checkpointed, and +** c) any existing readers are reading exclusively from the database +** file - there are no readers that may attempt to read a frame from +** the log file. +** +** This function updates the shared-memory structures so that the next +** client to write to the database (which may be this one) does so by +** writing frames into the start of the log file. +** +** The value of parameter salt1 is used as the aSalt[1] value in the +** new wal-index header. It should be passed a pseudo-random value (i.e. +** one obtained from sqlite3_randomness()). +*/ +static void walRestartHdr(Wal *pWal, u32 salt1){ + volatile WalCkptInfo *pInfo = walCkptInfo(pWal); + int i; /* Loop counter */ + u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ + pWal->nCkpt++; + pWal->hdr.mxFrame = 0; + sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); + memcpy(&pWal->hdr.aSalt[1], &salt1, 4); + walIndexWriteHdr(pWal); + pInfo->nBackfill = 0; + pInfo->aReadMark[1] = 0; + for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; + assert( pInfo->aReadMark[0]==0 ); +} + +/* +** Copy as much content as we can from the WAL back into the database file +** in response to an sqlite3_wal_checkpoint() request or the equivalent. +** +** The amount of information copies from WAL to database might be limited +** by active readers. This routine will never overwrite a database page +** that a concurrent reader might be using. +** +** All I/O barrier operations (a.k.a fsyncs) occur in this routine when +** SQLite is in WAL-mode in synchronous=NORMAL. That means that if +** checkpoints are always run by a background thread or background +** process, foreground threads will never block on a lengthy fsync call. +** +** Fsync is called on the WAL before writing content out of the WAL and +** into the database. This ensures that if the new content is persistent +** in the WAL and can be recovered following a power-loss or hard reset. +** +** Fsync is also called on the database file if (and only if) the entire +** WAL content is copied into the database file. This second fsync makes +** it safe to delete the WAL since the new content will persist in the +** database file. +** +** This routine uses and updates the nBackfill field of the wal-index header. +** This is the only routine that will increase the value of nBackfill. +** (A WAL reset or recovery will revert nBackfill to zero, but not increase +** its value.) +** +** The caller must be holding sufficient locks to ensure that no other +** checkpoint is running (in any other thread or process) at the same +** time. +*/ +static int walCheckpoint( + Wal *pWal, /* Wal connection */ + int eMode, /* One of PASSIVE, FULL or RESTART */ + int (*xBusy)(void*), /* Function to call when busy */ + void *pBusyArg, /* Context argument for xBusyHandler */ + int sync_flags, /* Flags for OsSync() (or 0) */ + u8 *zBuf /* Temporary buffer to use */ +){ + int rc = SQLITE_OK; /* Return code */ + int szPage; /* Database page-size */ + WalIterator *pIter = 0; /* Wal iterator context */ + u32 iDbpage = 0; /* Next database page to write */ + u32 iFrame = 0; /* Wal frame containing data for iDbpage */ + u32 mxSafeFrame; /* Max frame that can be backfilled */ + u32 mxPage; /* Max database page to write */ + int i; /* Loop counter */ + volatile WalCkptInfo *pInfo; /* The checkpoint status information */ + + szPage = walPagesize(pWal); + testcase( szPage<=32768 ); + testcase( szPage>=65536 ); + pInfo = walCkptInfo(pWal); + if( pInfo->nBackfill<pWal->hdr.mxFrame ){ + + /* Allocate the iterator */ + rc = walIteratorInit(pWal, &pIter); + if( rc!=SQLITE_OK ){ + return rc; + } + assert( pIter ); + + /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked + ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ + assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); + + /* Compute in mxSafeFrame the index of the last frame of the WAL that is + ** safe to write into the database. Frames beyond mxSafeFrame might + ** overwrite database pages that are in use by active readers and thus + ** cannot be backfilled from the WAL. + */ + mxSafeFrame = pWal->hdr.mxFrame; + mxPage = pWal->hdr.nPage; + for(i=1; i<WAL_NREADER; i++){ + /* Thread-sanitizer reports that the following is an unsafe read, + ** as some other thread may be in the process of updating the value + ** of the aReadMark[] slot. The assumption here is that if that is + ** happening, the other client may only be increasing the value, + ** not decreasing it. So assuming either that either the "old" or + ** "new" version of the value is read, and not some arbitrary value + ** that would never be written by a real client, things are still + ** safe. */ + u32 y = pInfo->aReadMark[i]; + if( mxSafeFrame>y ){ + assert( y<=pWal->hdr.mxFrame ); + rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); + if( rc==SQLITE_OK ){ + pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED); + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + }else if( rc==SQLITE_BUSY ){ + mxSafeFrame = y; + xBusy = 0; + }else{ + goto walcheckpoint_out; + } + } + } + + if( pInfo->nBackfill<mxSafeFrame + && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0),1))==SQLITE_OK + ){ + i64 nSize; /* Current size of database file */ + u32 nBackfill = pInfo->nBackfill; + + /* Sync the WAL to disk */ + if( sync_flags ){ + rc = sqlite3OsSync(pWal->pWalFd, sync_flags); + } + + /* If the database may grow as a result of this checkpoint, hint + ** about the eventual size of the db file to the VFS layer. + */ + if( rc==SQLITE_OK ){ + i64 nReq = ((i64)mxPage * szPage); + rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); + if( rc==SQLITE_OK && nSize<nReq ){ + sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); + } + } + + + /* Iterate through the contents of the WAL, copying data to the db file */ + while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ + i64 iOffset; + assert( walFramePgno(pWal, iFrame)==iDbpage ); + if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ){ + continue; + } + iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; + /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ + rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); + if( rc!=SQLITE_OK ) break; + iOffset = (iDbpage-1)*(i64)szPage; + testcase( IS_BIG_INT(iOffset) ); + rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset); + if( rc!=SQLITE_OK ) break; + } + + /* If work was actually accomplished... */ + if( rc==SQLITE_OK ){ + if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ + i64 szDb = pWal->hdr.nPage*(i64)szPage; + testcase( IS_BIG_INT(szDb) ); + rc = sqlite3OsTruncate(pWal->pDbFd, szDb); + if( rc==SQLITE_OK && sync_flags ){ + rc = sqlite3OsSync(pWal->pDbFd, sync_flags); + } + } + if( rc==SQLITE_OK ){ + pInfo->nBackfill = mxSafeFrame; + } + } + + /* Release the reader lock held while backfilling */ + walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); + } + + if( rc==SQLITE_BUSY ){ + /* Reset the return code so as not to report a checkpoint failure + ** just because there are active readers. */ + rc = SQLITE_OK; + } + } + + /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the + ** entire wal file has been copied into the database file, then block + ** until all readers have finished using the wal file. This ensures that + ** the next process to write to the database restarts the wal file. + */ + if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + assert( pWal->writeLock ); + if( pInfo->nBackfill<pWal->hdr.mxFrame ){ + rc = SQLITE_BUSY; + }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){ + u32 salt1; + sqlite3_randomness(4, &salt1); + assert( pInfo->nBackfill==pWal->hdr.mxFrame ); + rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1); + if( rc==SQLITE_OK ){ + if( eMode==SQLITE_CHECKPOINT_TRUNCATE ){ + /* IMPLEMENTATION-OF: R-44699-57140 This mode works the same way as + ** SQLITE_CHECKPOINT_RESTART with the addition that it also + ** truncates the log file to zero bytes just prior to a + ** successful return. + ** + ** In theory, it might be safe to do this without updating the + ** wal-index header in shared memory, as all subsequent reader or + ** writer clients should see that the entire log file has been + ** checkpointed and behave accordingly. This seems unsafe though, + ** as it would leave the system in a state where the contents of + ** the wal-index header do not match the contents of the + ** file-system. To avoid this, update the wal-index header to + ** indicate that the log file contains zero valid frames. */ + walRestartHdr(pWal, salt1); + rc = sqlite3OsTruncate(pWal->pWalFd, 0); + } + walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); + } + } + } + + walcheckpoint_out: + walIteratorFree(pIter); + return rc; +} + +/* +** If the WAL file is currently larger than nMax bytes in size, truncate +** it to exactly nMax bytes. If an error occurs while doing so, ignore it. +*/ +static void walLimitSize(Wal *pWal, i64 nMax){ + i64 sz; + int rx; + rx = sqlite3OsFileSize(pWal->pWalFd, &sz); + if( rx==SQLITE_OK && (sz > nMax ) ){ + rx = sqlite3OsTruncate(pWal->pWalFd, nMax); + } + if( rx ){ + sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName); + } +} + +/* +** Close a connection to a log file. +*/ +SQLITE_PRIVATE int sqlite3WalClose( + Wal *pWal, /* Wal to close */ + int sync_flags, /* Flags to pass to OsSync() (or 0) */ + int nBuf, + u8 *zBuf /* Buffer of at least nBuf bytes */ +){ + int rc = SQLITE_OK; + if( pWal ){ + int isDelete = 0; /* True to unlink wal and wal-index files */ + + /* If an EXCLUSIVE lock can be obtained on the database file (using the + ** ordinary, rollback-mode locking methods, this guarantees that the + ** connection associated with this log file is the only connection to + ** the database. In this case checkpoint the database and unlink both + ** the wal and wal-index files. + ** + ** The EXCLUSIVE lock is not released before returning. + */ + rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE); + if( rc==SQLITE_OK ){ + if( pWal->exclusiveMode==WAL_NORMAL_MODE ){ + pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; + } + rc = sqlite3WalCheckpoint( + pWal, SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0 + ); + if( rc==SQLITE_OK ){ + int bPersist = -1; + sqlite3OsFileControlHint( + pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist + ); + if( bPersist!=1 ){ + /* Try to delete the WAL file if the checkpoint completed and + ** fsyned (rc==SQLITE_OK) and if we are not in persistent-wal + ** mode (!bPersist) */ + isDelete = 1; + }else if( pWal->mxWalSize>=0 ){ + /* Try to truncate the WAL file to zero bytes if the checkpoint + ** completed and fsynced (rc==SQLITE_OK) and we are in persistent + ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a + ** non-negative value (pWal->mxWalSize>=0). Note that we truncate + ** to zero bytes as truncating to the journal_size_limit might + ** leave a corrupt WAL file on disk. */ + walLimitSize(pWal, 0); + } + } + } + + walIndexClose(pWal, isDelete); + sqlite3OsClose(pWal->pWalFd); + if( isDelete ){ + sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); + } + WALTRACE(("WAL%p: closed\n", pWal)); + sqlite3_free((void *)pWal->apWiData); + sqlite3_free(pWal); + } + return rc; +} + +/* +** Try to read the wal-index header. Return 0 on success and 1 if +** there is a problem. +** +** The wal-index is in shared memory. Another thread or process might +** be writing the header at the same time this procedure is trying to +** read it, which might result in inconsistency. A dirty read is detected +** by verifying that both copies of the header are the same and also by +** a checksum on the header. +** +** If and only if the read is consistent and the header is different from +** pWal->hdr, then pWal->hdr is updated to the content of the new header +** and *pChanged is set to 1. +** +** If the checksum cannot be verified return non-zero. If the header +** is read successfully and the checksum verified, return zero. +*/ +static int walIndexTryHdr(Wal *pWal, int *pChanged){ + u32 aCksum[2]; /* Checksum on the header content */ + WalIndexHdr h1, h2; /* Two copies of the header content */ + WalIndexHdr volatile *aHdr; /* Header in shared memory */ + + /* The first page of the wal-index must be mapped at this point. */ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + + /* Read the header. This might happen concurrently with a write to the + ** same area of shared memory on a different CPU in a SMP, + ** meaning it is possible that an inconsistent snapshot is read + ** from the file. If this happens, return non-zero. + ** + ** There are two copies of the header at the beginning of the wal-index. + ** When reading, read [0] first then [1]. Writes are in the reverse order. + ** Memory barriers are used to prevent the compiler or the hardware from + ** reordering the reads and writes. + */ + aHdr = walIndexHdr(pWal); + memcpy(&h1, (void *)&aHdr[0], sizeof(h1)); + walShmBarrier(pWal); + memcpy(&h2, (void *)&aHdr[1], sizeof(h2)); + + if( memcmp(&h1, &h2, sizeof(h1))!=0 ){ + return 1; /* Dirty read */ + } + if( h1.isInit==0 ){ + return 1; /* Malformed header - probably all zeros */ + } + walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum); + if( aCksum[0]!=h1.aCksum[0] || aCksum[1]!=h1.aCksum[1] ){ + return 1; /* Checksum does not match */ + } + + if( memcmp(&pWal->hdr, &h1, sizeof(WalIndexHdr)) ){ + *pChanged = 1; + memcpy(&pWal->hdr, &h1, sizeof(WalIndexHdr)); + pWal->szPage = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); + testcase( pWal->szPage<=32768 ); + testcase( pWal->szPage>=65536 ); + } + + /* The header was successfully read. Return zero. */ + return 0; +} + +/* +** Read the wal-index header from the wal-index and into pWal->hdr. +** If the wal-header appears to be corrupt, try to reconstruct the +** wal-index from the WAL before returning. +** +** Set *pChanged to 1 if the wal-index header value in pWal->hdr is +** changed by this operation. If pWal->hdr is unchanged, set *pChanged +** to 0. +** +** If the wal-index header is successfully read, return SQLITE_OK. +** Otherwise an SQLite error code. +*/ +static int walIndexReadHdr(Wal *pWal, int *pChanged){ + int rc; /* Return code */ + int badHdr; /* True if a header read failed */ + volatile u32 *page0; /* Chunk of wal-index containing header */ + + /* Ensure that page 0 of the wal-index (the page that contains the + ** wal-index header) is mapped. Return early if an error occurs here. + */ + assert( pChanged ); + rc = walIndexPage(pWal, 0, &page0); + if( rc!=SQLITE_OK ){ + return rc; + }; + assert( page0 || pWal->writeLock==0 ); + + /* If the first page of the wal-index has been mapped, try to read the + ** wal-index header immediately, without holding any lock. This usually + ** works, but may fail if the wal-index header is corrupt or currently + ** being modified by another thread or process. + */ + badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1); + + /* If the first attempt failed, it might have been due to a race + ** with a writer. So get a WRITE lock and try again. + */ + assert( badHdr==0 || pWal->writeLock==0 ); + if( badHdr ){ + if( pWal->readOnly & WAL_SHM_RDONLY ){ + if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ + walUnlockShared(pWal, WAL_WRITE_LOCK); + rc = SQLITE_READONLY_RECOVERY; + } + }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1, 1)) ){ + pWal->writeLock = 1; + if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ + badHdr = walIndexTryHdr(pWal, pChanged); + if( badHdr ){ + /* If the wal-index header is still malformed even while holding + ** a WRITE lock, it can only mean that the header is corrupted and + ** needs to be reconstructed. So run recovery to do exactly that. + */ + rc = walIndexRecover(pWal); + *pChanged = 1; + } + } + pWal->writeLock = 0; + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + } + } + + /* If the header is read successfully, check the version number to make + ** sure the wal-index was not constructed with some future format that + ** this version of SQLite cannot understand. + */ + if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){ + rc = SQLITE_CANTOPEN_BKPT; + } + + return rc; +} + +/* +** This is the value that walTryBeginRead returns when it needs to +** be retried. +*/ +#define WAL_RETRY (-1) + +/* +** Attempt to start a read transaction. This might fail due to a race or +** other transient condition. When that happens, it returns WAL_RETRY to +** indicate to the caller that it is safe to retry immediately. +** +** On success return SQLITE_OK. On a permanent failure (such an +** I/O error or an SQLITE_BUSY because another process is running +** recovery) return a positive error code. +** +** The useWal parameter is true to force the use of the WAL and disable +** the case where the WAL is bypassed because it has been completely +** checkpointed. If useWal==0 then this routine calls walIndexReadHdr() +** to make a copy of the wal-index header into pWal->hdr. If the +** wal-index header has changed, *pChanged is set to 1 (as an indication +** to the caller that the local paget cache is obsolete and needs to be +** flushed.) When useWal==1, the wal-index header is assumed to already +** be loaded and the pChanged parameter is unused. +** +** The caller must set the cnt parameter to the number of prior calls to +** this routine during the current read attempt that returned WAL_RETRY. +** This routine will start taking more aggressive measures to clear the +** race conditions after multiple WAL_RETRY returns, and after an excessive +** number of errors will ultimately return SQLITE_PROTOCOL. The +** SQLITE_PROTOCOL return indicates that some other process has gone rogue +** and is not honoring the locking protocol. There is a vanishingly small +** chance that SQLITE_PROTOCOL could be returned because of a run of really +** bad luck when there is lots of contention for the wal-index, but that +** possibility is so small that it can be safely neglected, we believe. +** +** On success, this routine obtains a read lock on +** WAL_READ_LOCK(pWal->readLock). The pWal->readLock integer is +** in the range 0 <= pWal->readLock < WAL_NREADER. If pWal->readLock==(-1) +** that means the Wal does not hold any read lock. The reader must not +** access any database page that is modified by a WAL frame up to and +** including frame number aReadMark[pWal->readLock]. The reader will +** use WAL frames up to and including pWal->hdr.mxFrame if pWal->readLock>0 +** Or if pWal->readLock==0, then the reader will ignore the WAL +** completely and get all content directly from the database file. +** If the useWal parameter is 1 then the WAL will never be ignored and +** this routine will always set pWal->readLock>0 on success. +** When the read transaction is completed, the caller must release the +** lock on WAL_READ_LOCK(pWal->readLock) and set pWal->readLock to -1. +** +** This routine uses the nBackfill and aReadMark[] fields of the header +** to select a particular WAL_READ_LOCK() that strives to let the +** checkpoint process do as much work as possible. This routine might +** update values of the aReadMark[] array in the header, but if it does +** so it takes care to hold an exclusive lock on the corresponding +** WAL_READ_LOCK() while changing values. +*/ +static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ + volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ + u32 mxReadMark; /* Largest aReadMark[] value */ + int mxI; /* Index of largest aReadMark[] value */ + int i; /* Loop counter */ + int rc = SQLITE_OK; /* Return code */ + + assert( pWal->readLock<0 ); /* Not currently locked */ + + /* Take steps to avoid spinning forever if there is a protocol error. + ** + ** Circumstances that cause a RETRY should only last for the briefest + ** instances of time. No I/O or other system calls are done while the + ** locks are held, so the locks should not be held for very long. But + ** if we are unlucky, another process that is holding a lock might get + ** paged out or take a page-fault that is time-consuming to resolve, + ** during the few nanoseconds that it is holding the lock. In that case, + ** it might take longer than normal for the lock to free. + ** + ** After 5 RETRYs, we begin calling sqlite3OsSleep(). The first few + ** calls to sqlite3OsSleep() have a delay of 1 microsecond. Really this + ** is more of a scheduler yield than an actual delay. But on the 10th + ** an subsequent retries, the delays start becoming longer and longer, + ** so that on the 100th (and last) RETRY we delay for 323 milliseconds. + ** The total delay time before giving up is less than 10 seconds. + */ + if( cnt>5 ){ + int nDelay = 1; /* Pause time in microseconds */ + if( cnt>100 ){ + VVA_ONLY( pWal->lockError = 1; ) + return SQLITE_PROTOCOL; + } + if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39; + sqlite3OsSleep(pWal->pVfs, nDelay); + } + + if( !useWal ){ + rc = walIndexReadHdr(pWal, pChanged); + if( rc==SQLITE_BUSY ){ + /* If there is not a recovery running in another thread or process + ** then convert BUSY errors to WAL_RETRY. If recovery is known to + ** be running, convert BUSY to BUSY_RECOVERY. There is a race here + ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY + ** would be technically correct. But the race is benign since with + ** WAL_RETRY this routine will be called again and will probably be + ** right on the second iteration. + */ + if( pWal->apWiData[0]==0 ){ + /* This branch is taken when the xShmMap() method returns SQLITE_BUSY. + ** We assume this is a transient condition, so return WAL_RETRY. The + ** xShmMap() implementation used by the default unix and win32 VFS + ** modules may return SQLITE_BUSY due to a race condition in the + ** code that determines whether or not the shared-memory region + ** must be zeroed before the requested page is returned. + */ + rc = WAL_RETRY; + }else if( SQLITE_OK==(rc = walLockShared(pWal, WAL_RECOVER_LOCK)) ){ + walUnlockShared(pWal, WAL_RECOVER_LOCK); + rc = WAL_RETRY; + }else if( rc==SQLITE_BUSY ){ + rc = SQLITE_BUSY_RECOVERY; + } + } + if( rc!=SQLITE_OK ){ + return rc; + } + } + + pInfo = walCkptInfo(pWal); + if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){ + /* The WAL has been completely backfilled (or it is empty). + ** and can be safely ignored. + */ + rc = walLockShared(pWal, WAL_READ_LOCK(0)); + walShmBarrier(pWal); + if( rc==SQLITE_OK ){ + if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ + /* It is not safe to allow the reader to continue here if frames + ** may have been appended to the log before READ_LOCK(0) was obtained. + ** When holding READ_LOCK(0), the reader ignores the entire log file, + ** which implies that the database file contains a trustworthy + ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from + ** happening, this is usually correct. + ** + ** However, if frames have been appended to the log (or if the log + ** is wrapped and written for that matter) before the READ_LOCK(0) + ** is obtained, that is not necessarily true. A checkpointer may + ** have started to backfill the appended frames but crashed before + ** it finished. Leaving a corrupt image in the database file. + */ + walUnlockShared(pWal, WAL_READ_LOCK(0)); + return WAL_RETRY; + } + pWal->readLock = 0; + return SQLITE_OK; + }else if( rc!=SQLITE_BUSY ){ + return rc; + } + } + + /* If we get this far, it means that the reader will want to use + ** the WAL to get at content from recent commits. The job now is + ** to select one of the aReadMark[] entries that is closest to + ** but not exceeding pWal->hdr.mxFrame and lock that entry. + */ + mxReadMark = 0; + mxI = 0; + for(i=1; i<WAL_NREADER; i++){ + u32 thisMark = pInfo->aReadMark[i]; + if( mxReadMark<=thisMark && thisMark<=pWal->hdr.mxFrame ){ + assert( thisMark!=READMARK_NOT_USED ); + mxReadMark = thisMark; + mxI = i; + } + } + /* There was once an "if" here. The extra "{" is to preserve indentation. */ + { + if( (pWal->readOnly & WAL_SHM_RDONLY)==0 + && (mxReadMark<pWal->hdr.mxFrame || mxI==0) + ){ + for(i=1; i<WAL_NREADER; i++){ + rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1, 0); + if( rc==SQLITE_OK ){ + mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame; + mxI = i; + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + break; + }else if( rc!=SQLITE_BUSY ){ + return rc; + } + } + } + if( mxI==0 ){ + assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 ); + return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK; + } + + rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); + if( rc ){ + return rc==SQLITE_BUSY ? WAL_RETRY : rc; + } + /* Now that the read-lock has been obtained, check that neither the + ** value in the aReadMark[] array or the contents of the wal-index + ** header have changed. + ** + ** It is necessary to check that the wal-index header did not change + ** between the time it was read and when the shared-lock was obtained + ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility + ** that the log file may have been wrapped by a writer, or that frames + ** that occur later in the log than pWal->hdr.mxFrame may have been + ** copied into the database by a checkpointer. If either of these things + ** happened, then reading the database with the current value of + ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry + ** instead. + ** + ** Before checking that the live wal-index header has not changed + ** since it was read, set Wal.minFrame to the first frame in the wal + ** file that has not yet been checkpointed. This client will not need + ** to read any frames earlier than minFrame from the wal file - they + ** can be safely read directly from the database file. + ** + ** Because a ShmBarrier() call is made between taking the copy of + ** nBackfill and checking that the wal-header in shared-memory still + ** matches the one cached in pWal->hdr, it is guaranteed that the + ** checkpointer that set nBackfill was not working with a wal-index + ** header newer than that cached in pWal->hdr. If it were, that could + ** cause a problem. The checkpointer could omit to checkpoint + ** a version of page X that lies before pWal->minFrame (call that version + ** A) on the basis that there is a newer version (version B) of the same + ** page later in the wal file. But if version B happens to like past + ** frame pWal->hdr.mxFrame - then the client would incorrectly assume + ** that it can read version A from the database file. However, since + ** we can guarantee that the checkpointer that set nBackfill could not + ** see any pages past pWal->hdr.mxFrame, this problem does not come up. + */ + pWal->minFrame = pInfo->nBackfill+1; + walShmBarrier(pWal); + if( pInfo->aReadMark[mxI]!=mxReadMark + || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) + ){ + walUnlockShared(pWal, WAL_READ_LOCK(mxI)); + return WAL_RETRY; + }else{ + assert( mxReadMark<=pWal->hdr.mxFrame ); + pWal->readLock = (i16)mxI; + } + } + return rc; +} + +/* +** Begin a read transaction on the database. +** +** This routine used to be called sqlite3OpenSnapshot() and with good reason: +** it takes a snapshot of the state of the WAL and wal-index for the current +** instant in time. The current thread will continue to use this snapshot. +** Other threads might append new content to the WAL and wal-index but +** that extra content is ignored by the current thread. +** +** If the database contents have changes since the previous read +** transaction, then *pChanged is set to 1 before returning. The +** Pager layer will use this to know that is cache is stale and +** needs to be flushed. +*/ +SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ + int rc; /* Return code */ + int cnt = 0; /* Number of TryBeginRead attempts */ + + do{ + rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); + }while( rc==WAL_RETRY ); + testcase( (rc&0xff)==SQLITE_BUSY ); + testcase( (rc&0xff)==SQLITE_IOERR ); + testcase( rc==SQLITE_PROTOCOL ); + testcase( rc==SQLITE_OK ); + return rc; +} + +/* +** Finish with a read transaction. All this does is release the +** read-lock. +*/ +SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal){ + sqlite3WalEndWriteTransaction(pWal); + if( pWal->readLock>=0 ){ + walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); + pWal->readLock = -1; + } +} + +/* +** Search the wal file for page pgno. If found, set *piRead to the frame that +** contains the page. Otherwise, if pgno is not in the wal file, set *piRead +** to zero. +** +** Return SQLITE_OK if successful, or an error code if an error occurs. If an +** error does occur, the final value of *piRead is undefined. +*/ +SQLITE_PRIVATE int sqlite3WalFindFrame( + Wal *pWal, /* WAL handle */ + Pgno pgno, /* Database page number to read data for */ + u32 *piRead /* OUT: Frame number (or zero) */ +){ + u32 iRead = 0; /* If !=0, WAL frame to return data from */ + u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ + int iHash; /* Used to loop through N hash tables */ + int iMinHash; + + /* This routine is only be called from within a read transaction. */ + assert( pWal->readLock>=0 || pWal->lockError ); + + /* If the "last page" field of the wal-index header snapshot is 0, then + ** no data will be read from the wal under any circumstances. Return early + ** in this case as an optimization. Likewise, if pWal->readLock==0, + ** then the WAL is ignored by the reader so return early, as if the + ** WAL were empty. + */ + if( iLast==0 || pWal->readLock==0 ){ + *piRead = 0; + return SQLITE_OK; + } + + /* Search the hash table or tables for an entry matching page number + ** pgno. Each iteration of the following for() loop searches one + ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). + ** + ** This code might run concurrently to the code in walIndexAppend() + ** that adds entries to the wal-index (and possibly to this hash + ** table). This means the value just read from the hash + ** slot (aHash[iKey]) may have been added before or after the + ** current read transaction was opened. Values added after the + ** read transaction was opened may have been written incorrectly - + ** i.e. these slots may contain garbage data. However, we assume + ** that any slots written before the current read transaction was + ** opened remain unmodified. + ** + ** For the reasons above, the if(...) condition featured in the inner + ** loop of the following block is more stringent that would be required + ** if we had exclusive access to the hash-table: + ** + ** (aPgno[iFrame]==pgno): + ** This condition filters out normal hash-table collisions. + ** + ** (iFrame<=iLast): + ** This condition filters out entries that were added to the hash + ** table after the current read-transaction had started. + */ + iMinHash = walFramePage(pWal->minFrame); + for(iHash=walFramePage(iLast); iHash>=iMinHash && iRead==0; iHash--){ + volatile ht_slot *aHash; /* Pointer to hash table */ + volatile u32 *aPgno; /* Pointer to array of page numbers */ + u32 iZero; /* Frame number corresponding to aPgno[0] */ + int iKey; /* Hash slot index */ + int nCollide; /* Number of hash collisions remaining */ + int rc; /* Error code */ + + rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero); + if( rc!=SQLITE_OK ){ + return rc; + } + nCollide = HASHTABLE_NSLOT; + for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ + u32 iFrame = aHash[iKey] + iZero; + if( iFrame<=iLast && iFrame>=pWal->minFrame && aPgno[aHash[iKey]]==pgno ){ + assert( iFrame>iRead || CORRUPT_DB ); + iRead = iFrame; + } + if( (nCollide--)==0 ){ + return SQLITE_CORRUPT_BKPT; + } + } + } + +#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT + /* If expensive assert() statements are available, do a linear search + ** of the wal-index file content. Make sure the results agree with the + ** result obtained using the hash indexes above. */ + { + u32 iRead2 = 0; + u32 iTest; + assert( pWal->minFrame>0 ); + for(iTest=iLast; iTest>=pWal->minFrame; iTest--){ + if( walFramePgno(pWal, iTest)==pgno ){ + iRead2 = iTest; + break; + } + } + assert( iRead==iRead2 ); + } +#endif + + *piRead = iRead; + return SQLITE_OK; +} + +/* +** Read the contents of frame iRead from the wal file into buffer pOut +** (which is nOut bytes in size). Return SQLITE_OK if successful, or an +** error code otherwise. +*/ +SQLITE_PRIVATE int sqlite3WalReadFrame( + Wal *pWal, /* WAL handle */ + u32 iRead, /* Frame to read */ + int nOut, /* Size of buffer pOut in bytes */ + u8 *pOut /* Buffer to write page data to */ +){ + int sz; + i64 iOffset; + sz = pWal->hdr.szPage; + sz = (sz&0xfe00) + ((sz&0x0001)<<16); + testcase( sz<=32768 ); + testcase( sz>=65536 ); + iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; + /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ + return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset); +} + +/* +** Return the size of the database in pages (or zero, if unknown). +*/ +SQLITE_PRIVATE Pgno sqlite3WalDbsize(Wal *pWal){ + if( pWal && ALWAYS(pWal->readLock>=0) ){ + return pWal->hdr.nPage; + } + return 0; +} + + +/* +** This function starts a write transaction on the WAL. +** +** A read transaction must have already been started by a prior call +** to sqlite3WalBeginReadTransaction(). +** +** If another thread or process has written into the database since +** the read transaction was started, then it is not possible for this +** thread to write as doing so would cause a fork. So this routine +** returns SQLITE_BUSY in that case and no write transaction is started. +** +** There can only be a single writer active at a time. +*/ +SQLITE_PRIVATE int sqlite3WalBeginWriteTransaction(Wal *pWal){ + int rc; + + /* Cannot start a write transaction without first holding a read + ** transaction. */ + assert( pWal->readLock>=0 ); + + if( pWal->readOnly ){ + return SQLITE_READONLY; + } + + /* Only one writer allowed at a time. Get the write lock. Return + ** SQLITE_BUSY if unable. + */ + rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1, 0); + if( rc ){ + return rc; + } + pWal->writeLock = 1; + + /* If another connection has written to the database file since the + ** time the read transaction on this connection was started, then + ** the write is disallowed. + */ + if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + pWal->writeLock = 0; + rc = SQLITE_BUSY_SNAPSHOT; + } + + return rc; +} + +/* +** End a write transaction. The commit has already been done. This +** routine merely releases the lock. +*/ +SQLITE_PRIVATE int sqlite3WalEndWriteTransaction(Wal *pWal){ + if( pWal->writeLock ){ + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + pWal->writeLock = 0; + pWal->truncateOnCommit = 0; + } + return SQLITE_OK; +} + +/* +** If any data has been written (but not committed) to the log file, this +** function moves the write-pointer back to the start of the transaction. +** +** Additionally, the callback function is invoked for each frame written +** to the WAL since the start of the transaction. If the callback returns +** other than SQLITE_OK, it is not invoked again and the error code is +** returned to the caller. +** +** Otherwise, if the callback function does not return an error, this +** function returns SQLITE_OK. +*/ +SQLITE_PRIVATE int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ + int rc = SQLITE_OK; + if( ALWAYS(pWal->writeLock) ){ + Pgno iMax = pWal->hdr.mxFrame; + Pgno iFrame; + + /* Restore the clients cache of the wal-index header to the state it + ** was in before the client began writing to the database. + */ + memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr)); + + for(iFrame=pWal->hdr.mxFrame+1; + ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; + iFrame++ + ){ + /* This call cannot fail. Unless the page for which the page number + ** is passed as the second argument is (a) in the cache and + ** (b) has an outstanding reference, then xUndo is either a no-op + ** (if (a) is false) or simply expels the page from the cache (if (b) + ** is false). + ** + ** If the upper layer is doing a rollback, it is guaranteed that there + ** are no outstanding references to any page other than page 1. And + ** page 1 is never written to the log until the transaction is + ** committed. As a result, the call to xUndo may not fail. + */ + assert( walFramePgno(pWal, iFrame)!=1 ); + rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); + } + if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal); + } + return rc; +} + +/* +** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 +** values. This function populates the array with values required to +** "rollback" the write position of the WAL handle back to the current +** point in the event of a savepoint rollback (via WalSavepointUndo()). +*/ +SQLITE_PRIVATE void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ + assert( pWal->writeLock ); + aWalData[0] = pWal->hdr.mxFrame; + aWalData[1] = pWal->hdr.aFrameCksum[0]; + aWalData[2] = pWal->hdr.aFrameCksum[1]; + aWalData[3] = pWal->nCkpt; +} + +/* +** Move the write position of the WAL back to the point identified by +** the values in the aWalData[] array. aWalData must point to an array +** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated +** by a call to WalSavepoint(). +*/ +SQLITE_PRIVATE int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ + int rc = SQLITE_OK; + + assert( pWal->writeLock ); + assert( aWalData[3]!=pWal->nCkpt || aWalData[0]<=pWal->hdr.mxFrame ); + + if( aWalData[3]!=pWal->nCkpt ){ + /* This savepoint was opened immediately after the write-transaction + ** was started. Right after that, the writer decided to wrap around + ** to the start of the log. Update the savepoint values to match. + */ + aWalData[0] = 0; + aWalData[3] = pWal->nCkpt; + } + + if( aWalData[0]<pWal->hdr.mxFrame ){ + pWal->hdr.mxFrame = aWalData[0]; + pWal->hdr.aFrameCksum[0] = aWalData[1]; + pWal->hdr.aFrameCksum[1] = aWalData[2]; + walCleanupHash(pWal); + } + + return rc; +} + +/* +** This function is called just before writing a set of frames to the log +** file (see sqlite3WalFrames()). It checks to see if, instead of appending +** to the current log file, it is possible to overwrite the start of the +** existing log file with the new frames (i.e. "reset" the log). If so, +** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left +** unchanged. +** +** SQLITE_OK is returned if no error is encountered (regardless of whether +** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned +** if an error occurs. +*/ +static int walRestartLog(Wal *pWal){ + int rc = SQLITE_OK; + int cnt; + + if( pWal->readLock==0 ){ + volatile WalCkptInfo *pInfo = walCkptInfo(pWal); + assert( pInfo->nBackfill==pWal->hdr.mxFrame ); + if( pInfo->nBackfill>0 ){ + u32 salt1; + sqlite3_randomness(4, &salt1); + rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1, 0); + if( rc==SQLITE_OK ){ + /* If all readers are using WAL_READ_LOCK(0) (in other words if no + ** readers are currently using the WAL), then the transactions + ** frames will overwrite the start of the existing log. Update the + ** wal-index header to reflect this. + ** + ** In theory it would be Ok to update the cache of the header only + ** at this point. But updating the actual wal-index header is also + ** safe and means there is no special case for sqlite3WalUndo() + ** to handle if this transaction is rolled back. */ + walRestartHdr(pWal, salt1); + walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); + }else if( rc!=SQLITE_BUSY ){ + return rc; + } + } + walUnlockShared(pWal, WAL_READ_LOCK(0)); + pWal->readLock = -1; + cnt = 0; + do{ + int notUsed; + rc = walTryBeginRead(pWal, ¬Used, 1, ++cnt); + }while( rc==WAL_RETRY ); + assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */ + testcase( (rc&0xff)==SQLITE_IOERR ); + testcase( rc==SQLITE_PROTOCOL ); + testcase( rc==SQLITE_OK ); + } + return rc; +} + +/* +** Information about the current state of the WAL file and where +** the next fsync should occur - passed from sqlite3WalFrames() into +** walWriteToLog(). +*/ +typedef struct WalWriter { + Wal *pWal; /* The complete WAL information */ + sqlite3_file *pFd; /* The WAL file to which we write */ + sqlite3_int64 iSyncPoint; /* Fsync at this offset */ + int syncFlags; /* Flags for the fsync */ + int szPage; /* Size of one page */ +} WalWriter; + +/* +** Write iAmt bytes of content into the WAL file beginning at iOffset. +** Do a sync when crossing the p->iSyncPoint boundary. +** +** In other words, if iSyncPoint is in between iOffset and iOffset+iAmt, +** first write the part before iSyncPoint, then sync, then write the +** rest. +*/ +static int walWriteToLog( + WalWriter *p, /* WAL to write to */ + void *pContent, /* Content to be written */ + int iAmt, /* Number of bytes to write */ + sqlite3_int64 iOffset /* Start writing at this offset */ +){ + int rc; + if( iOffset<p->iSyncPoint && iOffset+iAmt>=p->iSyncPoint ){ + int iFirstAmt = (int)(p->iSyncPoint - iOffset); + rc = sqlite3OsWrite(p->pFd, pContent, iFirstAmt, iOffset); + if( rc ) return rc; + iOffset += iFirstAmt; + iAmt -= iFirstAmt; + pContent = (void*)(iFirstAmt + (char*)pContent); + assert( p->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) ); + rc = sqlite3OsSync(p->pFd, p->syncFlags & SQLITE_SYNC_MASK); + if( iAmt==0 || rc ) return rc; + } + rc = sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset); + return rc; +} + +/* +** Write out a single frame of the WAL +*/ +static int walWriteOneFrame( + WalWriter *p, /* Where to write the frame */ + PgHdr *pPage, /* The page of the frame to be written */ + int nTruncate, /* The commit flag. Usually 0. >0 for commit */ + sqlite3_int64 iOffset /* Byte offset at which to write */ +){ + int rc; /* Result code from subfunctions */ + void *pData; /* Data actually written */ + u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ +#if defined(SQLITE_HAS_CODEC) + if( (pData = sqlite3PagerCodec(pPage))==0 ) return SQLITE_NOMEM; +#else + pData = pPage->pData; +#endif + walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); + rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); + if( rc ) return rc; + /* Write the page data */ + rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); + return rc; +} + +/* +** Write a set of frames to the log. The caller must hold the write-lock +** on the log file (obtained using sqlite3WalBeginWriteTransaction()). +*/ +SQLITE_PRIVATE int sqlite3WalFrames( + Wal *pWal, /* Wal handle to write to */ + int szPage, /* Database page-size in bytes */ + PgHdr *pList, /* List of dirty pages to write */ + Pgno nTruncate, /* Database size after this commit */ + int isCommit, /* True if this is a commit */ + int sync_flags /* Flags to pass to OsSync() (or 0) */ +){ + int rc; /* Used to catch return codes */ + u32 iFrame; /* Next frame address */ + PgHdr *p; /* Iterator to run through pList with. */ + PgHdr *pLast = 0; /* Last frame in list */ + int nExtra = 0; /* Number of extra copies of last page */ + int szFrame; /* The size of a single frame */ + i64 iOffset; /* Next byte to write in WAL file */ + WalWriter w; /* The writer */ + + assert( pList ); + assert( pWal->writeLock ); + + /* If this frame set completes a transaction, then nTruncate>0. If + ** nTruncate==0 then this frame set does not complete the transaction. */ + assert( (isCommit!=0)==(nTruncate!=0) ); + +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) + { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} + WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", + pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill")); + } +#endif + + /* See if it is possible to write these frames into the start of the + ** log file, instead of appending to it at pWal->hdr.mxFrame. + */ + if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ + return rc; + } + + /* If this is the first frame written into the log, write the WAL + ** header to the start of the WAL file. See comments at the top of + ** this source file for a description of the WAL header format. + */ + iFrame = pWal->hdr.mxFrame; + if( iFrame==0 ){ + u8 aWalHdr[WAL_HDRSIZE]; /* Buffer to assemble wal-header in */ + u32 aCksum[2]; /* Checksum for wal-header */ + + sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN)); + sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION); + sqlite3Put4byte(&aWalHdr[8], szPage); + sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt); + if( pWal->nCkpt==0 ) sqlite3_randomness(8, pWal->hdr.aSalt); + memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8); + walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum); + sqlite3Put4byte(&aWalHdr[24], aCksum[0]); + sqlite3Put4byte(&aWalHdr[28], aCksum[1]); + + pWal->szPage = szPage; + pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN; + pWal->hdr.aFrameCksum[0] = aCksum[0]; + pWal->hdr.aFrameCksum[1] = aCksum[1]; + pWal->truncateOnCommit = 1; + + rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0); + WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok")); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Sync the header (unless SQLITE_IOCAP_SEQUENTIAL is true or unless + ** all syncing is turned off by PRAGMA synchronous=OFF). Otherwise + ** an out-of-order write following a WAL restart could result in + ** database corruption. See the ticket: + ** + ** http://localhost:591/sqlite/info/ff5be73dee + */ + if( pWal->syncHeader && sync_flags ){ + rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK); + if( rc ) return rc; + } + } + assert( (int)pWal->szPage==szPage ); + + /* Setup information needed to write frames into the WAL */ + w.pWal = pWal; + w.pFd = pWal->pWalFd; + w.iSyncPoint = 0; + w.syncFlags = sync_flags; + w.szPage = szPage; + iOffset = walFrameOffset(iFrame+1, szPage); + szFrame = szPage + WAL_FRAME_HDRSIZE; + + /* Write all frames into the log file exactly once */ + for(p=pList; p; p=p->pDirty){ + int nDbSize; /* 0 normally. Positive == commit flag */ + iFrame++; + assert( iOffset==walFrameOffset(iFrame, szPage) ); + nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0; + rc = walWriteOneFrame(&w, p, nDbSize, iOffset); + if( rc ) return rc; + pLast = p; + iOffset += szFrame; + } + + /* If this is the end of a transaction, then we might need to pad + ** the transaction and/or sync the WAL file. + ** + ** Padding and syncing only occur if this set of frames complete a + ** transaction and if PRAGMA synchronous=FULL. If synchronous==NORMAL + ** or synchronous==OFF, then no padding or syncing are needed. + ** + ** If SQLITE_IOCAP_POWERSAFE_OVERWRITE is defined, then padding is not + ** needed and only the sync is done. If padding is needed, then the + ** final frame is repeated (with its commit mark) until the next sector + ** boundary is crossed. Only the part of the WAL prior to the last + ** sector boundary is synced; the part of the last frame that extends + ** past the sector boundary is written after the sync. + */ + if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){ + if( pWal->padToSectorBoundary ){ + int sectorSize = sqlite3SectorSize(pWal->pWalFd); + w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize; + while( iOffset<w.iSyncPoint ){ + rc = walWriteOneFrame(&w, pLast, nTruncate, iOffset); + if( rc ) return rc; + iOffset += szFrame; + nExtra++; + } + }else{ + rc = sqlite3OsSync(w.pFd, sync_flags & SQLITE_SYNC_MASK); + } + } + + /* If this frame set completes the first transaction in the WAL and + ** if PRAGMA journal_size_limit is set, then truncate the WAL to the + ** journal size limit, if possible. + */ + if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){ + i64 sz = pWal->mxWalSize; + if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){ + sz = walFrameOffset(iFrame+nExtra+1, szPage); + } + walLimitSize(pWal, sz); + pWal->truncateOnCommit = 0; + } + + /* Append data to the wal-index. It is not necessary to lock the + ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index + ** guarantees that there are no other writers, and no data that may + ** be in use by existing readers is being overwritten. + */ + iFrame = pWal->hdr.mxFrame; + for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ + iFrame++; + rc = walIndexAppend(pWal, iFrame, p->pgno); + } + while( rc==SQLITE_OK && nExtra>0 ){ + iFrame++; + nExtra--; + rc = walIndexAppend(pWal, iFrame, pLast->pgno); + } + + if( rc==SQLITE_OK ){ + /* Update the private copy of the header. */ + pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); + testcase( szPage<=32768 ); + testcase( szPage>=65536 ); + pWal->hdr.mxFrame = iFrame; + if( isCommit ){ + pWal->hdr.iChange++; + pWal->hdr.nPage = nTruncate; + } + /* If this is a commit, update the wal-index header too. */ + if( isCommit ){ + walIndexWriteHdr(pWal); + pWal->iCallback = iFrame; + } + } + + WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok")); + return rc; +} + +/* +** This routine is called to implement sqlite3_wal_checkpoint() and +** related interfaces. +** +** Obtain a CHECKPOINT lock and then backfill as much information as +** we can from WAL into the database. +** +** If parameter xBusy is not NULL, it is a pointer to a busy-handler +** callback. In this case this function runs a blocking checkpoint. +*/ +SQLITE_PRIVATE int sqlite3WalCheckpoint( + Wal *pWal, /* Wal connection */ + int eMode, /* PASSIVE, FULL, RESTART, or TRUNCATE */ + int (*xBusy)(void*), /* Function to call when busy */ + void *pBusyArg, /* Context argument for xBusyHandler */ + int sync_flags, /* Flags to sync db file with (or 0) */ + int nBuf, /* Size of temporary buffer */ + u8 *zBuf, /* Temporary buffer to use */ + int *pnLog, /* OUT: Number of frames in WAL */ + int *pnCkpt /* OUT: Number of backfilled frames in WAL */ +){ + int rc; /* Return code */ + int isChanged = 0; /* True if a new wal-index header is loaded */ + int eMode2 = eMode; /* Mode to pass to walCheckpoint() */ + int (*xBusy2)(void*) = xBusy; /* Busy handler for eMode2 */ + + assert( pWal->ckptLock==0 ); + assert( pWal->writeLock==0 ); + + /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked + ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ + assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); + + if( pWal->readOnly ) return SQLITE_READONLY; + WALTRACE(("WAL%p: checkpoint begins\n", pWal)); + + /* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive + ** "checkpoint" lock on the database file. */ + rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1, 0); + if( rc ){ + /* EVIDENCE-OF: R-10421-19736 If any other process is running a + ** checkpoint operation at the same time, the lock cannot be obtained and + ** SQLITE_BUSY is returned. + ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured, + ** it will not be invoked in this case. + */ + testcase( rc==SQLITE_BUSY ); + testcase( xBusy!=0 ); + return rc; + } + pWal->ckptLock = 1; + + /* IMPLEMENTATION-OF: R-59782-36818 The SQLITE_CHECKPOINT_FULL, RESTART and + ** TRUNCATE modes also obtain the exclusive "writer" lock on the database + ** file. + ** + ** EVIDENCE-OF: R-60642-04082 If the writer lock cannot be obtained + ** immediately, and a busy-handler is configured, it is invoked and the + ** writer lock retried until either the busy-handler returns 0 or the + ** lock is successfully obtained. + */ + if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1); + if( rc==SQLITE_OK ){ + pWal->writeLock = 1; + }else if( rc==SQLITE_BUSY ){ + eMode2 = SQLITE_CHECKPOINT_PASSIVE; + xBusy2 = 0; + rc = SQLITE_OK; + } + } + + /* Read the wal-index header. */ + if( rc==SQLITE_OK ){ + rc = walIndexReadHdr(pWal, &isChanged); + if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ + sqlite3OsUnfetch(pWal->pDbFd, 0, 0); + } + } + + /* Copy data from the log to the database file. */ + if( rc==SQLITE_OK ){ + if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + rc = walCheckpoint(pWal, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); + } + + /* If no error occurred, set the output variables. */ + if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ + if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; + if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill); + } + } + + if( isChanged ){ + /* If a new wal-index header was loaded before the checkpoint was + ** performed, then the pager-cache associated with pWal is now + ** out of date. So zero the cached wal-index header to ensure that + ** next time the pager opens a snapshot on this database it knows that + ** the cache needs to be reset. + */ + memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); + } + + /* Release the locks. */ + sqlite3WalEndWriteTransaction(pWal); + walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); + pWal->ckptLock = 0; + WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok")); + return (rc==SQLITE_OK && eMode!=eMode2 ? SQLITE_BUSY : rc); +} + +#if 0 +/* Return the value to pass to a sqlite3_wal_hook callback, the +** number of frames in the WAL at the point of the last commit since +** sqlite3WalCallback() was called. If no commits have occurred since +** the last call, then return 0. +*/ +SQLITE_PRIVATE int sqlite3WalCallback(Wal *pWal){ + u32 ret = 0; + if( pWal ){ + ret = pWal->iCallback; + pWal->iCallback = 0; + } + return (int)ret; +} +#endif + +/* +** This function is called to change the WAL subsystem into or out +** of locking_mode=EXCLUSIVE. +** +** If op is zero, then attempt to change from locking_mode=EXCLUSIVE +** into locking_mode=NORMAL. This means that we must acquire a lock +** on the pWal->readLock byte. If the WAL is already in locking_mode=NORMAL +** or if the acquisition of the lock fails, then return 0. If the +** transition out of exclusive-mode is successful, return 1. This +** operation must occur while the pager is still holding the exclusive +** lock on the main database file. +** +** If op is one, then change from locking_mode=NORMAL into +** locking_mode=EXCLUSIVE. This means that the pWal->readLock must +** be released. Return 1 if the transition is made and 0 if the +** WAL is already in exclusive-locking mode - meaning that this +** routine is a no-op. The pager must already hold the exclusive lock +** on the main database file before invoking this operation. +** +** If op is negative, then do a dry-run of the op==1 case but do +** not actually change anything. The pager uses this to see if it +** should acquire the database exclusive lock prior to invoking +** the op==1 case. +*/ +SQLITE_PRIVATE int sqlite3WalExclusiveMode(Wal *pWal, int op){ + int rc; + assert( pWal->writeLock==0 ); + assert( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE || op==-1 ); + + /* pWal->readLock is usually set, but might be -1 if there was a + ** prior error while attempting to acquire are read-lock. This cannot + ** happen if the connection is actually in exclusive mode (as no xShmLock + ** locks are taken in this case). Nor should the pager attempt to + ** upgrade to exclusive-mode following such an error. + */ + assert( pWal->readLock>=0 || pWal->lockError ); + assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) ); + + if( op==0 ){ + if( pWal->exclusiveMode ){ + pWal->exclusiveMode = 0; + if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){ + pWal->exclusiveMode = 1; + } + rc = pWal->exclusiveMode==0; + }else{ + /* Already in locking_mode=NORMAL */ + rc = 0; + } + }else if( op>0 ){ + assert( pWal->exclusiveMode==0 ); + assert( pWal->readLock>=0 ); + walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); + pWal->exclusiveMode = 1; + rc = 1; + }else{ + rc = pWal->exclusiveMode==0; + } + return rc; +} + +#if 0 +/* +** Return true if the argument is non-NULL and the WAL module is using +** heap-memory for the wal-index. Otherwise, if the argument is NULL or the +** WAL module is using shared-memory, return false. +*/ +SQLITE_PRIVATE int sqlite3WalHeapMemory(Wal *pWal){ + return (pWal && pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ); +} +#endif + +#ifdef SQLITE_ENABLE_ZIPVFS +/* +** If the argument is not NULL, it points to a Wal object that holds a +** read-lock. This function returns the database page-size if it is known, +** or zero if it is not (or if pWal is NULL). +*/ +SQLITE_PRIVATE int sqlite3WalFramesize(Wal *pWal){ + assert( pWal==0 || pWal->readLock>=0 ); + return (pWal ? pWal->szPage : 0); +} +#endif + +#endif /* #ifndef SQLITE_OMIT_WAL */ + +/************** End of wal.c *************************************************/ +/************** Begin file btmutex.c *****************************************/ +/* +** 2007 August 27 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code used to implement mutexes on Btree objects. +** This code really belongs in btree.c. But btree.c is getting too +** big and we want to break it down some. This packaged seemed like +** a good breakout. +*/ +/* #include "btreeInt.h" */ +#ifndef SQLITE_OMIT_SHARED_CACHE +#if SQLITE_THREADSAFE + +/* +** Obtain the BtShared mutex associated with B-Tree handle p. Also, +** set BtShared.db to the database handle associated with p and the +** p->locked boolean to true. +*/ +static void lockBtreeMutex(Btree *p){ + assert( p->locked==0 ); + assert( sqlite3_mutex_notheld(p->pBt->mutex) ); + assert( sqlite3_mutex_held(p->mutex) ); + + sqlite3_mutex_enter(p->pBt->mutex); + p->pBt->pBt = p; + p->locked = 1; +} + +/* +** Release the BtShared mutex associated with B-Tree handle p and +** clear the p->locked boolean. +*/ +static void SQLITE_NOINLINE unlockBtreeMutex(Btree *p){ + BtShared *pBt = p->pBt; + assert( p->locked==1 ); + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( sqlite3_mutex_held(p->db->mutex) ); + assert( p->db==pBt->db ); + + sqlite3_mutex_leave(pBt->mutex); + p->locked = 0; +} + +/* Forward reference */ +static void SQLITE_NOINLINE btreeLockCarefully(Btree *p); + +/* +** Enter a mutex on the given BTree object. +** +** If the object is not sharable, then no mutex is ever required +** and this routine is a no-op. The underlying mutex is non-recursive. +** But we keep a reference count in Btree.wantToLock so the behavior +** of this interface is recursive. +** +** To avoid deadlocks, multiple Btrees are locked in the same order +** by all database connections. The p->pNext is a list of other +** Btrees belonging to the same database connection as the p Btree +** which need to be locked after p. If we cannot get a lock on +** p, then first unlock all of the others on p->pNext, then wait +** for the lock to become available on p, then relock all of the +** subsequent Btrees that desire a lock. +*/ +SQLITE_API void SQLITE_STDCALL sqlite3BtreeEnter(Btree *p){ + /* Some basic sanity checking on the Btree. The list of Btrees + ** connected by pNext and pPrev should be in sorted order by + ** Btree.pBt value. All elements of the list should belong to + ** the same connection. Only shared Btrees are on the list. */ + assert( p->pNext==0 || p->pNext->pBt>p->pBt ); + assert( p->pPrev==0 || p->pPrev->pBt<p->pBt ); + assert( p->pNext==0 || p->pNext->db==p->db ); + assert( p->pPrev==0 || p->pPrev->db==p->db ); + assert( p->sharable || (p->pNext==0 && p->pPrev==0) ); + + /* Check for locking consistency */ + assert( !p->locked || p->wantToLock>0 ); + assert( p->sharable || p->wantToLock==0 ); + + /* We should already hold a lock on the database connection */ + assert( sqlite3_mutex_held(p->db->mutex) ); + + /* Unless the database is sharable and unlocked, then BtShared.db + ** should already be set correctly. */ + assert( (p->locked==0 && p->sharable) || p->pBt->db==p->db ); + + if( !p->sharable ) return; + p->wantToLock++; + if( p->locked ) return; + btreeLockCarefully(p); +} + +/* This is a helper function for sqlite3BtreeLock(). By moving +** complex, but seldom used logic, out of sqlite3BtreeLock() and +** into this routine, we avoid unnecessary stack pointer changes +** and thus help the sqlite3BtreeLock() routine to run much faster +** in the common case. +*/ +static void SQLITE_NOINLINE btreeLockCarefully(Btree *p){ + Btree *pLater; + + /* In most cases, we should be able to acquire the lock we + ** want without having to go through the ascending lock + ** procedure that follows. Just be sure not to block. + */ + if( sqlite3_mutex_try(p->pBt->mutex)==SQLITE_OK ){ + p->pBt->pBt = p; + p->locked = 1; + return; + } + + /* To avoid deadlock, first release all locks with a larger + ** BtShared address. Then acquire our lock. Then reacquire + ** the other BtShared locks that we used to hold in ascending + ** order. + */ + for(pLater=p->pNext; pLater; pLater=pLater->pNext){ + assert( pLater->sharable ); + assert( pLater->pNext==0 || pLater->pNext->pBt>pLater->pBt ); + assert( !pLater->locked || pLater->wantToLock>0 ); + if( pLater->locked ){ + unlockBtreeMutex(pLater); + } + } + lockBtreeMutex(p); + for(pLater=p->pNext; pLater; pLater=pLater->pNext){ + if( pLater->wantToLock ){ + lockBtreeMutex(pLater); + } + } +} + + +/* +** Exit the recursive mutex on a Btree. +*/ +SQLITE_API void SQLITE_STDCALL sqlite3BtreeLeave(Btree *p){ + assert( sqlite3_mutex_held(p->db->mutex) ); + if( p->sharable ){ + assert( p->wantToLock>0 ); + p->wantToLock--; + if( p->wantToLock==0 ){ + unlockBtreeMutex(p); + } + } +} + +#ifndef NDEBUG +/* +** Return true if the BtShared mutex is held on the btree, or if the +** B-Tree is not marked as sharable. +** +** This routine is used only from within assert() statements. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeHoldsMutex(Btree *p){ + assert( p->sharable==0 || p->locked==0 || p->wantToLock>0 ); + assert( p->sharable==0 || p->locked==0 || p->db==p->pBt->db ); + assert( p->sharable==0 || p->locked==0 || sqlite3_mutex_held(p->pBt->mutex) ); + assert( p->sharable==0 || p->locked==0 || sqlite3_mutex_held(p->db->mutex) ); + + return (p->sharable==0 || p->locked); +} +#endif + + +#ifndef SQLITE_OMIT_INCRBLOB +/* +** Enter and leave a mutex on a Btree given a cursor owned by that +** Btree. These entry points are used by incremental I/O and can be +** omitted if that module is not used. +*/ +SQLITE_API void SQLITE_STDCALL sqlite3BtreeEnterCursor(BtCursor *pCur){ + sqlite3BtreeEnter(pCur->pBtree); +} +SQLITE_API void SQLITE_STDCALL sqlite3BtreeLeaveCursor(BtCursor *pCur){ + sqlite3BtreeLeave(pCur->pBtree); +} +#endif /* SQLITE_OMIT_INCRBLOB */ + +/* +** Return true if a particular Btree requires a lock. Return FALSE if +** no lock is ever required since it is not sharable. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeSharable(Btree *p){ + return p->sharable; +} + +#else /* SQLITE_THREADSAFE>0 above. SQLITE_THREADSAFE==0 below */ +/* +** The following are special cases for mutex enter routines for use +** in single threaded applications that use shared cache. Except for +** these two routines, all mutex operations are no-ops in that case and +** are null #defines in btree.h. +** +** If shared cache is disabled, then all btree mutex routines, including +** the ones below, are no-ops and are null #defines in btree.h. +*/ + +SQLITE_API void SQLITE_STDCALL sqlite3BtreeEnter(Btree *p){ + p->pBt->db = p->db; +} +#endif /* if SQLITE_THREADSAFE */ +#endif /* ifndef SQLITE_OMIT_SHARED_CACHE */ + +/************** End of btmutex.c *********************************************/ +/************** Begin file btree.c *******************************************/ +/* +** 2004 April 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file implements an external (disk-based) database using BTrees. +** See the header comment on "btreeInt.h" for additional information. +** Including a description of file format and an overview of operation. +*/ +/* #include "btreeInt.h" */ + +/* +** The header string that appears at the beginning of every +** SQLite database. +*/ +static const char zMagicHeader[] = SQLITE_FILE_HEADER; + +/* +** Set this global variable to 1 to enable tracing using the TRACE +** macro. +*/ +#if 0 +int sqlite3BtreeTrace=1; /* True to enable tracing */ +# define TRACE(X) if(sqlite3BtreeTrace){printf X;fflush(stdout);} +#else +# define TRACE(X) +#endif + +/* +** Extract a 2-byte big-endian integer from an array of unsigned bytes. +** But if the value is zero, make it 65536. +** +** This routine is used to extract the "offset to cell content area" value +** from the header of a btree page. If the page size is 65536 and the page +** is empty, the offset should be 65536, but the 2-byte value stores zero. +** This routine makes the necessary adjustment to 65536. +*/ +#define get2byteNotZero(X) (((((int)get2byte(X))-1)&0xffff)+1) + +/* +** Values passed as the 5th argument to allocateBtreePage() +*/ +#define BTALLOC_ANY 0 /* Allocate any page */ +#define BTALLOC_EXACT 1 /* Allocate exact page if possible */ +#define BTALLOC_LE 2 /* Allocate any page <= the parameter */ + +/* +** Macro IfNotOmitAV(x) returns (x) if SQLITE_OMIT_AUTOVACUUM is not +** defined, or 0 if it is. For example: +** +** bIncrVacuum = IfNotOmitAV(pBtShared->incrVacuum); +*/ +#ifndef SQLITE_OMIT_AUTOVACUUM +#define IfNotOmitAV(expr) (expr) +#else +#define IfNotOmitAV(expr) 0 +#endif + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* +** A list of BtShared objects that are eligible for participation +** in shared cache. This variable has file scope during normal builds, +** but the test harness needs to access it so we make it global for +** test builds. +** +** Access to this variable is protected by SQLITE_MUTEX_STATIC_MASTER. +*/ +#ifdef SQLITE_TEST +SQLITE_PRIVATE BtShared *SQLITE_WSD sqlite3SharedCacheList = 0; +#else +static BtShared *SQLITE_WSD sqlite3SharedCacheList = 0; +#endif +#endif /* SQLITE_OMIT_SHARED_CACHE */ + +#ifdef SQLITE_OMIT_SHARED_CACHE + /* + ** The functions querySharedCacheTableLock(), setSharedCacheTableLock(), + ** and clearAllSharedCacheTableLocks() + ** manipulate entries in the BtShared.pLock linked list used to store + ** shared-cache table level locks. If the library is compiled with the + ** shared-cache feature disabled, then there is only ever one user + ** of each BtShared structure and so this locking is not necessary. + ** So define the lock related functions as no-ops. + */ + #define querySharedCacheTableLock(a,b,c) SQLITE_OK + #define setSharedCacheTableLock(a,b,c) SQLITE_OK + #define clearAllSharedCacheTableLocks(a) + #define downgradeAllSharedCacheTableLocks(a) + #define hasSharedCacheTableLock(a,b,c,d) 1 + #define hasReadConflicts(a, b) 0 +#endif + +#ifndef SQLITE_OMIT_SHARED_CACHE + +#ifdef SQLITE_DEBUG +/* +**** This function is only used as part of an assert() statement. *** +** +** Check to see if pBtree holds the required locks to read or write to the +** table with root page iRoot. Return 1 if it does and 0 if not. +** +** For example, when writing to a table with root-page iRoot via +** Btree connection pBtree: +** +** assert( hasSharedCacheTableLock(pBtree, iRoot, 0, WRITE_LOCK) ); +** +** When writing to an index that resides in a sharable database, the +** caller should have first obtained a lock specifying the root page of +** the corresponding table. This makes things a bit more complicated, +** as this module treats each table as a separate structure. To determine +** the table corresponding to the index being written, this +** function has to search through the database schema. +** +** Instead of a lock on the table/index rooted at page iRoot, the caller may +** hold a write-lock on the schema table (root page 1). This is also +** acceptable. +*/ +static int hasSharedCacheTableLock( + Btree *pBtree, /* Handle that must hold lock */ + Pgno iRoot, /* Root page of b-tree */ + int isIndex, /* True if iRoot is the root of an index b-tree */ + int eLockType /* Required lock type (READ_LOCK or WRITE_LOCK) */ +){ + Schema *pSchema = (Schema *)pBtree->pBt->pSchema; + Pgno iTab = 0; + BtLock *pLock; + + /* If this database is not shareable, or if the client is reading + ** and has the read-uncommitted flag set, then no lock is required. + ** Return true immediately. + */ + if( (pBtree->sharable==0) + || (eLockType==READ_LOCK && (pBtree->flags & SQLITE_ReadUncommitted)) + ){ + return 1; + } + + /* If the client is reading or writing an index and the schema is + ** not loaded, then it is too difficult to actually check to see if + ** the correct locks are held. So do not bother - just return true. + ** This case does not come up very often anyhow. + */ + if( isIndex && (!pSchema || (pSchema->schemaFlags&DB_SchemaLoaded)==0) ){ + return 1; + } + + /* Figure out the root-page that the lock should be held on. For table + ** b-trees, this is just the root page of the b-tree being read or + ** written. For index b-trees, it is the root page of the associated + ** table. */ + if( isIndex ){ + HashElem *p; + for(p=sqliteHashFirst(&pSchema->idxHash); p; p=sqliteHashNext(p)){ + Index *pIdx = (Index *)sqliteHashData(p); + if( pIdx->tnum==(int)iRoot ){ + if( iTab ){ + /* Two or more indexes share the same root page. There must + ** be imposter tables. So just return true. The assert is not + ** useful in that case. */ + return 1; + } + iTab = pIdx->pTable->tnum; + } + } + }else{ + iTab = iRoot; + } + + /* Search for the required lock. Either a write-lock on root-page iTab, a + ** write-lock on the schema table, or (if the client is reading) a + ** read-lock on iTab will suffice. Return 1 if any of these are found. */ + for(pLock=pBtree->pBt->pLock; pLock; pLock=pLock->pNext){ + if( pLock->pBtree==pBtree + && (pLock->iTable==iTab || (pLock->eLock==WRITE_LOCK && pLock->iTable==1)) + && pLock->eLock>=eLockType + ){ + return 1; + } + } + + /* Failed to find the required lock. */ + return 0; +} +#endif /* SQLITE_DEBUG */ + +#ifdef SQLITE_DEBUG +/* +**** This function may be used as part of assert() statements only. **** +** +** Return true if it would be illegal for pBtree to write into the +** table or index rooted at iRoot because other shared connections are +** simultaneously reading that same table or index. +** +** It is illegal for pBtree to write if some other Btree object that +** shares the same BtShared object is currently reading or writing +** the iRoot table. Except, if the other Btree object has the +** read-uncommitted flag set, then it is OK for the other object to +** have a read cursor. +** +** For example, before writing to any part of the table or index +** rooted at page iRoot, one should call: +** +** assert( !hasReadConflicts(pBtree, iRoot) ); +*/ +static int hasReadConflicts(Btree *pBtree, Pgno iRoot){ + BtCursor *p; + for(p=pBtree->pBt->pCursor; p; p=p->pNext){ + if( p->pgnoRoot==iRoot + && p->pBtree!=pBtree + && 0==(p->pBtree->flags & SQLITE_ReadUncommitted) + ){ + return 1; + } + } + return 0; +} +#endif /* #ifdef SQLITE_DEBUG */ + +/* +** Query to see if Btree handle p may obtain a lock of type eLock +** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return +** SQLITE_OK if the lock may be obtained (by calling +** setSharedCacheTableLock()), or SQLITE_LOCKED if not. +*/ +static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){ + BtShared *pBt = p->pBt; + BtLock *pIter; + + assert( sqlite3BtreeHoldsMutex(p) ); + assert( eLock==READ_LOCK || eLock==WRITE_LOCK ); + assert( !(p->flags&SQLITE_ReadUncommitted)||eLock==WRITE_LOCK||iTab==1 ); + + /* If requesting a write-lock, then the Btree must have an open write + ** transaction on this file. And, obviously, for this to be so there + ** must be an open write transaction on the file itself. + */ + assert( eLock==READ_LOCK || (p==pBt->pWriter && p->inTrans==TRANS_WRITE) ); + assert( eLock==READ_LOCK || pBt->inTransaction==TRANS_WRITE ); + + /* This routine is a no-op if the shared-cache is not enabled */ + if( !p->sharable ){ + return SQLITE_OK; + } + + /* If some other connection is holding an exclusive lock, the + ** requested lock may not be obtained. + */ + if( pBt->pWriter!=p && (pBt->btsFlags & BTS_EXCLUSIVE)!=0 ){ + sqlite3ConnectionBlocked(p->db, pBt->pWriter->db); + return SQLITE_LOCKED_SHAREDCACHE; + } + + for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ + /* The condition (pIter->eLock!=eLock) in the following if(...) + ** statement is a simplification of: + ** + ** (eLock==WRITE_LOCK || pIter->eLock==WRITE_LOCK) + ** + ** since we know that if eLock==WRITE_LOCK, then no other connection + ** may hold a WRITE_LOCK on any table in this file (since there can + ** only be a single writer). + */ + assert( pIter->eLock==READ_LOCK || pIter->eLock==WRITE_LOCK ); + assert( eLock==READ_LOCK || pIter->pBtree==p || pIter->eLock==READ_LOCK); + if( pIter->pBtree!=p && pIter->iTable==iTab && pIter->eLock!=eLock ){ + sqlite3ConnectionBlocked(p->db, pIter->pBtree->db); + if( eLock==WRITE_LOCK ){ + assert( p==pBt->pWriter ); + pBt->btsFlags |= BTS_PENDING; + } + return SQLITE_LOCKED_SHAREDCACHE; + } + } + return SQLITE_OK; +} +#endif /* !SQLITE_OMIT_SHARED_CACHE */ + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* +** Add a lock on the table with root-page iTable to the shared-btree used +** by Btree handle p. Parameter eLock must be either READ_LOCK or +** WRITE_LOCK. +** +** This function assumes the following: +** +** (a) The specified Btree object p is connected to a sharable +** database (one with the BtShared.sharable flag set), and +** +** (b) No other Btree objects hold a lock that conflicts +** with the requested lock (i.e. querySharedCacheTableLock() has +** already been called and returned SQLITE_OK). +** +** SQLITE_OK is returned if the lock is added successfully. SQLITE_NOMEM +** is returned if a malloc attempt fails. +*/ +static int setSharedCacheTableLock(Btree *p, Pgno iTable, u8 eLock){ + BtShared *pBt = p->pBt; + BtLock *pLock = 0; + BtLock *pIter; + + assert( sqlite3BtreeHoldsMutex(p) ); + assert( eLock==READ_LOCK || eLock==WRITE_LOCK ); + assert( p->db!=0 ); + + /* A connection with the read-uncommitted flag set will never try to + ** obtain a read-lock using this function. The only read-lock obtained + ** by a connection in read-uncommitted mode is on the sqlite_master + ** table, and that lock is obtained in BtreeBeginTrans(). */ + assert( 0==(p->db->flags&SQLITE_ReadUncommitted) || eLock==WRITE_LOCK ); + + /* This function should only be called on a sharable b-tree after it + ** has been determined that no other b-tree holds a conflicting lock. */ + assert( p->sharable ); + assert( SQLITE_OK==querySharedCacheTableLock(p, iTable, eLock) ); + + /* First search the list for an existing lock on this table. */ + for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ + if( pIter->iTable==iTable && pIter->pBtree==p ){ + pLock = pIter; + break; + } + } + + /* If the above search did not find a BtLock struct associating Btree p + ** with table iTable, allocate one and link it into the list. + */ + if( !pLock ){ + pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock)); + if( !pLock ){ + return SQLITE_NOMEM; + } + pLock->iTable = iTable; + pLock->pBtree = p; + pLock->pNext = pBt->pLock; + pBt->pLock = pLock; + } + + /* Set the BtLock.eLock variable to the maximum of the current lock + ** and the requested lock. This means if a write-lock was already held + ** and a read-lock requested, we don't incorrectly downgrade the lock. + */ + assert( WRITE_LOCK>READ_LOCK ); + if( eLock>pLock->eLock ){ + pLock->eLock = eLock; + } + + return SQLITE_OK; +} +#endif /* !SQLITE_OMIT_SHARED_CACHE */ + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* +** Release all the table locks (locks obtained via calls to +** the setSharedCacheTableLock() procedure) held by Btree object p. +** +** This function assumes that Btree p has an open read or write +** transaction. If it does not, then the BTS_PENDING flag +** may be incorrectly cleared. +*/ +static void clearAllSharedCacheTableLocks(Btree *p){ + BtShared *pBt = p->pBt; + BtLock **ppIter = &pBt->pLock; + + assert( sqlite3BtreeHoldsMutex(p) ); + assert( p->sharable || 0==*ppIter ); + assert( p->inTrans>0 ); + + while( *ppIter ){ + BtLock *pLock = *ppIter; + assert( (pBt->btsFlags & BTS_EXCLUSIVE)==0 || pBt->pWriter==pLock->pBtree ); + assert( pLock->pBtree->inTrans>=pLock->eLock ); + if( pLock->pBtree==p ){ + *ppIter = pLock->pNext; + assert( pLock->iTable!=1 || pLock==&p->lock ); + if( pLock->iTable!=1 ){ + sqlite3_free(pLock); + } + }else{ + ppIter = &pLock->pNext; + } + } + + assert( (pBt->btsFlags & BTS_PENDING)==0 || pBt->pWriter ); + if( pBt->pWriter==p ){ + pBt->pWriter = 0; + pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING); + }else if( pBt->nTransaction==2 ){ + /* This function is called when Btree p is concluding its + ** transaction. If there currently exists a writer, and p is not + ** that writer, then the number of locks held by connections other + ** than the writer must be about to drop to zero. In this case + ** set the BTS_PENDING flag to 0. + ** + ** If there is not currently a writer, then BTS_PENDING must + ** be zero already. So this next line is harmless in that case. + */ + pBt->btsFlags &= ~BTS_PENDING; + } +} + +/* +** This function changes all write-locks held by Btree p into read-locks. +*/ +static void downgradeAllSharedCacheTableLocks(Btree *p){ + BtShared *pBt = p->pBt; + if( pBt->pWriter==p ){ + BtLock *pLock; + pBt->pWriter = 0; + pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING); + for(pLock=pBt->pLock; pLock; pLock=pLock->pNext){ + assert( pLock->eLock==READ_LOCK || pLock->pBtree==p ); + pLock->eLock = READ_LOCK; + } + } +} + +#endif /* SQLITE_OMIT_SHARED_CACHE */ + +static void releasePage(MemPage *pPage); /* Forward reference */ + +/* +***** This routine is used inside of assert() only **** +** +** Verify that the cursor holds the mutex on its BtShared +*/ +#ifdef SQLITE_DEBUG +static int cursorHoldsMutex(BtCursor *p){ + return sqlite3_mutex_held(p->pBt->mutex); +} +#endif + +/* +** Invalidate the overflow cache of the cursor passed as the first argument. +** on the shared btree structure pBt. +*/ +#define invalidateOverflowCache(pCur) (pCur->curFlags &= ~BTCF_ValidOvfl) + +/* +** Invalidate the overflow page-list cache for all cursors opened +** on the shared btree structure pBt. +*/ +static void invalidateAllOverflowCache(BtShared *pBt){ + BtCursor *p; + assert( sqlite3_mutex_held(pBt->mutex) ); + for(p=pBt->pCursor; p; p=p->pNext){ + invalidateOverflowCache(p); + } +} + +#ifndef SQLITE_OMIT_INCRBLOB +/* +** This function is called before modifying the contents of a table +** to invalidate any incrblob cursors that are open on the +** row or one of the rows being modified. +** +** If argument isClearTable is true, then the entire contents of the +** table is about to be deleted. In this case invalidate all incrblob +** cursors open on any row within the table with root-page pgnoRoot. +** +** Otherwise, if argument isClearTable is false, then the row with +** rowid iRow is being replaced or deleted. In this case invalidate +** only those incrblob cursors open on that specific row. +*/ +static void invalidateIncrblobCursors( + Btree *pBtree, /* The database file to check */ + i64 iRow, /* The rowid that might be changing */ + int isClearTable /* True if all rows are being deleted */ +){ + BtCursor *p; + if( pBtree->hasIncrblobCur==0 ) return; + assert( sqlite3BtreeHoldsMutex(pBtree) ); + pBtree->hasIncrblobCur = 0; + for(p=pBtree->pBt->pCursor; p; p=p->pNext){ + if( (p->curFlags & BTCF_Incrblob)!=0 ){ + pBtree->hasIncrblobCur = 1; + if( isClearTable || p->info.nKey==iRow ){ + p->eState = CURSOR_INVALID; + } + } + } +} + +#else + /* Stub function when INCRBLOB is omitted */ + #define invalidateIncrblobCursors(x,y,z) +#endif /* SQLITE_OMIT_INCRBLOB */ + +/* +** Set bit pgno of the BtShared.pHasContent bitvec. This is called +** when a page that previously contained data becomes a free-list leaf +** page. +** +** The BtShared.pHasContent bitvec exists to work around an obscure +** bug caused by the interaction of two useful IO optimizations surrounding +** free-list leaf pages: +** +** 1) When all data is deleted from a page and the page becomes +** a free-list leaf page, the page is not written to the database +** (as free-list leaf pages contain no meaningful data). Sometimes +** such a page is not even journalled (as it will not be modified, +** why bother journalling it?). +** +** 2) When a free-list leaf page is reused, its content is not read +** from the database or written to the journal file (why should it +** be, if it is not at all meaningful?). +** +** By themselves, these optimizations work fine and provide a handy +** performance boost to bulk delete or insert operations. However, if +** a page is moved to the free-list and then reused within the same +** transaction, a problem comes up. If the page is not journalled when +** it is moved to the free-list and it is also not journalled when it +** is extracted from the free-list and reused, then the original data +** may be lost. In the event of a rollback, it may not be possible +** to restore the database to its original configuration. +** +** The solution is the BtShared.pHasContent bitvec. Whenever a page is +** moved to become a free-list leaf page, the corresponding bit is +** set in the bitvec. Whenever a leaf page is extracted from the free-list, +** optimization 2 above is omitted if the corresponding bit is already +** set in BtShared.pHasContent. The contents of the bitvec are cleared +** at the end of every transaction. +*/ +static int btreeSetHasContent(BtShared *pBt, Pgno pgno){ + int rc = SQLITE_OK; + if( !pBt->pHasContent ){ + assert( pgno<=pBt->nPage ); + pBt->pHasContent = sqlite3BitvecCreate(pBt->nPage); + if( !pBt->pHasContent ){ + rc = SQLITE_NOMEM; + } + } + if( rc==SQLITE_OK && pgno<=sqlite3BitvecSize(pBt->pHasContent) ){ + rc = sqlite3BitvecSet(pBt->pHasContent, pgno); + } + return rc; +} + +/* +** Query the BtShared.pHasContent vector. +** +** This function is called when a free-list leaf page is removed from the +** free-list for reuse. It returns false if it is safe to retrieve the +** page from the pager layer with the 'no-content' flag set. True otherwise. +*/ +static int btreeGetHasContent(BtShared *pBt, Pgno pgno){ + Bitvec *p = pBt->pHasContent; + return (p && (pgno>sqlite3BitvecSize(p) || sqlite3BitvecTest(p, pgno))); +} + +/* +** Clear (destroy) the BtShared.pHasContent bitvec. This should be +** invoked at the conclusion of each write-transaction. +*/ +static void btreeClearHasContent(BtShared *pBt){ + sqlite3BitvecDestroy(pBt->pHasContent); + pBt->pHasContent = 0; +} + +/* +** Release all of the apPage[] pages for a cursor. +*/ +static void btreeReleaseAllCursorPages(BtCursor *pCur){ + int i; + for(i=0; i<=pCur->iPage; i++){ + releasePage(pCur->apPage[i]); + pCur->apPage[i] = 0; + } + pCur->iPage = -1; +} + +/* +** The cursor passed as the only argument must point to a valid entry +** when this function is called (i.e. have eState==CURSOR_VALID). This +** function saves the current cursor key in variables pCur->nKey and +** pCur->pKey. SQLITE_OK is returned if successful or an SQLite error +** code otherwise. +** +** If the cursor is open on an intkey table, then the integer key +** (the rowid) is stored in pCur->nKey and pCur->pKey is left set to +** NULL. If the cursor is open on a non-intkey table, then pCur->pKey is +** set to point to a malloced buffer pCur->nKey bytes in size containing +** the key. +*/ +static int saveCursorKey(BtCursor *pCur){ + int rc; + assert( CURSOR_VALID==pCur->eState ); + assert( 0==pCur->pKey ); + assert( cursorHoldsMutex(pCur) ); + + rc = sqlite3BtreeKeySize(pCur, &pCur->nKey); + assert( rc==SQLITE_OK ); /* KeySize() cannot fail */ + + /* If this is an intKey table, then the above call to BtreeKeySize() + ** stores the integer key in pCur->nKey. In this case this value is + ** all that is required. Otherwise, if pCur is not open on an intKey + ** table, then malloc space for and store the pCur->nKey bytes of key + ** data. */ + if( 0==pCur->curIntKey ){ + void *pKey = sqlite3Malloc( pCur->nKey ); + if( pKey ){ + rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey); + if( rc==SQLITE_OK ){ + pCur->pKey = pKey; + }else{ + sqlite3_free(pKey); + } + }else{ + rc = SQLITE_NOMEM; + } + } + assert( !pCur->curIntKey || !pCur->pKey ); + return rc; +} + +/* +** Save the current cursor position in the variables BtCursor.nKey +** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK. +** +** The caller must ensure that the cursor is valid (has eState==CURSOR_VALID) +** prior to calling this routine. +*/ +static int saveCursorPosition(BtCursor *pCur){ + int rc; + + assert( CURSOR_VALID==pCur->eState || CURSOR_SKIPNEXT==pCur->eState ); + assert( 0==pCur->pKey ); + assert( cursorHoldsMutex(pCur) ); + + if( pCur->eState==CURSOR_SKIPNEXT ){ + pCur->eState = CURSOR_VALID; + }else{ + pCur->skipNext = 0; + } + + rc = saveCursorKey(pCur); + if( rc==SQLITE_OK ){ + btreeReleaseAllCursorPages(pCur); + pCur->eState = CURSOR_REQUIRESEEK; + } + + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl|BTCF_AtLast); + return rc; +} + +/* Forward reference */ +static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*); + +/* +** Save the positions of all cursors (except pExcept) that are open on +** the table with root-page iRoot. "Saving the cursor position" means that +** the location in the btree is remembered in such a way that it can be +** moved back to the same spot after the btree has been modified. This +** routine is called just before cursor pExcept is used to modify the +** table, for example in BtreeDelete() or BtreeInsert(). +** +** If there are two or more cursors on the same btree, then all such +** cursors should have their BTCF_Multiple flag set. The btreeCursor() +** routine enforces that rule. This routine only needs to be called in +** the uncommon case when pExpect has the BTCF_Multiple flag set. +** +** If pExpect!=NULL and if no other cursors are found on the same root-page, +** then the BTCF_Multiple flag on pExpect is cleared, to avoid another +** pointless call to this routine. +** +** Implementation note: This routine merely checks to see if any cursors +** need to be saved. It calls out to saveCursorsOnList() in the (unusual) +** event that cursors are in need to being saved. +*/ +static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ + BtCursor *p; + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( pExcept==0 || pExcept->pBt==pBt ); + for(p=pBt->pCursor; p; p=p->pNext){ + if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break; + } + if( p ) return saveCursorsOnList(p, iRoot, pExcept); + if( pExcept ) pExcept->curFlags &= ~BTCF_Multiple; + return SQLITE_OK; +} + +/* This helper routine to saveAllCursors does the actual work of saving +** the cursors if and when a cursor is found that actually requires saving. +** The common case is that no cursors need to be saved, so this routine is +** broken out from its caller to avoid unnecessary stack pointer movement. +*/ +static int SQLITE_NOINLINE saveCursorsOnList( + BtCursor *p, /* The first cursor that needs saving */ + Pgno iRoot, /* Only save cursor with this iRoot. Save all if zero */ + BtCursor *pExcept /* Do not save this cursor */ +){ + do{ + if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){ + if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){ + int rc = saveCursorPosition(p); + if( SQLITE_OK!=rc ){ + return rc; + } + }else{ + testcase( p->iPage>0 ); + btreeReleaseAllCursorPages(p); + } + } + p = p->pNext; + }while( p ); + return SQLITE_OK; +} + +/* +** Clear the current cursor position. +*/ +SQLITE_API void SQLITE_STDCALL sqlite3BtreeClearCursor(BtCursor *pCur){ + assert( cursorHoldsMutex(pCur) ); + sqlite3_free(pCur->pKey); + pCur->pKey = 0; + pCur->eState = CURSOR_INVALID; +} + +/* +** In this version of BtreeMoveto, pKey is a packed index record +** such as is generated by the OP_MakeRecord opcode. Unpack the +** record and then call BtreeMovetoUnpacked() to do the work. +*/ +static int btreeMoveto( + BtCursor *pCur, /* Cursor open on the btree to be searched */ + const void *pKey, /* Packed key if the btree is an index */ + i64 nKey, /* Integer key for tables. Size of pKey for indices */ + int bias, /* Bias search to the high end */ + int *pRes /* Write search results here */ +){ + int rc; /* Status code */ + UnpackedRecord *pIdxKey; /* Unpacked index key */ + char aSpace[200]; /* Temp space for pIdxKey - to avoid a malloc */ + char *pFree = 0; + + if( pKey ){ + assert( nKey==(i64)(int)nKey ); + pIdxKey = sqlite3VdbeAllocUnpackedRecord( + pCur->pKeyInfo, aSpace, sizeof(aSpace), &pFree + ); + if( pIdxKey==0 ) return SQLITE_NOMEM; + sqlite3VdbeRecordUnpack(pCur->pKeyInfo, (int)nKey, pKey, pIdxKey); + if( pIdxKey->nField==0 ){ + sqlite3DbFree(pCur->pKeyInfo->pBtree, pFree); + return SQLITE_CORRUPT_BKPT; + } + }else{ + pIdxKey = 0; + } + rc = sqlite3BtreeMovetoUnpacked(pCur, pIdxKey, nKey, bias, pRes); + if( pFree ){ + sqlite3DbFree(pCur->pKeyInfo->pBtree, pFree); + } + return rc; +} + +/* +** Restore the cursor to the position it was in (or as close to as possible) +** when saveCursorPosition() was called. Note that this call deletes the +** saved position info stored by saveCursorPosition(), so there can be +** at most one effective restoreCursorPosition() call after each +** saveCursorPosition(). +*/ +static int btreeRestoreCursorPosition(BtCursor *pCur){ + int rc; + int skipNext; + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState>=CURSOR_REQUIRESEEK ); + if( pCur->eState==CURSOR_FAULT ){ + return pCur->skipNext; + } + pCur->eState = CURSOR_INVALID; + rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &skipNext); + if( rc==SQLITE_OK ){ + sqlite3_free(pCur->pKey); + pCur->pKey = 0; + assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID ); + pCur->skipNext |= skipNext; + if( pCur->skipNext && pCur->eState==CURSOR_VALID ){ + pCur->eState = CURSOR_SKIPNEXT; + } + } + return rc; +} + +#define restoreCursorPosition(p) \ + (p->eState>=CURSOR_REQUIRESEEK ? \ + btreeRestoreCursorPosition(p) : \ + SQLITE_OK) + +/* +** Determine whether or not a cursor has moved from the position where +** it was last placed, or has been invalidated for any other reason. +** Cursors can move when the row they are pointing at is deleted out +** from under them, for example. Cursor might also move if a btree +** is rebalanced. +** +** Calling this routine with a NULL cursor pointer returns false. +** +** Use the separate sqlite3BtreeCursorRestore() routine to restore a cursor +** back to where it ought to be if this routine returns true. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCursorHasMoved(BtCursor *pCur){ + return pCur->eState!=CURSOR_VALID; +} + +/* +** This routine restores a cursor back to its original position after it +** has been moved by some outside activity (such as a btree rebalance or +** a row having been deleted out from under the cursor). +** +** On success, the *pDifferentRow parameter is false if the cursor is left +** pointing at exactly the same row. *pDifferntRow is the row the cursor +** was pointing to has been deleted, forcing the cursor to point to some +** nearby row. +** +** This routine should only be called for a cursor that just returned +** TRUE from sqlite3BtreeCursorHasMoved(). +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){ + int rc; + + assert( pCur!=0 ); + assert( pCur->eState!=CURSOR_VALID ); + rc = restoreCursorPosition(pCur); + if( rc ){ + *pDifferentRow = 1; + return rc; + } + if( pCur->eState!=CURSOR_VALID ){ + *pDifferentRow = 1; + }else{ + assert( pCur->skipNext==0 ); + *pDifferentRow = 0; + } + return SQLITE_OK; +} + +#ifndef SQLITE_OMIT_AUTOVACUUM +/* +** Given a page number of a regular database page, return the page +** number for the pointer-map page that contains the entry for the +** input page number. +** +** Return 0 (not a valid page) for pgno==1 since there is +** no pointer map associated with page 1. The integrity_check logic +** requires that ptrmapPageno(*,1)!=1. +*/ +static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){ + int nPagesPerMapPage; + Pgno iPtrMap, ret; + assert( sqlite3_mutex_held(pBt->mutex) ); + if( pgno<2 ) return 0; + nPagesPerMapPage = (pBt->usableSize/5)+1; + iPtrMap = (pgno-2)/nPagesPerMapPage; + ret = (iPtrMap*nPagesPerMapPage) + 2; + if( ret==PENDING_BYTE_PAGE(pBt) ){ + ret++; + } + return ret; +} + +/* +** Write an entry into the pointer map. +** +** This routine updates the pointer map entry for page number 'key' +** so that it maps to type 'eType' and parent page number 'pgno'. +** +** If *pRC is initially non-zero (non-SQLITE_OK) then this routine is +** a no-op. If an error occurs, the appropriate error code is written +** into *pRC. +*/ +static void ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent, int *pRC){ + DbPage *pDbPage; /* The pointer map page */ + u8 *pPtrmap; /* The pointer map data */ + Pgno iPtrmap; /* The pointer map page number */ + int offset; /* Offset in pointer map page */ + int rc; /* Return code from subfunctions */ + + if( *pRC ) return; + + assert( sqlite3_mutex_held(pBt->mutex) ); + /* The master-journal page number must never be used as a pointer map page */ + assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) ); + + assert( pBt->autoVacuum ); + if( key==0 ){ + *pRC = SQLITE_CORRUPT_BKPT; + return; + } + iPtrmap = PTRMAP_PAGENO(pBt, key); + rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage); + if( rc!=SQLITE_OK ){ + *pRC = rc; + return; + } + offset = PTRMAP_PTROFFSET(iPtrmap, key); + if( offset<0 ){ + *pRC = SQLITE_CORRUPT_BKPT; + goto ptrmap_exit; + } + assert( offset <= (int)pBt->usableSize-5 ); + pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); + + if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){ + TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent)); + *pRC= rc = sqlite3PagerWrite(pDbPage); + if( rc==SQLITE_OK ){ + pPtrmap[offset] = eType; + put4byte(&pPtrmap[offset+1], parent); + } + } + +ptrmap_exit: + sqlite3PagerUnref(pDbPage); +} + +/* +** Read an entry from the pointer map. +** +** This routine retrieves the pointer map entry for page 'key', writing +** the type and parent page number to *pEType and *pPgno respectively. +** An error code is returned if something goes wrong, otherwise SQLITE_OK. +*/ +static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ + DbPage *pDbPage; /* The pointer map page */ + int iPtrmap; /* Pointer map page index */ + u8 *pPtrmap; /* Pointer map page data */ + int offset; /* Offset of entry in pointer map */ + int rc; + + assert( sqlite3_mutex_held(pBt->mutex) ); + + iPtrmap = PTRMAP_PAGENO(pBt, key); + rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage); + if( rc!=0 ){ + return rc; + } + pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); + + offset = PTRMAP_PTROFFSET(iPtrmap, key); + if( offset<0 ){ + sqlite3PagerUnref(pDbPage); + return SQLITE_CORRUPT_BKPT; + } + assert( offset <= (int)pBt->usableSize-5 ); + assert( pEType!=0 ); + *pEType = pPtrmap[offset]; + if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]); + + sqlite3PagerUnref(pDbPage); + if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_BKPT; + return SQLITE_OK; +} + +#else /* if defined SQLITE_OMIT_AUTOVACUUM */ + #define ptrmapPut(w,x,y,z,rc) + #define ptrmapGet(w,x,y,z) SQLITE_OK + #define ptrmapPutOvflPtr(x, y, rc) +#endif + +/* +** Given a btree page and a cell index (0 means the first cell on +** the page, 1 means the second cell, and so forth) return a pointer +** to the cell content. +** +** findCellPastPtr() does the same except it skips past the initial +** 4-byte child pointer found on interior pages, if there is one. +** +** This routine works only for pages that do not contain overflow cells. +*/ +#define findCell(P,I) \ + ((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) +#define findCellPastPtr(P,I) \ + ((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) + + +/* +** This is common tail processing for btreeParseCellPtr() and +** btreeParseCellPtrIndex() for the case when the cell does not fit entirely +** on a single B-tree page. Make necessary adjustments to the CellInfo +** structure. +*/ +static SQLITE_NOINLINE void btreeParseCellAdjustSizeForOverflow( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + /* If the payload will not fit completely on the local page, we have + ** to decide how much to store locally and how much to spill onto + ** overflow pages. The strategy is to minimize the amount of unused + ** space on overflow pages while keeping the amount of local storage + ** in between minLocal and maxLocal. + ** + ** Warning: changing the way overflow payload is distributed in any + ** way will result in an incompatible file format. + */ + int minLocal; /* Minimum amount of payload held locally */ + int maxLocal; /* Maximum amount of payload held locally */ + int surplus; /* Overflow payload available for local storage */ + + minLocal = pPage->minLocal; + maxLocal = pPage->maxLocal; + surplus = minLocal + (pInfo->nPayload - minLocal)%(pPage->pBt->usableSize-4); + testcase( surplus==maxLocal ); + testcase( surplus==maxLocal+1 ); + if( surplus <= maxLocal ){ + pInfo->nLocal = (u16)surplus; + }else{ + pInfo->nLocal = (u16)minLocal; + } + pInfo->iOverflow = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell); + pInfo->nSize = pInfo->iOverflow + 4; +} + +/* +** The following routines are implementations of the MemPage.xParseCell() +** method. +** +** Parse a cell content block and fill in the CellInfo structure. +** +** btreeParseCellPtr() => table btree leaf nodes +** btreeParseCellNoPayload() => table btree internal nodes +** btreeParseCellPtrIndex() => index btree nodes +** +** There is also a wrapper function btreeParseCell() that works for +** all MemPage types and that references the cell by index rather than +** by pointer. +*/ +static void btreeParseCellPtrNoPayload( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->leaf==0 ); + assert( pPage->noPayload ); + assert( pPage->childPtrSize==4 ); +#ifndef SQLITE_DEBUG + UNUSED_PARAMETER(pPage); +#endif + pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey); + pInfo->nPayload = 0; + pInfo->nLocal = 0; + pInfo->iOverflow = 0; + pInfo->pPayload = 0; + return; +} +static void btreeParseCellPtr( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + u8 *pIter; /* For scanning through pCell */ + u32 nPayload; /* Number of bytes of cell payload */ + u64 iKey; /* Extracted Key value */ + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->leaf==0 || pPage->leaf==1 ); + assert( pPage->intKeyLeaf || pPage->noPayload ); + assert( pPage->noPayload==0 ); + assert( pPage->intKeyLeaf ); + assert( pPage->childPtrSize==0 ); + pIter = pCell; + + /* The next block of code is equivalent to: + ** + ** pIter += getVarint32(pIter, nPayload); + ** + ** The code is inlined to avoid a function call. + */ + nPayload = *pIter; + if( nPayload>=0x80 ){ + u8 *pEnd = &pIter[8]; + nPayload &= 0x7f; + do{ + nPayload = (nPayload<<7) | (*++pIter & 0x7f); + }while( (*pIter)>=0x80 && pIter<pEnd ); + } + pIter++; + + /* The next block of code is equivalent to: + ** + ** pIter += getVarint(pIter, (u64*)&pInfo->nKey); + ** + ** The code is inlined to avoid a function call. + */ + iKey = *pIter; + if( iKey>=0x80 ){ + u8 *pEnd = &pIter[7]; + iKey &= 0x7f; + while(1){ + iKey = (iKey<<7) | (*++pIter & 0x7f); + if( (*pIter)<0x80 ) break; + if( pIter>=pEnd ){ + iKey = (iKey<<8) | *++pIter; + break; + } + } + } + pIter++; + + pInfo->nKey = *(i64*)&iKey; + pInfo->nPayload = nPayload; + pInfo->pPayload = pIter; + testcase( nPayload==pPage->maxLocal ); + testcase( nPayload==pPage->maxLocal+1 ); + if( nPayload<=pPage->maxLocal ){ + /* This is the (easy) common case where the entire payload fits + ** on the local page. No overflow is required. + */ + pInfo->nSize = nPayload + (u16)(pIter - pCell); + if( pInfo->nSize<4 ) pInfo->nSize = 4; + pInfo->nLocal = (u16)nPayload; + pInfo->iOverflow = 0; + }else{ + btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo); + } +} +static void btreeParseCellPtrIndex( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + u8 *pIter; /* For scanning through pCell */ + u32 nPayload; /* Number of bytes of cell payload */ + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->leaf==0 || pPage->leaf==1 ); + assert( pPage->intKeyLeaf==0 ); + assert( pPage->noPayload==0 ); + pIter = pCell + pPage->childPtrSize; + nPayload = *pIter; + if( nPayload>=0x80 ){ + u8 *pEnd = &pIter[8]; + nPayload &= 0x7f; + do{ + nPayload = (nPayload<<7) | (*++pIter & 0x7f); + }while( *(pIter)>=0x80 && pIter<pEnd ); + } + pIter++; + pInfo->nKey = nPayload; + pInfo->nPayload = nPayload; + pInfo->pPayload = pIter; + testcase( nPayload==pPage->maxLocal ); + testcase( nPayload==pPage->maxLocal+1 ); + if( nPayload<=pPage->maxLocal ){ + /* This is the (easy) common case where the entire payload fits + ** on the local page. No overflow is required. + */ + pInfo->nSize = nPayload + (u16)(pIter - pCell); + if( pInfo->nSize<4 ) pInfo->nSize = 4; + pInfo->nLocal = (u16)nPayload; + pInfo->iOverflow = 0; + }else{ + btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo); + } +} +static void btreeParseCell( + MemPage *pPage, /* Page containing the cell */ + int iCell, /* The cell index. First cell is 0 */ + CellInfo *pInfo /* Fill in this structure */ +){ + pPage->xParseCell(pPage, findCell(pPage, iCell), pInfo); +} + +/* +** The following routines are implementations of the MemPage.xCellSize +** method. +** +** Compute the total number of bytes that a Cell needs in the cell +** data area of the btree-page. The return number includes the cell +** data header and the local payload, but not any overflow page or +** the space used by the cell pointer. +** +** cellSizePtrNoPayload() => table internal nodes +** cellSizePtr() => all index nodes & table leaf nodes +*/ +static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ + u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */ + u8 *pEnd; /* End mark for a varint */ + u32 nSize; /* Size value to return */ + +#ifdef SQLITE_DEBUG + /* The value returned by this function should always be the same as + ** the (CellInfo.nSize) value found by doing a full parse of the + ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of + ** this function verifies that this invariant is not violated. */ + CellInfo debuginfo; + pPage->xParseCell(pPage, pCell, &debuginfo); +#endif + + assert( pPage->noPayload==0 ); + nSize = *pIter; + if( nSize>=0x80 ){ + pEnd = &pIter[8]; + nSize &= 0x7f; + do{ + nSize = (nSize<<7) | (*++pIter & 0x7f); + }while( *(pIter)>=0x80 && pIter<pEnd ); + } + pIter++; + if( pPage->intKey ){ + /* pIter now points at the 64-bit integer key value, a variable length + ** integer. The following block moves pIter to point at the first byte + ** past the end of the key value. */ + pEnd = &pIter[9]; + while( (*pIter++)&0x80 && pIter<pEnd ); + } + testcase( nSize==pPage->maxLocal ); + testcase( nSize==pPage->maxLocal+1 ); + if( nSize<=pPage->maxLocal ){ + nSize += (u32)(pIter - pCell); + if( nSize<4 ) nSize = 4; + }else{ + int minLocal = pPage->minLocal; + nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4); + testcase( nSize==pPage->maxLocal ); + testcase( nSize==pPage->maxLocal+1 ); + if( nSize>pPage->maxLocal ){ + nSize = minLocal; + } + nSize += 4 + (u16)(pIter - pCell); + } + assert( nSize==debuginfo.nSize || CORRUPT_DB ); + return (u16)nSize; +} +static u16 cellSizePtrNoPayload(MemPage *pPage, u8 *pCell){ + u8 *pIter = pCell + 4; /* For looping over bytes of pCell */ + u8 *pEnd; /* End mark for a varint */ + +#ifdef SQLITE_DEBUG + /* The value returned by this function should always be the same as + ** the (CellInfo.nSize) value found by doing a full parse of the + ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of + ** this function verifies that this invariant is not violated. */ + CellInfo debuginfo; + pPage->xParseCell(pPage, pCell, &debuginfo); +#else + UNUSED_PARAMETER(pPage); +#endif + + assert( pPage->childPtrSize==4 ); + pEnd = pIter + 9; + while( (*pIter++)&0x80 && pIter<pEnd ); + assert( debuginfo.nSize==(u16)(pIter - pCell) || CORRUPT_DB ); + return (u16)(pIter - pCell); +} + + +#ifdef SQLITE_DEBUG +/* This variation on cellSizePtr() is used inside of assert() statements +** only. */ +static u16 cellSize(MemPage *pPage, int iCell){ + return pPage->xCellSize(pPage, findCell(pPage, iCell)); +} +#endif + +#ifndef SQLITE_OMIT_AUTOVACUUM +/* +** If the cell pCell, part of page pPage contains a pointer +** to an overflow page, insert an entry into the pointer-map +** for the overflow page. +*/ +static void ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell, int *pRC){ + CellInfo info; + if( *pRC ) return; + assert( pCell!=0 ); + pPage->xParseCell(pPage, pCell, &info); + if( info.iOverflow ){ + Pgno ovfl = get4byte(&pCell[info.iOverflow]); + ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC); + } +} +#endif + + +/* +** Defragment the page given. All Cells are moved to the +** end of the page and all free space is collected into one +** big FreeBlk that occurs in between the header and cell +** pointer array and the cell content area. +** +** EVIDENCE-OF: R-44582-60138 SQLite may from time to time reorganize a +** b-tree page so that there are no freeblocks or fragment bytes, all +** unused bytes are contained in the unallocated space region, and all +** cells are packed tightly at the end of the page. +*/ +static int defragmentPage(MemPage *pPage){ + int i; /* Loop counter */ + int pc; /* Address of the i-th cell */ + int hdr; /* Offset to the page header */ + int size; /* Size of a cell */ + int usableSize; /* Number of usable bytes on a page */ + int cellOffset; /* Offset to the cell pointer array */ + int cbrk; /* Offset to the cell content area */ + int nCell; /* Number of cells on the page */ + unsigned char *data; /* The page data */ + unsigned char *temp; /* Temp area for cell content */ + unsigned char *src; /* Source of content */ + int iCellFirst; /* First allowable cell index */ + int iCellLast; /* Last possible cell index */ + + + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + assert( pPage->pBt!=0 ); + assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE ); + assert( pPage->nOverflow==0 ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + temp = 0; + src = data = pPage->aData; + hdr = pPage->hdrOffset; + cellOffset = pPage->cellOffset; + nCell = pPage->nCell; + assert( nCell==get2byte(&data[hdr+3]) ); + usableSize = pPage->pBt->usableSize; + cbrk = usableSize; + iCellFirst = cellOffset + 2*nCell; + iCellLast = usableSize - 4; + for(i=0; i<nCell; i++){ + u8 *pAddr; /* The i-th cell pointer */ + pAddr = &data[cellOffset + i*2]; + pc = get2byte(pAddr); + testcase( pc==iCellFirst ); + testcase( pc==iCellLast ); + /* These conditions have already been verified in btreeInitPage() + ** if PRAGMA cell_size_check=ON. + */ + if( pc<iCellFirst || pc>iCellLast ){ + return SQLITE_CORRUPT_BKPT; + } + assert( pc>=iCellFirst && pc<=iCellLast ); + size = pPage->xCellSize(pPage, &src[pc]); + cbrk -= size; + if( cbrk<iCellFirst || pc+size>usableSize ){ + return SQLITE_CORRUPT_BKPT; + } + assert( cbrk+size<=usableSize && cbrk>=iCellFirst ); + testcase( cbrk+size==usableSize ); + testcase( pc+size==usableSize ); + put2byte(pAddr, cbrk); + if( temp==0 ){ + int x; + if( cbrk==pc ) continue; + temp = sqlite3PagerTempSpace(pPage->pBt->pPager); + x = get2byte(&data[hdr+5]); + memcpy(&temp[x], &data[x], (cbrk+size) - x); + src = temp; + } + memcpy(&data[cbrk], &src[pc], size); + } + assert( cbrk>=iCellFirst ); + put2byte(&data[hdr+5], cbrk); + data[hdr+1] = 0; + data[hdr+2] = 0; + data[hdr+7] = 0; + memset(&data[iCellFirst], 0, cbrk-iCellFirst); + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + if( cbrk-iCellFirst!=pPage->nFree ){ + return SQLITE_CORRUPT_BKPT; + } + return SQLITE_OK; +} + +/* +** Search the free-list on page pPg for space to store a cell nByte bytes in +** size. If one can be found, return a pointer to the space and remove it +** from the free-list. +** +** If no suitable space can be found on the free-list, return NULL. +** +** This function may detect corruption within pPg. If corruption is +** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned. +** +** Slots on the free list that are between 1 and 3 bytes larger than nByte +** will be ignored if adding the extra space to the fragmentation count +** causes the fragmentation count to exceed 60. +*/ +static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){ + const int hdr = pPg->hdrOffset; + u8 * const aData = pPg->aData; + int iAddr = hdr + 1; + int pc = get2byte(&aData[iAddr]); + int x; + int usableSize = pPg->pBt->usableSize; + + assert( pc>0 ); + do{ + int size; /* Size of the free slot */ + /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of + ** increasing offset. */ + if( pc>usableSize-4 || pc<iAddr+4 ){ + *pRc = SQLITE_CORRUPT_BKPT; + return 0; + } + /* EVIDENCE-OF: R-22710-53328 The third and fourth bytes of each + ** freeblock form a big-endian integer which is the size of the freeblock + ** in bytes, including the 4-byte header. */ + size = get2byte(&aData[pc+2]); + if( (x = size - nByte)>=0 ){ + testcase( x==4 ); + testcase( x==3 ); + if( pc < pPg->cellOffset+2*pPg->nCell || size+pc > usableSize ){ + *pRc = SQLITE_CORRUPT_BKPT; + return 0; + }else if( x<4 ){ + /* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total + ** number of bytes in fragments may not exceed 60. */ + if( aData[hdr+7]>57 ) return 0; + + /* Remove the slot from the free-list. Update the number of + ** fragmented bytes within the page. */ + memcpy(&aData[iAddr], &aData[pc], 2); + aData[hdr+7] += (u8)x; + }else{ + /* The slot remains on the free-list. Reduce its size to account + ** for the portion used by the new allocation. */ + put2byte(&aData[pc+2], x); + } + return &aData[pc + x]; + } + iAddr = pc; + pc = get2byte(&aData[pc]); + }while( pc ); + + return 0; +} + +/* +** Allocate nByte bytes of space from within the B-Tree page passed +** as the first argument. Write into *pIdx the index into pPage->aData[] +** of the first byte of allocated space. Return either SQLITE_OK or +** an error code (usually SQLITE_CORRUPT). +** +** The caller guarantees that there is sufficient space to make the +** allocation. This routine might need to defragment in order to bring +** all the space together, however. This routine will avoid using +** the first two bytes past the cell pointer area since presumably this +** allocation is being made in order to insert a new cell, so we will +** also end up needing a new cell pointer. +*/ +static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ + const int hdr = pPage->hdrOffset; /* Local cache of pPage->hdrOffset */ + u8 * const data = pPage->aData; /* Local cache of pPage->aData */ + int top; /* First byte of cell content area */ + int rc = SQLITE_OK; /* Integer return code */ + int gap; /* First byte of gap between cell pointers and cell content */ + + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + assert( pPage->pBt ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( nByte>=0 ); /* Minimum cell size is 4 */ + assert( pPage->nFree>=nByte ); + assert( pPage->nOverflow==0 ); + assert( nByte < (int)(pPage->pBt->usableSize-8) ); + + assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf ); + gap = pPage->cellOffset + 2*pPage->nCell; + assert( gap<=65536 ); + /* EVIDENCE-OF: R-29356-02391 If the database uses a 65536-byte page size + ** and the reserved space is zero (the usual value for reserved space) + ** then the cell content offset of an empty page wants to be 65536. + ** However, that integer is too large to be stored in a 2-byte unsigned + ** integer, so a value of 0 is used in its place. */ + top = get2byte(&data[hdr+5]); + assert( top<=(int)pPage->pBt->usableSize ); /* Prevent by getAndInitPage() */ + if( gap>top ){ + if( top==0 && pPage->pBt->usableSize==65536 ){ + top = 65536; + }else{ + return SQLITE_CORRUPT_BKPT; + } + } + + /* If there is enough space between gap and top for one more cell pointer + ** array entry offset, and if the freelist is not empty, then search the + ** freelist looking for a free slot big enough to satisfy the request. + */ + testcase( gap+2==top ); + testcase( gap+1==top ); + testcase( gap==top ); + if( (data[hdr+2] || data[hdr+1]) && gap+2<=top ){ + u8 *pSpace = pageFindSlot(pPage, nByte, &rc); + if( pSpace ){ + assert( pSpace>=data && (pSpace - data)<65536 ); + *pIdx = (int)(pSpace - data); + return SQLITE_OK; + }else if( rc ){ + return rc; + } + } + + /* The request could not be fulfilled using a freelist slot. Check + ** to see if defragmentation is necessary. + */ + testcase( gap+2+nByte==top ); + if( gap+2+nByte>top ){ + assert( pPage->nCell>0 || CORRUPT_DB ); + rc = defragmentPage(pPage); + if( rc ) return rc; + top = get2byteNotZero(&data[hdr+5]); + assert( gap+nByte<=top ); + } + + + /* Allocate memory from the gap in between the cell pointer array + ** and the cell content area. The btreeInitPage() call has already + ** validated the freelist. Given that the freelist is valid, there + ** is no way that the allocation can extend off the end of the page. + ** The assert() below verifies the previous sentence. + */ + top -= nByte; + put2byte(&data[hdr+5], top); + assert( top+nByte <= (int)pPage->pBt->usableSize ); + *pIdx = top; + return SQLITE_OK; +} + +/* +** Return a section of the pPage->aData to the freelist. +** The first byte of the new free block is pPage->aData[iStart] +** and the size of the block is iSize bytes. +** +** Adjacent freeblocks are coalesced. +** +** Note that even though the freeblock list was checked by btreeInitPage(), +** that routine will not detect overlap between cells or freeblocks. Nor +** does it detect cells or freeblocks that encrouch into the reserved bytes +** at the end of the page. So do additional corruption checks inside this +** routine and return SQLITE_CORRUPT if any problems are found. +*/ +static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ + u16 iPtr; /* Address of ptr to next freeblock */ + u16 iFreeBlk; /* Address of the next freeblock */ + u8 hdr; /* Page header size. 0 or 100 */ + u8 nFrag = 0; /* Reduction in fragmentation */ + u16 iOrigSize = iSize; /* Original value of iSize */ + u32 iLast = pPage->pBt->usableSize-4; /* Largest possible freeblock offset */ + u32 iEnd = iStart + iSize; /* First byte past the iStart buffer */ + unsigned char *data = pPage->aData; /* Page content */ + + assert( pPage->pBt!=0 ); + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + assert( CORRUPT_DB || iStart>=pPage->hdrOffset+6+pPage->childPtrSize ); + assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( iSize>=4 ); /* Minimum cell size is 4 */ + assert( iStart<=iLast ); + + /* Overwrite deleted information with zeros when the secure_delete + ** option is enabled */ + if( pPage->pBt->btsFlags & BTS_SECURE_DELETE ){ + memset(&data[iStart], 0, iSize); + } + + /* The list of freeblocks must be in ascending order. Find the + ** spot on the list where iStart should be inserted. + */ + hdr = pPage->hdrOffset; + iPtr = hdr + 1; + if( data[iPtr+1]==0 && data[iPtr]==0 ){ + iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */ + }else{ + while( (iFreeBlk = get2byte(&data[iPtr]))>0 && iFreeBlk<iStart ){ + if( iFreeBlk<iPtr+4 ) return SQLITE_CORRUPT_BKPT; + iPtr = iFreeBlk; + } + if( iFreeBlk>iLast ) return SQLITE_CORRUPT_BKPT; + assert( iFreeBlk>iPtr || iFreeBlk==0 ); + + /* At this point: + ** iFreeBlk: First freeblock after iStart, or zero if none + ** iPtr: The address of a pointer to iFreeBlk + ** + ** Check to see if iFreeBlk should be coalesced onto the end of iStart. + */ + if( iFreeBlk && iEnd+3>=iFreeBlk ){ + nFrag = iFreeBlk - iEnd; + if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_BKPT; + iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]); + if( iEnd > pPage->pBt->usableSize ) return SQLITE_CORRUPT_BKPT; + iSize = iEnd - iStart; + iFreeBlk = get2byte(&data[iFreeBlk]); + } + + /* If iPtr is another freeblock (that is, if iPtr is not the freelist + ** pointer in the page header) then check to see if iStart should be + ** coalesced onto the end of iPtr. + */ + if( iPtr>hdr+1 ){ + int iPtrEnd = iPtr + get2byte(&data[iPtr+2]); + if( iPtrEnd+3>=iStart ){ + if( iPtrEnd>iStart ) return SQLITE_CORRUPT_BKPT; + nFrag += iStart - iPtrEnd; + iSize = iEnd - iPtr; + iStart = iPtr; + } + } + if( nFrag>data[hdr+7] ) return SQLITE_CORRUPT_BKPT; + data[hdr+7] -= nFrag; + } + if( iStart==get2byte(&data[hdr+5]) ){ + /* The new freeblock is at the beginning of the cell content area, + ** so just extend the cell content area rather than create another + ** freelist entry */ + if( iPtr!=hdr+1 ) return SQLITE_CORRUPT_BKPT; + put2byte(&data[hdr+1], iFreeBlk); + put2byte(&data[hdr+5], iEnd); + }else{ + /* Insert the new freeblock into the freelist */ + put2byte(&data[iPtr], iStart); + put2byte(&data[iStart], iFreeBlk); + put2byte(&data[iStart+2], iSize); + } + pPage->nFree += iOrigSize; + return SQLITE_OK; +} + +/* +** Decode the flags byte (the first byte of the header) for a page +** and initialize fields of the MemPage structure accordingly. +** +** Only the following combinations are supported. Anything different +** indicates a corrupt database files: +** +** PTF_ZERODATA +** PTF_ZERODATA | PTF_LEAF +** PTF_LEAFDATA | PTF_INTKEY +** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF +*/ +static int decodeFlags(MemPage *pPage, int flagByte){ + BtShared *pBt; /* A copy of pPage->pBt */ + + assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 ); + flagByte &= ~PTF_LEAF; + pPage->childPtrSize = 4-4*pPage->leaf; + pPage->xCellSize = cellSizePtr; + pBt = pPage->pBt; + if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ + /* EVIDENCE-OF: R-03640-13415 A value of 5 means the page is an interior + ** table b-tree page. */ + assert( (PTF_LEAFDATA|PTF_INTKEY)==5 ); + /* EVIDENCE-OF: R-20501-61796 A value of 13 means the page is a leaf + ** table b-tree page. */ + assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 ); + pPage->intKey = 1; + if( pPage->leaf ){ + pPage->intKeyLeaf = 1; + pPage->noPayload = 0; + pPage->xParseCell = btreeParseCellPtr; + }else{ + pPage->intKeyLeaf = 0; + pPage->noPayload = 1; + pPage->xCellSize = cellSizePtrNoPayload; + pPage->xParseCell = btreeParseCellPtrNoPayload; + } + pPage->maxLocal = pBt->maxLeaf; + pPage->minLocal = pBt->minLeaf; + }else if( flagByte==PTF_ZERODATA ){ + /* EVIDENCE-OF: R-27225-53936 A value of 2 means the page is an interior + ** index b-tree page. */ + assert( (PTF_ZERODATA)==2 ); + /* EVIDENCE-OF: R-16571-11615 A value of 10 means the page is a leaf + ** index b-tree page. */ + assert( (PTF_ZERODATA|PTF_LEAF)==10 ); + pPage->intKey = 0; + pPage->intKeyLeaf = 0; + pPage->noPayload = 0; + pPage->xParseCell = btreeParseCellPtrIndex; + pPage->maxLocal = pBt->maxLocal; + pPage->minLocal = pBt->minLocal; + }else{ + /* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is + ** an error. */ + return SQLITE_CORRUPT_BKPT; + } + pPage->max1bytePayload = pBt->max1bytePayload; + return SQLITE_OK; +} + +/* +** Initialize the auxiliary information for a disk block. +** +** Return SQLITE_OK on success. If we see that the page does +** not contain a well-formed database page, then return +** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not +** guarantee that the page is well-formed. It only shows that +** we failed to detect any corruption. +*/ +static int btreeInitPage(MemPage *pPage){ + + assert( pPage->pBt!=0 ); + assert( pPage->pBt->db!=0 ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); + assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); + assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) ); + + if( !pPage->isInit ){ + u16 pc; /* Address of a freeblock within pPage->aData[] */ + u8 hdr; /* Offset to beginning of page header */ + u8 *data; /* Equal to pPage->aData */ + BtShared *pBt; /* The main btree structure */ + int usableSize; /* Amount of usable space on each page */ + u16 cellOffset; /* Offset from start of page to first cell pointer */ + int nFree; /* Number of unused bytes on the page */ + int top; /* First byte of the cell content area */ + int iCellFirst; /* First allowable cell or freeblock offset */ + int iCellLast; /* Last possible cell or freeblock offset */ + + pBt = pPage->pBt; + + hdr = pPage->hdrOffset; + data = pPage->aData; + /* EVIDENCE-OF: R-28594-02890 The one-byte flag at offset 0 indicating + ** the b-tree page type. */ + if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT; + assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); + pPage->maskPage = (u16)(pBt->pageSize - 1); + pPage->nOverflow = 0; + usableSize = pBt->usableSize; + pPage->cellOffset = cellOffset = hdr + 8 + pPage->childPtrSize; + pPage->aDataEnd = &data[usableSize]; + pPage->aCellIdx = &data[cellOffset]; + pPage->aDataOfst = &data[pPage->childPtrSize]; + /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates + ** the start of the cell content area. A zero value for this integer is + ** interpreted as 65536. */ + top = get2byteNotZero(&data[hdr+5]); + /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the + ** number of cells on the page. */ + pPage->nCell = get2byte(&data[hdr+3]); + if( pPage->nCell>MX_CELL(pBt) ){ + /* To many cells for a single page. The page must be corrupt */ + return SQLITE_CORRUPT_BKPT; + } + testcase( pPage->nCell==MX_CELL(pBt) ); + /* EVIDENCE-OF: R-24089-57979 If a page contains no cells (which is only + ** possible for a root page of a table that contains no rows) then the + ** offset to the cell content area will equal the page size minus the + ** bytes of reserved space. */ + assert( pPage->nCell>0 || top==usableSize || CORRUPT_DB ); + + /* A malformed database page might cause us to read past the end + ** of page when parsing a cell. + ** + ** The following block of code checks early to see if a cell extends + ** past the end of a page boundary and causes SQLITE_CORRUPT to be + ** returned if it does. + */ + iCellFirst = cellOffset + 2*pPage->nCell; + iCellLast = usableSize - 4; + if( pBt->pBt->flags & SQLITE_CellSizeCk ){ + int i; /* Index into the cell pointer array */ + int sz; /* Size of a cell */ + + if( !pPage->leaf ) iCellLast--; + for(i=0; i<pPage->nCell; i++){ + pc = get2byteAligned(&data[cellOffset+i*2]); + testcase( pc==iCellFirst ); + testcase( pc==iCellLast ); + if( pc<iCellFirst || pc>iCellLast ){ + return SQLITE_CORRUPT_BKPT; + } + sz = pPage->xCellSize(pPage, &data[pc]); + testcase( pc+sz==usableSize ); + if( pc+sz>usableSize ){ + return SQLITE_CORRUPT_BKPT; + } + } + if( !pPage->leaf ) iCellLast++; + } + + /* Compute the total free space on the page + ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the + ** start of the first freeblock on the page, or is zero if there are no + ** freeblocks. */ + pc = get2byte(&data[hdr+1]); + nFree = data[hdr+7] + top; /* Init nFree to non-freeblock free space */ + while( pc>0 ){ + u16 next, size; + if( pc<iCellFirst || pc>iCellLast ){ + /* EVIDENCE-OF: R-55530-52930 In a well-formed b-tree page, there will + ** always be at least one cell before the first freeblock. + ** + ** Or, the freeblock is off the end of the page + */ + return SQLITE_CORRUPT_BKPT; + } + next = get2byte(&data[pc]); + size = get2byte(&data[pc+2]); + if( (next>0 && next<=pc+size+3) || pc+size>usableSize ){ + /* Free blocks must be in ascending order. And the last byte of + ** the free-block must lie on the database page. */ + return SQLITE_CORRUPT_BKPT; + } + nFree = nFree + size; + pc = next; + } + + /* At this point, nFree contains the sum of the offset to the start + ** of the cell-content area plus the number of free bytes within + ** the cell-content area. If this is greater than the usable-size + ** of the page, then the page must be corrupted. This check also + ** serves to verify that the offset to the start of the cell-content + ** area, according to the page header, lies within the page. + */ + if( nFree>usableSize ){ + return SQLITE_CORRUPT_BKPT; + } + pPage->nFree = (u16)(nFree - iCellFirst); + pPage->isInit = 1; + } + return SQLITE_OK; +} + +/* +** Set up a raw page so that it looks like a database page holding +** no entries. +*/ +static void zeroPage(MemPage *pPage, int flags){ + unsigned char *data = pPage->aData; + BtShared *pBt = pPage->pBt; + u8 hdr = pPage->hdrOffset; + u16 first; + + assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno ); + assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); + assert( sqlite3PagerGetData(pPage->pDbPage) == data ); + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + assert( sqlite3_mutex_held(pBt->mutex) ); + if( pBt->btsFlags & BTS_SECURE_DELETE ){ + memset(&data[hdr], 0, pBt->usableSize - hdr); + } + data[hdr] = (char)flags; + first = hdr + ((flags&PTF_LEAF)==0 ? 12 : 8); + memset(&data[hdr+1], 0, 4); + data[hdr+7] = 0; + put2byte(&data[hdr+5], pBt->usableSize); + pPage->nFree = (u16)(pBt->usableSize - first); + decodeFlags(pPage, flags); + pPage->cellOffset = first; + pPage->aDataEnd = &data[pBt->usableSize]; + pPage->aCellIdx = &data[first]; + pPage->aDataOfst = &data[pPage->childPtrSize]; + pPage->nOverflow = 0; + assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); + pPage->maskPage = (u16)(pBt->pageSize - 1); + pPage->nCell = 0; + pPage->isInit = 1; +} + + +/* +** Convert a DbPage obtained from the pager into a MemPage used by +** the btree layer. +*/ +static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){ + MemPage *pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage); + pPage->aData = sqlite3PagerGetData(pDbPage); + pPage->pDbPage = pDbPage; + pPage->pBt = pBt; + pPage->pgno = pgno; + pPage->hdrOffset = pgno==1 ? 100 : 0; + return pPage; +} + +/* +** Get a page from the pager. Initialize the MemPage.pBt and +** MemPage.aData elements if needed. See also: btreeGetUnusedPage(). +** +** If the PAGER_GET_NOCONTENT flag is set, it means that we do not care +** about the content of the page at this time. So do not go to the disk +** to fetch the content. Just fill in the content with zeros for now. +** If in the future we call sqlite3PagerWrite() on this page, that +** means we have started to be concerned about content and the disk +** read should occur at that point. +*/ +static int btreeGetPage( + BtShared *pBt, /* The btree */ + Pgno pgno, /* Number of the page to fetch */ + MemPage **ppPage, /* Return the page in this parameter */ + int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ +){ + int rc; + DbPage *pDbPage; + + assert( flags==0 || flags==PAGER_GET_NOCONTENT || flags==PAGER_GET_READONLY ); + assert( sqlite3_mutex_held(pBt->mutex) ); + rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, flags); + if( rc ) return rc; + *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); + return SQLITE_OK; +} + +/* +** Retrieve a page from the pager cache. If the requested page is not +** already in the pager cache return NULL. Initialize the MemPage.pBt and +** MemPage.aData elements if needed. +*/ +static MemPage *btreePageLookup(BtShared *pBt, Pgno pgno){ + DbPage *pDbPage; + assert( sqlite3_mutex_held(pBt->mutex) ); + pDbPage = sqlite3PagerLookup(pBt->pPager, pgno); + if( pDbPage ){ + return btreePageFromDbPage(pDbPage, pgno, pBt); + } + return 0; +} + +/* +** Return the size of the database file in pages. If there is any kind of +** error, return ((unsigned int)-1). +*/ +static Pgno btreePagecount(BtShared *pBt){ + return pBt->nPage; +} +SQLITE_API u32 SQLITE_STDCALL sqlite3BtreeLastPage(Btree *p){ + assert( sqlite3BtreeHoldsMutex(p) ); + assert( ((p->pBt->nPage)&0x8000000)==0 ); + return btreePagecount(p->pBt); +} + +/* +** Get a page from the pager and initialize it. +** +** If pCur!=0 then the page is being fetched as part of a moveToChild() +** call. Do additional sanity checking on the page in this case. +** And if the fetch fails, this routine must decrement pCur->iPage. +** +** The page is fetched as read-write unless pCur is not NULL and is +** a read-only cursor. +** +** If an error occurs, then *ppPage is undefined. It +** may remain unchanged, or it may be set to an invalid value. +*/ +static int getAndInitPage( + BtShared *pBt, /* The database file */ + Pgno pgno, /* Number of the page to get */ + MemPage **ppPage, /* Write the page pointer here */ + BtCursor *pCur, /* Cursor to receive the page, or NULL */ + int bReadOnly /* True for a read-only page */ +){ + int rc; + DbPage *pDbPage; + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( pCur==0 || ppPage==&pCur->apPage[pCur->iPage] ); + assert( pCur==0 || bReadOnly==pCur->curPagerFlags ); + assert( pCur==0 || pCur->iPage>0 ); + + if( pgno>btreePagecount(pBt) ){ + rc = SQLITE_CORRUPT_BKPT; + goto getAndInitPage_error; + } + rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly); + if( rc ){ + goto getAndInitPage_error; + } + *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); + if( (*ppPage)->isInit==0 ){ + rc = btreeInitPage(*ppPage); + if( rc!=SQLITE_OK ){ + releasePage(*ppPage); + goto getAndInitPage_error; + } + } + + /* If obtaining a child page for a cursor, we must verify that the page is + ** compatible with the root page. */ + if( pCur + && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) + ){ + rc = SQLITE_CORRUPT_BKPT; + releasePage(*ppPage); + goto getAndInitPage_error; + } + return SQLITE_OK; + +getAndInitPage_error: + if( pCur ) pCur->iPage--; + testcase( pgno==0 ); + assert( pgno!=0 || rc==SQLITE_CORRUPT ); + return rc; +} + +/* +** Release a MemPage. This should be called once for each prior +** call to btreeGetPage. +*/ +static void releasePageNotNull(MemPage *pPage){ + assert( pPage->aData ); + assert( pPage->pBt ); + assert( pPage->pDbPage!=0 ); + assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); + assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + sqlite3PagerUnrefNotNull(pPage->pDbPage); +} +static void releasePage(MemPage *pPage){ + if( pPage ) releasePageNotNull(pPage); +} + +/* +** Get an unused page. +** +** This works just like btreeGetPage() with the addition: +** +** * If the page is already in use for some other purpose, immediately +** release it and return an SQLITE_CURRUPT error. +** * Make sure the isInit flag is clear +*/ +static int btreeGetUnusedPage( + BtShared *pBt, /* The btree */ + Pgno pgno, /* Number of the page to fetch */ + MemPage **ppPage, /* Return the page in this parameter */ + int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ +){ + int rc = btreeGetPage(pBt, pgno, ppPage, flags); + if( rc==SQLITE_OK ){ + if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){ + releasePage(*ppPage); + *ppPage = 0; + return SQLITE_CORRUPT_BKPT; + } + (*ppPage)->isInit = 0; + }else{ + *ppPage = 0; + } + return rc; +} + + +/* +** During a rollback, when the pager reloads information into the cache +** so that the cache is restored to its original state at the start of +** the transaction, for each page restored this routine is called. +** +** This routine needs to reset the extra data section at the end of the +** page to agree with the restored data. +*/ +static void pageReinit(DbPage *pData){ + MemPage *pPage; + pPage = (MemPage *)sqlite3PagerGetExtra(pData); + assert( sqlite3PagerPageRefcount(pData)>0 ); + if( pPage->isInit ){ + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + pPage->isInit = 0; + if( sqlite3PagerPageRefcount(pData)>1 ){ + /* pPage might not be a btree page; it might be an overflow page + ** or ptrmap page or a free page. In those cases, the following + ** call to btreeInitPage() will likely return SQLITE_CORRUPT. + ** But no harm is done by this. And it is very important that + ** btreeInitPage() be called on every btree page so we make + ** the call for every page that comes in for re-initing. */ + btreeInitPage(pPage); + } + } +} + +/* +** Invoke the busy handler for a btree. +*/ +static int btreeInvokeBusyHandler(void *pArg){ + BtShared *pBt = (BtShared*)pArg; + assert( pBt->pBt ); + assert( sqlite3_mutex_held(pBt->pBt->mutex) ); + return sqlite3InvokeBusyHandler(&pBt->pBt->busyHandler); +} + +/* +** Open a database file. +** +** zFilename is the name of the database file. If zFilename is NULL +** then an ephemeral database is created. The ephemeral database might +** be exclusively in memory, or it might use a disk-based memory cache. +** Either way, the ephemeral database will be automatically deleted +** when sqlite3BtreeClose() is called. +** +** If zFilename is ":memory:" then an in-memory database is created +** that is automatically destroyed when it is closed. +** +** The "flags" parameter is a bitmask that might contain bits like +** BTREE_OMIT_JOURNAL and/or BTREE_MEMORY. +** +** If the database is already opened in the same database connection +** and we are in shared cache mode, then the open will fail with an +** SQLITE_CONSTRAINT error. We cannot allow two or more BtShared +** objects in the same database connection since doing so will lead +** to problems with locking. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeOpen( + const char *zVfs, /* VFS to use for this b-tree */ + const char *zFilename, /* Name of the file containing the BTree database */ + Btree **ppBtree, /* Pointer to new Btree object written here */ + int flags, /* Options */ + int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */ +){ + BtShared *pBt = 0; /* Shared part of btree structure */ + Btree *p; /* Handle to return */ + sqlite3_mutex *mutexOpen = 0; /* Prevents a race condition. Ticket #3537 */ + int rc = SQLITE_OK; /* Result code from this function */ + u8 nReserve; /* Byte of unused space on each page */ + unsigned char zDbHeader[100]; /* Database header content */ + int isThreadsafe; /* True for threadsafe connections */ + +#ifdef SQLITE_ENABLE_API_ARMOR + if( ppBtree==0 ) return SQLITE_MISUSE_BKPT; +#endif + *ppBtree = 0; +#ifndef SQLITE_OMIT_AUTOINIT + rc = sqlite3BtreeInitialize(); + if( rc ) return rc; +#endif + + /* Only allow sensible combinations of bits in the vfsFlags argument. + ** Throw an error if any non-sense combination is used. If we + ** do not block illegal combinations here, it could trigger + ** assert() statements in deeper layers. Sensible combinations + ** are: + ** + ** 1: SQLITE_OPEN_READONLY + ** 2: SQLITE_OPEN_READWRITE + ** 6: SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE + */ + assert( SQLITE_OPEN_READONLY == 0x01 ); + assert( SQLITE_OPEN_READWRITE == 0x02 ); + assert( SQLITE_OPEN_CREATE == 0x04 ); + testcase( (1<<(vfsFlags&7))==0x02 ); /* READONLY */ + testcase( (1<<(vfsFlags&7))==0x04 ); /* READWRITE */ + testcase( (1<<(vfsFlags&7))==0x40 ); /* READWRITE | CREATE */ + if( ((1<<(vfsFlags&7)) & 0x46)==0 ){ + return SQLITE_MISUSE_BKPT; /* IMP: R-65497-44594 */ + } + + if( sqlite3GlobalConfig.bCoreMutex==0 ){ + isThreadsafe = 0; + }else if( vfsFlags & SQLITE_OPEN_NOMUTEX ){ + isThreadsafe = 0; + }else if( vfsFlags & SQLITE_OPEN_FULLMUTEX ){ + isThreadsafe = 1; + }else{ + isThreadsafe = sqlite3GlobalConfig.bFullMutex; + } + if( vfsFlags & SQLITE_OPEN_PRIVATECACHE ){ + vfsFlags &= ~SQLITE_OPEN_SHAREDCACHE; + }else if( sqlite3GlobalConfig.sharedCacheEnabled ){ + vfsFlags |= SQLITE_OPEN_SHAREDCACHE; + } + + /* Remove harmful bits from the vfsFlags parameter + ** + ** The SQLITE_OPEN_NOMUTEX and SQLITE_OPEN_FULLMUTEX vfsFlags were + ** dealt with in the previous code block. Besides these, the only + ** valid input vfsFlags for sqlite3_open_v2() are SQLITE_OPEN_READONLY, + ** SQLITE_OPEN_READWRITE, SQLITE_OPEN_CREATE, SQLITE_OPEN_SHAREDCACHE, + ** SQLITE_OPEN_PRIVATECACHE, and some reserved bits. Silently mask + ** off all other vfsFlags. + */ + vfsFlags &= ~( SQLITE_OPEN_DELETEONCLOSE | + SQLITE_OPEN_EXCLUSIVE | + SQLITE_OPEN_MAIN_DB | + SQLITE_OPEN_TEMP_DB | + SQLITE_OPEN_TRANSIENT_DB | + SQLITE_OPEN_MAIN_JOURNAL | + SQLITE_OPEN_TEMP_JOURNAL | + SQLITE_OPEN_SUBJOURNAL | + SQLITE_OPEN_MASTER_JOURNAL | + SQLITE_OPEN_NOMUTEX | + SQLITE_OPEN_FULLMUTEX | + SQLITE_OPEN_WAL + ); + + assert( (vfsFlags&0xff)==vfsFlags ); /* flags fit in 8 bits */ + + /* Only a BTREE_SINGLE database can be BTREE_UNORDERED */ + assert( (vfsFlags & BTREE_UNORDERED)==0 || (vfsFlags & BTREE_SINGLE)!=0 ); + + sqlite3_vfs *pVfs = + sqlite3_vfs_find(zVfs); + if( pVfs==0 ){ + return SQLITE_ERROR; + } + + p = sqlite3MallocZero(sizeof(Btree)); + if( !p ){ + return SQLITE_NOMEM; + } + p->inTrans = TRANS_NONE; +#ifndef SQLITE_OMIT_SHARED_CACHE + p->lock.pBtree = p; + p->lock.iTable = 1; +#endif + p->magic = SQLITE_MAGIC_BUSY; + + if( isThreadsafe ){ + p->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_RECURSIVE); + if( p->mutex==0 ){ + sqlite3_free(p); + p = 0; + goto btree_open_out; + } + } + sqlite3_mutex_enter(p->mutex); + + p->enc = SQLITE_UTF8; + p->szMmap = sqlite3GlobalConfig.szMmap; + p->flags |= 0 +#if SQLITE_DEFAULT_CKPTFULLFSYNC + | SQLITE_CkptFullFSync +#endif +#if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK) + | SQLITE_CellSizeCk +#endif +; + +#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) + /* + ** If this Btree is a candidate for shared cache, try to find an + ** existing BtShared object that we can share with + */ + if( (vfsFlags&SQLITE_OPEN_URI)!=0 ){ + if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){ + int nFilename = sqlite3Strlen30(zFilename)+1; + int nFullPathname = pVfs->mxPathname+1; + char *zFullPathname = sqlite3Malloc(MAX(nFullPathname,nFilename)); + MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) + + p->sharable = 1; + if( !zFullPathname ){ + sqlite3_free(p); + return SQLITE_NOMEM; + } + rc = sqlite3OsFullPathname(pVfs, zFilename, + nFullPathname, zFullPathname); + if( rc ){ + sqlite3_free(zFullPathname); + sqlite3_free(p); + return rc; + } +#if SQLITE_THREADSAFE + mutexOpen = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_OPEN); + sqlite3_mutex_enter(mutexOpen); + mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); + sqlite3_mutex_enter(mutexShared); +#endif + for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){ + assert( pBt->nRef>0 ); + if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager, 0)) + && sqlite3PagerVfs(pBt->pPager)==pVfs ){ + p->pBt = pBt; + pBt->nRef++; + break; + } + } + sqlite3_mutex_leave(mutexShared); + sqlite3_free(zFullPathname); + } +#ifdef SQLITE_DEBUG + else{ + /* In debug mode, we mark all persistent databases as sharable + ** even when they are not. This exercises the locking code and + ** gives more opportunity for asserts(sqlite3_mutex_held()) + ** statements to find locking problems. + */ + p->sharable = 1; + } +#endif + } +#endif + if( pBt==0 ){ + /* + ** The following asserts make sure that structures used by the btree are + ** the right size. This is to guard against size changes that result + ** when compiling on a different architecture. + */ + assert( sizeof(i64)==8 ); + assert( sizeof(u64)==8 ); + assert( sizeof(u32)==4 ); + assert( sizeof(u16)==2 ); + assert( sizeof(Pgno)==4 ); + + pBt = sqlite3MallocZero( sizeof(*pBt) ); + if( pBt==0 ){ + rc = SQLITE_NOMEM; + goto btree_open_out; + } + rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, + EXTRA_SIZE, flags, vfsFlags, pageReinit); + if( rc==SQLITE_OK ){ + sqlite3PagerSetMmapLimit(pBt->pPager, p->szMmap); + rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader); + } + if( rc!=SQLITE_OK ){ + goto btree_open_out; + } + pBt->openFlags = (u8)flags; + pBt->pBt = p; + sqlite3PagerSetBusyhandler(pBt->pPager, btreeInvokeBusyHandler, pBt); + p->pBt = pBt; + + pBt->pCursor = 0; + pBt->pPage1 = 0; + if( sqlite3PagerIsreadonly(pBt->pPager) ) pBt->btsFlags |= BTS_READ_ONLY; +#ifdef SQLITE_SECURE_DELETE + pBt->btsFlags |= BTS_SECURE_DELETE; +#endif + /* EVIDENCE-OF: R-51873-39618 The page size for a database file is + ** determined by the 2-byte integer located at an offset of 16 bytes from + ** the beginning of the database file. */ + pBt->pageSize = (zDbHeader[16]<<8) | (zDbHeader[17]<<16); + if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE + || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){ + pBt->pageSize = 0; +#ifndef SQLITE_OMIT_AUTOVACUUM + /* If the magic name ":memory:" will create an in-memory database, then + ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if + ** SQLITE_DEFAULT_AUTOVACUUM is true. + */ + if( zFilename ){ + pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0); + pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0); + } +#endif + nReserve = 0; + }else{ + /* EVIDENCE-OF: R-37497-42412 The size of the reserved region is + ** determined by the one-byte unsigned integer found at an offset of 20 + ** into the database file header. */ + nReserve = zDbHeader[20]; + pBt->btsFlags |= BTS_PAGESIZE_FIXED; +#ifndef SQLITE_OMIT_AUTOVACUUM + pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0); + pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0); +#endif + } + rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve); + if( rc ) goto btree_open_out; + pBt->usableSize = pBt->pageSize - nReserve; + assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */ + + /* + ** file_format==1 Version 3.0.0. + ** file_format==2 Version 3.1.3. // ALTER TABLE ADD COLUMN + ** file_format==3 Version 3.1.4. // ditto but with non-NULL defaults + ** file_format==4 Version 3.3.0. // DESC indices. Boolean constants + */ + pBt->file_format = zDbHeader[18]; + if( pBt->file_format==0 ){ + pBt->file_format = 1; + } + if( pBt->file_format>SQLITE_MAX_FILE_FORMAT ){ + rc = SQLITE_ERROR; + goto btree_open_out; + } + +#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) + /* Add the new BtShared object to the linked list sharable BtShareds. + */ + if( p->sharable ){ + MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) + pBt->nRef = 1; + MUTEX_LOGIC( mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);) + if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){ + pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST); + if( pBt->mutex==0 ){ + rc = SQLITE_NOMEM; + p->mallocFailed = 0; + goto btree_open_out; + } + } + sqlite3_mutex_enter(mutexShared); + pBt->pNext = GLOBAL(BtShared*,sqlite3SharedCacheList); + GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt; + sqlite3_mutex_leave(mutexShared); + } +#endif + } + + sqlite3PagerSetCachesize(pBt->pPager, SQLITE_DEFAULT_CACHE_SIZE); + + p->magic = SQLITE_MAGIC_OPEN; + + /* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking + ** mode. -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking + ** mode. Doing nothing at all also makes NORMAL the default. + */ +#ifdef SQLITE_DEFAULT_LOCKING_MODE + sqlite3PagerLockingMode(sqlite3BtreePager(db->aDb[0].pBt), + SQLITE_DEFAULT_LOCKING_MODE); +#endif + + *ppBtree = p; + +btree_open_out: + if( rc!=SQLITE_OK ){ + if( pBt && pBt->pPager ){ + sqlite3PagerClose(pBt->pPager); + } + sqlite3_free(pBt); + sqlite3_free(p); + *ppBtree = 0; + }else{ + if (p) { + assert( p->mutex!=0 || isThreadsafe==0 + || sqlite3GlobalConfig.bFullMutex==0 ); + sqlite3_mutex_leave(p->mutex); + } + + /* If the B-Tree was successfully opened, set the pager-cache size to the + ** default value. Except, when opening on an existing shared pager-cache, + ** do not change the pager-cache size. + */ + if( sqlite3BtreeSchema(p, 0, 0)==0 ){ + sqlite3PagerSetCachesize(pBt->pPager, SQLITE_DEFAULT_CACHE_SIZE); + } + } + if( mutexOpen ){ + assert( sqlite3_mutex_held(mutexOpen) ); + sqlite3_mutex_leave(mutexOpen); + } + return rc; +} + +/* +** Decrement the BtShared.nRef counter. When it reaches zero, +** remove the BtShared structure from the sharing list. Return +** true if the BtShared.nRef counter reaches zero and return +** false if it is still positive. +*/ +static int removeFromSharingList(BtShared *pBt){ +#ifndef SQLITE_OMIT_SHARED_CACHE + MUTEX_LOGIC( sqlite3_mutex *pMaster; ) + BtShared *pList; + int removed = 0; + + assert( sqlite3_mutex_notheld(pBt->mutex) ); + MUTEX_LOGIC( pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); ) + sqlite3_mutex_enter(pMaster); + pBt->nRef--; + if( pBt->nRef<=0 ){ + if( GLOBAL(BtShared*,sqlite3SharedCacheList)==pBt ){ + GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt->pNext; + }else{ + pList = GLOBAL(BtShared*,sqlite3SharedCacheList); + while( ALWAYS(pList) && pList->pNext!=pBt ){ + pList=pList->pNext; + } + if( ALWAYS(pList) ){ + pList->pNext = pBt->pNext; + } + } + if( SQLITE_THREADSAFE ){ + sqlite3_mutex_free(pBt->mutex); + } + removed = 1; + } + sqlite3_mutex_leave(pMaster); + return removed; +#else + return 1; +#endif +} + +/* +** Make sure pBt->pTmpSpace points to an allocation of +** MX_CELL_SIZE(pBt) bytes with a 4-byte prefix for a left-child +** pointer. +*/ +static void allocateTempSpace(BtShared *pBt){ + if( !pBt->pTmpSpace ){ + pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize ); + + /* One of the uses of pBt->pTmpSpace is to format cells before + ** inserting them into a leaf page (function fillInCell()). If + ** a cell is less than 4 bytes in size, it is rounded up to 4 bytes + ** by the various routines that manipulate binary cells. Which + ** can mean that fillInCell() only initializes the first 2 or 3 + ** bytes of pTmpSpace, but that the first 4 bytes are copied from + ** it into a database page. This is not actually a problem, but it + ** does cause a valgrind error when the 1 or 2 bytes of unitialized + ** data is passed to system call write(). So to avoid this error, + ** zero the first 4 bytes of temp space here. + ** + ** Also: Provide four bytes of initialized space before the + ** beginning of pTmpSpace as an area available to prepend the + ** left-child pointer to the beginning of a cell. + */ + if( pBt->pTmpSpace ){ + memset(pBt->pTmpSpace, 0, 8); + pBt->pTmpSpace += 4; + } + } +} + +/* +** Free the pBt->pTmpSpace allocation +*/ +static void freeTempSpace(BtShared *pBt){ + if( pBt->pTmpSpace ){ + pBt->pTmpSpace -= 4; + sqlite3PageFree(pBt->pTmpSpace); + pBt->pTmpSpace = 0; + } +} + +/* +** Close an open database and invalidate all cursors. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeClose(Btree *p){ + if( !p ){ + /* EVIDENCE-OF: R-63257-11740 Calling sqlite3BtreeClose() + ** with a NULL pointer argument is a harmless no-op. */ + return SQLITE_OK; + } + + if( !sqlite3SafetyCheckSickOrOk(p) ){ + return SQLITE_MISUSE_BKPT; + } + + sqlite3_mutex_enter(p->mutex); + + p->magic = SQLITE_MAGIC_ZOMBIE; + + BtShared *pBt = p->pBt; + BtCursor *pCur; + + /* Close all cursors opened via this handle. */ + sqlite3BtreeEnter(p); + pCur = pBt->pCursor; + while( pCur ){ + BtCursor *pTmp = pCur; + pCur = pCur->pNext; + if( pTmp->pBtree==p ){ + sqlite3BtreeCloseCursor(pTmp); + } + } + + /* Rollback any active transaction and free the handle structure. + ** The call to sqlite3BtreeRollback() drops any table-locks held by + ** this handle. + */ + sqlite3BtreeRollback(p, SQLITE_OK, 0); + sqlite3BtreeLeave(p); + + /* If there are still other outstanding references to the shared-btree + ** structure, return now. The remainder of this procedure cleans + ** up the shared-btree. + */ + assert( p->wantToLock==0 && p->locked==0 ); + if( !p->sharable || removeFromSharingList(pBt) ){ + /* The pBt is no longer on the sharing list, so we can access + ** it without having to hold the mutex. + ** + ** Clean out and delete the BtShared object. + */ + assert( !pBt->pCursor ); + sqlite3PagerClose(pBt->pPager); + if( pBt->xFreeSchema && pBt->pSchema ){ + pBt->xFreeSchema(pBt->pSchema); + } + sqlite3DbFree(0, pBt->pSchema); + freeTempSpace(pBt); + sqlite3_free(pBt); + } + +#ifndef SQLITE_OMIT_SHARED_CACHE + assert( p->wantToLock==0 ); + assert( p->locked==0 ); + if( p->pPrev ) p->pPrev->pNext = p->pNext; + if( p->pNext ) p->pNext->pPrev = p->pPrev; +#endif + + sqlite3_mutex_leave(p->mutex); + sqlite3_mutex_free(p->mutex); + + p->magic = SQLITE_MAGIC_CLOSED; + + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** Change the limit on the number of pages allowed in the cache. +** +** The maximum number of cache pages is set to the absolute +** value of mxPage. If mxPage is negative, the pager will +** operate asynchronously - it will not stop to do fsync()s +** to insure data is written to the disk surface before +** continuing. Transactions still work if synchronous is off, +** and the database cannot be corrupted if this program +** crashes. But if the operating system crashes or there is +** an abrupt power failure when synchronous is off, the database +** could be left in an inconsistent and unrecoverable state. +** Synchronous is on by default so database corruption is not +** normally a worry. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ + BtShared *pBt = p->pBt; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + sqlite3PagerSetCachesize(pBt->pPager, mxPage); + sqlite3BtreeLeave(p); + return SQLITE_OK; +} + +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** Change the limit on the amount of the database file that may be +** memory mapped. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){ + BtShared *pBt = p->pBt; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + sqlite3PagerSetMmapLimit(pBt->pPager, szMmap); + sqlite3BtreeLeave(p); + return SQLITE_OK; +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +/* +** Change the way data is synced to disk in order to increase or decrease +** how well the database resists damage due to OS crashes and power +** failures. Level 1 is the same as asynchronous (no syncs() occur and +** there is a high probability of damage) Level 2 is the default. There +** is a very low but non-zero probability of damage. Level 3 reduces the +** probability of damage to near zero but with a write performance reduction. +*/ +#ifndef SQLITE_OMIT_PAGER_PRAGMAS +SQLITE_API int SQLITE_STDCALL sqlite3BtreeSetPagerFlags( + Btree *p, /* The btree to set the safety level on */ + unsigned pgFlags /* Various PAGER_* flags */ +){ + BtShared *pBt = p->pBt; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + sqlite3PagerSetFlags(pBt->pPager, pgFlags); + sqlite3BtreeLeave(p); + return SQLITE_OK; +} +#endif + +/* +** Return TRUE if the given btree is set to safety level 1. In other +** words, return TRUE if no sync() occurs on the disk files. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeSyncDisabled(Btree *p){ + BtShared *pBt = p->pBt; + int rc; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + assert( pBt && pBt->pPager ); + rc = sqlite3PagerNosync(pBt->pPager); + sqlite3BtreeLeave(p); + return rc; +} + +/* +** Change the default pages size and the number of reserved bytes per page. +** Or, if the page size has already been fixed, return SQLITE_READONLY +** without changing anything. +** +** The page size must be a power of 2 between 512 and 65536. If the page +** size supplied does not meet this constraint then the page size is not +** changed. +** +** Page sizes are constrained to be a power of two so that the region +** of the database file used for locking (beginning at PENDING_BYTE, +** the first byte past the 1GB boundary, 0x40000000) needs to occur +** at the beginning of a page. +** +** If parameter nReserve is less than zero, then the number of reserved +** bytes per page is left unchanged. +** +** If the iFix!=0 then the BTS_PAGESIZE_FIXED flag is set so that the page size +** and autovacuum mode can no longer be changed. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, int iFix){ + int rc = SQLITE_OK; + BtShared *pBt = p->pBt; + assert( nReserve>=-1 && nReserve<=255 ); + sqlite3BtreeEnter(p); +#if SQLITE_HAS_CODEC + if( nReserve>pBt->optimalReserve ) pBt->optimalReserve = (u8)nReserve; +#endif + if( pBt->btsFlags & BTS_PAGESIZE_FIXED ){ + sqlite3BtreeLeave(p); + return SQLITE_READONLY; + } + if( nReserve<0 ){ + nReserve = pBt->pageSize - pBt->usableSize; + } + assert( nReserve>=0 && nReserve<=255 ); + if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE && + ((pageSize-1)&pageSize)==0 ){ + assert( (pageSize & 7)==0 ); + assert( !pBt->pCursor ); + pBt->pageSize = (u32)pageSize; + freeTempSpace(pBt); + } + rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve); + pBt->usableSize = pBt->pageSize - (u16)nReserve; + if( iFix ) pBt->btsFlags |= BTS_PAGESIZE_FIXED; + sqlite3BtreeLeave(p); + return rc; +} + +/* +** Return the currently defined page size +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeGetPageSize(Btree *p){ + return p->pBt->pageSize; +} + +/* +** This function is similar to sqlite3BtreeGetReserve(), except that it +** may only be called if it is guaranteed that the b-tree mutex is already +** held. +** +** This is useful in one special case in the backup API code where it is +** known that the shared b-tree mutex is held, but the mutex on the +** database handle that owns *p is not. In this case if sqlite3BtreeEnter() +** were to be called, it might collide with some other operation on the +** database handle that owns *p, causing undefined behavior. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeGetReserveNoMutex(Btree *p){ + int n; + assert( sqlite3_mutex_held(p->pBt->mutex) ); + n = p->pBt->pageSize - p->pBt->usableSize; + return n; +} + +/* +** Return the number of bytes of space at the end of every page that +** are intentually left unused. This is the "reserved" space that is +** sometimes used by extensions. +** +** If SQLITE_HAS_MUTEX is defined then the number returned is the +** greater of the current reserved space and the maximum requested +** reserve space. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeGetOptimalReserve(Btree *p){ + int n; + sqlite3BtreeEnter(p); + n = sqlite3BtreeGetReserveNoMutex(p); +#ifdef SQLITE_HAS_CODEC + if( n<p->pBt->optimalReserve ) n = p->pBt->optimalReserve; +#endif + sqlite3BtreeLeave(p); + return n; +} + + +/* +** Set the maximum page count for a database if mxPage is positive. +** No changes are made if mxPage is 0 or negative. +** Regardless of the value of mxPage, return the maximum page count. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeMaxPageCount(Btree *p, int mxPage){ + int n; + sqlite3BtreeEnter(p); + n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage); + sqlite3BtreeLeave(p); + return n; +} + +/* +** Set the BTS_SECURE_DELETE flag if newFlag is 0 or 1. If newFlag is -1, +** then make no changes. Always return the value of the BTS_SECURE_DELETE +** setting after the change. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeSecureDelete(Btree *p, int newFlag){ + int b; + if( p==0 ) return 0; + sqlite3BtreeEnter(p); + if( newFlag>=0 ){ + p->pBt->btsFlags &= ~BTS_SECURE_DELETE; + if( newFlag ) p->pBt->btsFlags |= BTS_SECURE_DELETE; + } + b = (p->pBt->btsFlags & BTS_SECURE_DELETE)!=0; + sqlite3BtreeLeave(p); + return b; +} + +/* +** Change the 'auto-vacuum' property of the database. If the 'autoVacuum' +** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it +** is disabled. The default value for the auto-vacuum property is +** determined by the SQLITE_DEFAULT_AUTOVACUUM macro. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){ +#ifdef SQLITE_OMIT_AUTOVACUUM + return SQLITE_READONLY; +#else + BtShared *pBt = p->pBt; + int rc = SQLITE_OK; + u8 av = (u8)autoVacuum; + + sqlite3BtreeEnter(p); + if( (pBt->btsFlags & BTS_PAGESIZE_FIXED)!=0 && (av ?1:0)!=pBt->autoVacuum ){ + rc = SQLITE_READONLY; + }else{ + pBt->autoVacuum = av ?1:0; + pBt->incrVacuum = av==2 ?1:0; + } + sqlite3BtreeLeave(p); + return rc; +#endif +} + +/* +** Return the value of the 'auto-vacuum' property. If auto-vacuum is +** enabled 1 is returned. Otherwise 0. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeGetAutoVacuum(Btree *p){ +#ifdef SQLITE_OMIT_AUTOVACUUM + return BTREE_AUTOVACUUM_NONE; +#else + int rc; + sqlite3BtreeEnter(p); + rc = ( + (!p->pBt->autoVacuum)?BTREE_AUTOVACUUM_NONE: + (!p->pBt->incrVacuum)?BTREE_AUTOVACUUM_FULL: + BTREE_AUTOVACUUM_INCR + ); + sqlite3BtreeLeave(p); + return rc; +#endif +} + + +/* +** Get a reference to pPage1 of the database file. This will +** also acquire a readlock on that file. +** +** SQLITE_OK is returned on success. If the file is not a +** well-formed database file, then SQLITE_CORRUPT is returned. +** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM +** is returned if we run out of memory. +*/ +static int lockBtree(BtShared *pBt){ + int rc; /* Result code from subfunctions */ + MemPage *pPage1; /* Page 1 of the database file */ + int nPage; /* Number of pages in the database */ + int nPageFile = 0; /* Number of pages in the database file */ + int nPageHeader; /* Number of pages in the database according to hdr */ + + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( pBt->pPage1==0 ); + rc = sqlite3PagerSharedLock(pBt->pPager); + if( rc!=SQLITE_OK ) return rc; + rc = btreeGetPage(pBt, 1, &pPage1, 0); + if( rc!=SQLITE_OK ) return rc; + + /* Do some checking to help insure the file we opened really is + ** a valid database file. + */ + nPage = nPageHeader = get4byte(28+(u8*)pPage1->aData); + sqlite3PagerPagecount(pBt->pPager, &nPageFile); + if( nPage==0 || memcmp(24+(u8*)pPage1->aData, 92+(u8*)pPage1->aData,4)!=0 ){ + nPage = nPageFile; + } + if( nPage>0 ){ + u32 pageSize; + u32 usableSize; + u8 *page1 = pPage1->aData; + rc = SQLITE_NOTADB; + /* EVIDENCE-OF: R-43737-39999 Every valid SQLite database file begins + ** with the following 16 bytes (in hex): 53 51 4c 69 74 65 20 66 6f 72 6d + ** 61 74 20 33 00. */ + if( memcmp(page1, zMagicHeader, 16)!=0 ){ + goto page1_init_failed; + } + +#ifdef SQLITE_OMIT_WAL + if( page1[18]>1 ){ + pBt->btsFlags |= BTS_READ_ONLY; + } + if( page1[19]>1 ){ + goto page1_init_failed; + } +#else + if( page1[18]>2 ){ + pBt->btsFlags |= BTS_READ_ONLY; + } + if( page1[19]>2 ){ + goto page1_init_failed; + } + + /* If the write version is set to 2, this database should be accessed + ** in WAL mode. If the log is not already open, open it now. Then + ** return SQLITE_OK and return without populating BtShared.pPage1. + ** The caller detects this and calls this function again. This is + ** required as the version of page 1 currently in the page1 buffer + ** may not be the latest version - there may be a newer one in the log + ** file. + */ + if( page1[19]==2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){ + int isOpen = 0; + rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen); + if( rc!=SQLITE_OK ){ + goto page1_init_failed; + }else if( isOpen==0 ){ + releasePage(pPage1); + return SQLITE_OK; + } + rc = SQLITE_NOTADB; + } +#endif + + /* EVIDENCE-OF: R-15465-20813 The maximum and minimum embedded payload + ** fractions and the leaf payload fraction values must be 64, 32, and 32. + ** + ** The original design allowed these amounts to vary, but as of + ** version 3.6.0, we require them to be fixed. + */ + if( memcmp(&page1[21], "\100\040\040",3)!=0 ){ + goto page1_init_failed; + } + /* EVIDENCE-OF: R-51873-39618 The page size for a database file is + ** determined by the 2-byte integer located at an offset of 16 bytes from + ** the beginning of the database file. */ + pageSize = (page1[16]<<8) | (page1[17]<<16); + /* EVIDENCE-OF: R-25008-21688 The size of a page is a power of two + ** between 512 and 65536 inclusive. */ + if( ((pageSize-1)&pageSize)!=0 + || pageSize>SQLITE_MAX_PAGE_SIZE + || pageSize<=256 + ){ + goto page1_init_failed; + } + assert( (pageSize & 7)==0 ); + /* EVIDENCE-OF: R-59310-51205 The "reserved space" size in the 1-byte + ** integer at offset 20 is the number of bytes of space at the end of + ** each page to reserve for extensions. + ** + ** EVIDENCE-OF: R-37497-42412 The size of the reserved region is + ** determined by the one-byte unsigned integer found at an offset of 20 + ** into the database file header. */ + usableSize = pageSize - page1[20]; + if( (u32)pageSize!=pBt->pageSize ){ + /* After reading the first page of the database assuming a page size + ** of BtShared.pageSize, we have discovered that the page-size is + ** actually pageSize. Unlock the database, leave pBt->pPage1 at + ** zero and return SQLITE_OK. The caller will call this function + ** again with the correct page-size. + */ + releasePage(pPage1); + pBt->usableSize = usableSize; + pBt->pageSize = pageSize; + freeTempSpace(pBt); + rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, + pageSize-usableSize); + return rc; + } + if( (pBt->pBt->flags & SQLITE_RecoveryMode)==0 && nPage>nPageFile ){ + rc = SQLITE_CORRUPT_BKPT; + goto page1_init_failed; + } + /* EVIDENCE-OF: R-28312-64704 However, the usable size is not allowed to + ** be less than 480. In other words, if the page size is 512, then the + ** reserved space size cannot exceed 32. */ + if( usableSize<480 ){ + goto page1_init_failed; + } + pBt->pageSize = pageSize; + pBt->usableSize = usableSize; +#ifndef SQLITE_OMIT_AUTOVACUUM + pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0); + pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0); +#endif + } + + /* maxLocal is the maximum amount of payload to store locally for + ** a cell. Make sure it is small enough so that at least minFanout + ** cells can will fit on one page. We assume a 10-byte page header. + ** Besides the payload, the cell must store: + ** 2-byte pointer to the cell + ** 4-byte child pointer + ** 9-byte nKey value + ** 4-byte nData value + ** 4-byte overflow page pointer + ** So a cell consists of a 2-byte pointer, a header which is as much as + ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow + ** page pointer. + */ + pBt->maxLocal = (u16)((pBt->usableSize-12)*64/255 - 23); + pBt->minLocal = (u16)((pBt->usableSize-12)*32/255 - 23); + pBt->maxLeaf = (u16)(pBt->usableSize - 35); + pBt->minLeaf = (u16)((pBt->usableSize-12)*32/255 - 23); + if( pBt->maxLocal>127 ){ + pBt->max1bytePayload = 127; + }else{ + pBt->max1bytePayload = (u8)pBt->maxLocal; + } + assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) ); + pBt->pPage1 = pPage1; + pBt->nPage = nPage; + return SQLITE_OK; + +page1_init_failed: + releasePage(pPage1); + pBt->pPage1 = 0; + return rc; +} + +#ifndef NDEBUG +/* +** Return the number of cursors open on pBt. This is for use +** in assert() expressions, so it is only compiled if NDEBUG is not +** defined. +** +** Only write cursors are counted if wrOnly is true. If wrOnly is +** false then all cursors are counted. +** +** For the purposes of this routine, a cursor is any cursor that +** is capable of reading or writing to the database. Cursors that +** have been tripped into the CURSOR_FAULT state are not counted. +*/ +static int countValidCursors(BtShared *pBt, int wrOnly){ + BtCursor *pCur; + int r = 0; + for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ + if( (wrOnly==0 || (pCur->curFlags & BTCF_WriteFlag)!=0) + && pCur->eState!=CURSOR_FAULT ) r++; + } + return r; +} +#endif + +/* +** If there are no outstanding cursors and we are not in the middle +** of a transaction but there is a read lock on the database, then +** this routine unrefs the first page of the database file which +** has the effect of releasing the read lock. +** +** If there is a transaction in progress, this routine is a no-op. +*/ +static void unlockBtreeIfUnused(BtShared *pBt){ + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( countValidCursors(pBt,0)==0 || pBt->inTransaction>TRANS_NONE ); + if( pBt->inTransaction==TRANS_NONE && pBt->pPage1!=0 ){ + MemPage *pPage1 = pBt->pPage1; + assert( pPage1->aData ); + assert( sqlite3PagerRefcount(pBt->pPager)==1 ); + pBt->pPage1 = 0; + releasePageNotNull(pPage1); + } +} + +/* +** If pBt points to an empty file then convert that empty file +** into a new empty database by initializing the first page of +** the database. +*/ +static int newDatabase(BtShared *pBt){ + MemPage *pP1; + unsigned char *data; + int rc; + + assert( sqlite3_mutex_held(pBt->mutex) ); + if( pBt->nPage>0 ){ + return SQLITE_OK; + } + pP1 = pBt->pPage1; + assert( pP1!=0 ); + data = pP1->aData; + rc = sqlite3PagerWrite(pP1->pDbPage); + if( rc ) return rc; + memcpy(data, zMagicHeader, sizeof(zMagicHeader)); + assert( sizeof(zMagicHeader)==16 ); + data[16] = (u8)((pBt->pageSize>>8)&0xff); + data[17] = (u8)((pBt->pageSize>>16)&0xff); + data[18] = 1; + data[19] = 1; + assert( pBt->usableSize<=pBt->pageSize && pBt->usableSize+255>=pBt->pageSize); + data[20] = (u8)(pBt->pageSize - pBt->usableSize); + data[21] = 64; + data[22] = 32; + data[23] = 32; + memset(&data[24], 0, 100-24); + zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA ); + pBt->btsFlags |= BTS_PAGESIZE_FIXED; +#ifndef SQLITE_OMIT_AUTOVACUUM + assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 ); + assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 ); + put4byte(&data[36 + 4*4], pBt->autoVacuum); + put4byte(&data[36 + 7*4], pBt->incrVacuum); +#endif + pBt->nPage = 1; + data[31] = 1; + return SQLITE_OK; +} + +/* +** Initialize the first page of the database file (creating a database +** consisting of a single page and no schema objects). Return SQLITE_OK +** if successful, or an SQLite error code otherwise. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeNewDb(Btree *p){ + int rc; + sqlite3BtreeEnter(p); + p->pBt->nPage = 0; + rc = newDatabase(p->pBt); + sqlite3BtreeLeave(p); + return rc; +} + +/* +** Attempt to start a new transaction. A write-transaction +** is started if the second argument is nonzero, otherwise a read- +** transaction. If the second argument is 2 or more and exclusive +** transaction is started, meaning that no other process is allowed +** to access the database. A preexisting transaction may not be +** upgraded to exclusive by calling this routine a second time - the +** exclusivity flag only works for a new transaction. +** +** A write-transaction must be started before attempting any +** changes to the database. None of the following routines +** will work unless a transaction is started first: +** +** sqlite3BtreeCreateTable() +** sqlite3BtreeCreateIndex() +** sqlite3BtreeClearTable() +** sqlite3BtreeDropTable() +** sqlite3BtreeInsert() +** sqlite3BtreeDelete() +** sqlite3BtreeUpdateMeta() +** +** If an initial attempt to acquire the lock fails because of lock contention +** and the database was previously unlocked, then invoke the busy handler +** if there is one. But if there was previously a read-lock, do not +** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is +** returned when there is already a read-lock in order to avoid a deadlock. +** +** Suppose there are two processes A and B. A has a read lock and B has +** a reserved lock. B tries to promote to exclusive but is blocked because +** of A's read lock. A tries to promote to reserved but is blocked by B. +** One or the other of the two processes must give way or there can be +** no progress. By returning SQLITE_BUSY and not invoking the busy callback +** when A already has a read lock, we encourage A to give up and let B +** proceed. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeBeginTrans(Btree *p, int wrflag){ + Btree *pBlock = 0; + BtShared *pBt = p->pBt; + int rc = SQLITE_OK; + + sqlite3BtreeEnter(p); + btreeIntegrity(p); + + /* If the btree is already in a write-transaction, or it + ** is already in a read-transaction and a read-transaction + ** is requested, this is a no-op. + */ + if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){ + goto trans_begun; + } + assert( pBt->inTransaction==TRANS_WRITE || IfNotOmitAV(pBt->bDoTruncate)==0 ); + + /* Write transactions are not possible on a read-only database */ + if( (pBt->btsFlags & BTS_READ_ONLY)!=0 && wrflag ){ + rc = SQLITE_READONLY; + goto trans_begun; + } + +#ifndef SQLITE_OMIT_SHARED_CACHE + /* If another database handle has already opened a write transaction + ** on this shared-btree structure and a second write transaction is + ** requested, return SQLITE_LOCKED. + */ + if( (wrflag && pBt->inTransaction==TRANS_WRITE) + || (pBt->btsFlags & BTS_PENDING)!=0 + ){ + pBlock = pBt->pWriter; + }else if( wrflag>1 ){ + BtLock *pIter; + for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ + if( pIter->pBtree!=p ){ + pBlock = pIter->pBtree; + break; + } + } + } + if( pBlock ){ + sqlite3ConnectionBlocked(p, pBlock); + rc = SQLITE_LOCKED_SHAREDCACHE; + goto trans_begun; + } +#endif + + /* Any read-only or read-write transaction implies a read-lock on + ** page 1. So if some other shared-cache client already has a write-lock + ** on page 1, the transaction cannot be opened. */ + rc = querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK); + if( SQLITE_OK!=rc ) goto trans_begun; + + pBt->btsFlags &= ~BTS_INITIALLY_EMPTY; + if( pBt->nPage==0 ) pBt->btsFlags |= BTS_INITIALLY_EMPTY; + do { + /* Call lockBtree() until either pBt->pPage1 is populated or + ** lockBtree() returns something other than SQLITE_OK. lockBtree() + ** may return SQLITE_OK but leave pBt->pPage1 set to 0 if after + ** reading page 1 it discovers that the page-size of the database + ** file is not pBt->pageSize. In this case lockBtree() will update + ** pBt->pageSize to the page-size of the file on disk. + */ + while( pBt->pPage1==0 && SQLITE_OK==(rc = lockBtree(pBt)) ); + + if( rc==SQLITE_OK && wrflag ){ + if( (pBt->btsFlags & BTS_READ_ONLY)!=0 ){ + rc = SQLITE_READONLY; + }else{ + rc = sqlite3PagerBegin(pBt->pPager,wrflag>1,0); + if( rc==SQLITE_OK ){ + rc = newDatabase(pBt); + } + } + } + + if( rc!=SQLITE_OK ){ + unlockBtreeIfUnused(pBt); + } + }while( (rc&0xFF)==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE && + btreeInvokeBusyHandler(pBt) ); + + if( rc==SQLITE_OK ){ + if( p->inTrans==TRANS_NONE ){ + pBt->nTransaction++; +#ifndef SQLITE_OMIT_SHARED_CACHE + if( p->sharable ){ + assert( p->lock.pBtree==p && p->lock.iTable==1 ); + p->lock.eLock = READ_LOCK; + p->lock.pNext = pBt->pLock; + pBt->pLock = &p->lock; + } +#endif + } + p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ); + if( p->inTrans>pBt->inTransaction ){ + pBt->inTransaction = p->inTrans; + } + if( wrflag ){ + MemPage *pPage1 = pBt->pPage1; +#ifndef SQLITE_OMIT_SHARED_CACHE + assert( !pBt->pWriter ); + pBt->pWriter = p; + pBt->btsFlags &= ~BTS_EXCLUSIVE; + if( wrflag>1 ) pBt->btsFlags |= BTS_EXCLUSIVE; +#endif + + /* If the db-size header field is incorrect (as it may be if an old + ** client has been writing the database file), update it now. Doing + ** this sooner rather than later means the database size can safely + ** re-read the database size from page 1 if a savepoint or transaction + ** rollback occurs within the transaction. + */ + if( pBt->nPage!=get4byte(&pPage1->aData[28]) ){ + rc = sqlite3PagerWrite(pPage1->pDbPage); + if( rc==SQLITE_OK ){ + put4byte(&pPage1->aData[28], pBt->nPage); + } + } + } + } + + +trans_begun: + if( rc==SQLITE_OK && wrflag ){ + /* This call makes sure that the pager has the correct number of + ** open savepoints. If the second parameter is greater than 0 and + ** the sub-journal is not already open, then it will be opened here. + */ + rc = sqlite3PagerOpenSavepoint(pBt->pPager, p->nSavepoint); + } + + btreeIntegrity(p); + sqlite3BtreeLeave(p); + return rc; +} + +#ifndef SQLITE_OMIT_AUTOVACUUM + +/* +** Set the pointer-map entries for all children of page pPage. Also, if +** pPage contains cells that point to overflow pages, set the pointer +** map entries for the overflow pages as well. +*/ +static int setChildPtrmaps(MemPage *pPage){ + int i; /* Counter variable */ + int nCell; /* Number of cells in page pPage */ + int rc; /* Return code */ + BtShared *pBt = pPage->pBt; + u8 isInitOrig = pPage->isInit; + Pgno pgno = pPage->pgno; + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + rc = btreeInitPage(pPage); + if( rc!=SQLITE_OK ){ + goto set_child_ptrmaps_out; + } + nCell = pPage->nCell; + + for(i=0; i<nCell; i++){ + u8 *pCell = findCell(pPage, i); + + ptrmapPutOvflPtr(pPage, pCell, &rc); + + if( !pPage->leaf ){ + Pgno childPgno = get4byte(pCell); + ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); + } + } + + if( !pPage->leaf ){ + Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); + ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); + } + +set_child_ptrmaps_out: + pPage->isInit = isInitOrig; + return rc; +} + +/* +** Somewhere on pPage is a pointer to page iFrom. Modify this pointer so +** that it points to iTo. Parameter eType describes the type of pointer to +** be modified, as follows: +** +** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child +** page of pPage. +** +** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow +** page pointed to by one of the cells on pPage. +** +** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next +** overflow page in the list. +*/ +static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){ + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + if( eType==PTRMAP_OVERFLOW2 ){ + /* The pointer is always the first 4 bytes of the page in this case. */ + if( get4byte(pPage->aData)!=iFrom ){ + return SQLITE_CORRUPT_BKPT; + } + put4byte(pPage->aData, iTo); + }else{ + u8 isInitOrig = pPage->isInit; + int i; + int nCell; + int rc; + + rc = btreeInitPage(pPage); + if( rc ) return rc; + nCell = pPage->nCell; + + for(i=0; i<nCell; i++){ + u8 *pCell = findCell(pPage, i); + if( eType==PTRMAP_OVERFLOW1 ){ + CellInfo info; + pPage->xParseCell(pPage, pCell, &info); + if( info.iOverflow + && pCell+info.iOverflow+3<=pPage->aData+pPage->maskPage + && iFrom==get4byte(&pCell[info.iOverflow]) + ){ + put4byte(&pCell[info.iOverflow], iTo); + break; + } + }else{ + if( get4byte(pCell)==iFrom ){ + put4byte(pCell, iTo); + break; + } + } + } + + if( i==nCell ){ + if( eType!=PTRMAP_BTREE || + get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){ + return SQLITE_CORRUPT_BKPT; + } + put4byte(&pPage->aData[pPage->hdrOffset+8], iTo); + } + + pPage->isInit = isInitOrig; + } + return SQLITE_OK; +} + + +/* +** Move the open database page pDbPage to location iFreePage in the +** database. The pDbPage reference remains valid. +** +** The isCommit flag indicates that there is no need to remember that +** the journal needs to be sync()ed before database page pDbPage->pgno +** can be written to. The caller has already promised not to write to that +** page. +*/ +static int relocatePage( + BtShared *pBt, /* Btree */ + MemPage *pDbPage, /* Open page to move */ + u8 eType, /* Pointer map 'type' entry for pDbPage */ + Pgno iPtrPage, /* Pointer map 'page-no' entry for pDbPage */ + Pgno iFreePage, /* The location to move pDbPage to */ + int isCommit /* isCommit flag passed to sqlite3PagerMovepage */ +){ + MemPage *pPtrPage; /* The page that contains a pointer to pDbPage */ + Pgno iDbPage = pDbPage->pgno; + Pager *pPager = pBt->pPager; + int rc; + + assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 || + eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ); + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( pDbPage->pBt==pBt ); + + /* Move page iDbPage from its current location to page number iFreePage */ + TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n", + iDbPage, iFreePage, iPtrPage, eType)); + rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage, isCommit); + if( rc!=SQLITE_OK ){ + return rc; + } + pDbPage->pgno = iFreePage; + + /* If pDbPage was a btree-page, then it may have child pages and/or cells + ** that point to overflow pages. The pointer map entries for all these + ** pages need to be changed. + ** + ** If pDbPage is an overflow page, then the first 4 bytes may store a + ** pointer to a subsequent overflow page. If this is the case, then + ** the pointer map needs to be updated for the subsequent overflow page. + */ + if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){ + rc = setChildPtrmaps(pDbPage); + if( rc!=SQLITE_OK ){ + return rc; + } + }else{ + Pgno nextOvfl = get4byte(pDbPage->aData); + if( nextOvfl!=0 ){ + ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage, &rc); + if( rc!=SQLITE_OK ){ + return rc; + } + } + } + + /* Fix the database pointer on page iPtrPage that pointed at iDbPage so + ** that it points at iFreePage. Also fix the pointer map entry for + ** iPtrPage. + */ + if( eType!=PTRMAP_ROOTPAGE ){ + rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + rc = sqlite3PagerWrite(pPtrPage->pDbPage); + if( rc!=SQLITE_OK ){ + releasePage(pPtrPage); + return rc; + } + rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType); + releasePage(pPtrPage); + if( rc==SQLITE_OK ){ + ptrmapPut(pBt, iFreePage, eType, iPtrPage, &rc); + } + } + return rc; +} + +/* Forward declaration required by incrVacuumStep(). */ +static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8); + +/* +** Perform a single step of an incremental-vacuum. If successful, return +** SQLITE_OK. If there is no work to do (and therefore no point in +** calling this function again), return SQLITE_DONE. Or, if an error +** occurs, return some other error code. +** +** More specifically, this function attempts to re-organize the database so +** that the last page of the file currently in use is no longer in use. +** +** Parameter nFin is the number of pages that this database would contain +** were this function called until it returns SQLITE_DONE. +** +** If the bCommit parameter is non-zero, this function assumes that the +** caller will keep calling incrVacuumStep() until it returns SQLITE_DONE +** or an error. bCommit is passed true for an auto-vacuum-on-commit +** operation, or false for an incremental vacuum. +*/ +static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){ + Pgno nFreeList; /* Number of pages still on the free-list */ + int rc; + + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( iLastPg>nFin ); + + if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){ + u8 eType; + Pgno iPtrPage; + + nFreeList = get4byte(&pBt->pPage1->aData[36]); + if( nFreeList==0 ){ + return SQLITE_DONE; + } + + rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage); + if( rc!=SQLITE_OK ){ + return rc; + } + if( eType==PTRMAP_ROOTPAGE ){ + return SQLITE_CORRUPT_BKPT; + } + + if( eType==PTRMAP_FREEPAGE ){ + if( bCommit==0 ){ + /* Remove the page from the files free-list. This is not required + ** if bCommit is non-zero. In that case, the free-list will be + ** truncated to zero after this function returns, so it doesn't + ** matter if it still contains some garbage entries. + */ + Pgno iFreePg; + MemPage *pFreePg; + rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, BTALLOC_EXACT); + if( rc!=SQLITE_OK ){ + return rc; + } + assert( iFreePg==iLastPg ); + releasePage(pFreePg); + } + } else { + Pgno iFreePg; /* Index of free page to move pLastPg to */ + MemPage *pLastPg; + u8 eMode = BTALLOC_ANY; /* Mode parameter for allocateBtreePage() */ + Pgno iNear = 0; /* nearby parameter for allocateBtreePage() */ + + rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* If bCommit is zero, this loop runs exactly once and page pLastPg + ** is swapped with the first free page pulled off the free list. + ** + ** On the other hand, if bCommit is greater than zero, then keep + ** looping until a free-page located within the first nFin pages + ** of the file is found. + */ + if( bCommit==0 ){ + eMode = BTALLOC_LE; + iNear = nFin; + } + do { + MemPage *pFreePg; + rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iNear, eMode); + if( rc!=SQLITE_OK ){ + releasePage(pLastPg); + return rc; + } + releasePage(pFreePg); + }while( bCommit && iFreePg>nFin ); + assert( iFreePg<iLastPg ); + + rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, bCommit); + releasePage(pLastPg); + if( rc!=SQLITE_OK ){ + return rc; + } + } + } + + if( bCommit==0 ){ + do { + iLastPg--; + }while( iLastPg==PENDING_BYTE_PAGE(pBt) || PTRMAP_ISPAGE(pBt, iLastPg) ); + pBt->bDoTruncate = 1; + pBt->nPage = iLastPg; + } + return SQLITE_OK; +} + +/* +** The database opened by the first argument is an auto-vacuum database +** nOrig pages in size containing nFree free pages. Return the expected +** size of the database in pages following an auto-vacuum operation. +*/ +static Pgno finalDbSize(BtShared *pBt, Pgno nOrig, Pgno nFree){ + int nEntry; /* Number of entries on one ptrmap page */ + Pgno nPtrmap; /* Number of PtrMap pages to be freed */ + Pgno nFin; /* Return value */ + + nEntry = pBt->usableSize/5; + nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry; + nFin = nOrig - nFree - nPtrmap; + if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<PENDING_BYTE_PAGE(pBt) ){ + nFin--; + } + while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){ + nFin--; + } + + return nFin; +} + +/* +** A write-transaction must be opened before calling this function. +** It performs a single unit of work towards an incremental vacuum. +** +** If the incremental vacuum is finished after this function has run, +** SQLITE_DONE is returned. If it is not finished, but no error occurred, +** SQLITE_OK is returned. Otherwise an SQLite error code. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeIncrVacuum(Btree *p){ + int rc; + BtShared *pBt = p->pBt; + + sqlite3BtreeEnter(p); + assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE ); + if( !pBt->autoVacuum ){ + rc = SQLITE_DONE; + }else{ + Pgno nOrig = btreePagecount(pBt); + Pgno nFree = get4byte(&pBt->pPage1->aData[36]); + Pgno nFin = finalDbSize(pBt, nOrig, nFree); + + if( nOrig<nFin ){ + rc = SQLITE_CORRUPT_BKPT; + }else if( nFree>0 ){ + rc = saveAllCursors(pBt, 0, 0); + if( rc==SQLITE_OK ){ + invalidateAllOverflowCache(pBt); + rc = incrVacuumStep(pBt, nFin, nOrig, 0); + } + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + put4byte(&pBt->pPage1->aData[28], pBt->nPage); + } + }else{ + rc = SQLITE_DONE; + } + } + sqlite3BtreeLeave(p); + return rc; +} + +/* +** This routine is called prior to sqlite3PagerCommit when a transaction +** is committed for an auto-vacuum database. +** +** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages +** the database file should be truncated to during the commit process. +** i.e. the database has been reorganized so that only the first *pnTrunc +** pages are in use. +*/ +static int autoVacuumCommit(BtShared *pBt){ + int rc = SQLITE_OK; + Pager *pPager = pBt->pPager; + VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager); ) + + assert( sqlite3_mutex_held(pBt->mutex) ); + invalidateAllOverflowCache(pBt); + assert(pBt->autoVacuum); + if( !pBt->incrVacuum ){ + Pgno nFin; /* Number of pages in database after autovacuuming */ + Pgno nFree; /* Number of pages on the freelist initially */ + Pgno iFree; /* The next page to be freed */ + Pgno nOrig; /* Database size before freeing */ + + nOrig = btreePagecount(pBt); + if( PTRMAP_ISPAGE(pBt, nOrig) || nOrig==PENDING_BYTE_PAGE(pBt) ){ + /* It is not possible to create a database for which the final page + ** is either a pointer-map page or the pending-byte page. If one + ** is encountered, this indicates corruption. + */ + return SQLITE_CORRUPT_BKPT; + } + + nFree = get4byte(&pBt->pPage1->aData[36]); + nFin = finalDbSize(pBt, nOrig, nFree); + if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT; + if( nFin<nOrig ){ + rc = saveAllCursors(pBt, 0, 0); + } + for(iFree=nOrig; iFree>nFin && rc==SQLITE_OK; iFree--){ + rc = incrVacuumStep(pBt, nFin, iFree, 1); + } + if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){ + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + put4byte(&pBt->pPage1->aData[32], 0); + put4byte(&pBt->pPage1->aData[36], 0); + put4byte(&pBt->pPage1->aData[28], nFin); + pBt->bDoTruncate = 1; + pBt->nPage = nFin; + } + if( rc!=SQLITE_OK ){ + sqlite3PagerRollback(pPager); + } + } + + assert( nRef>=sqlite3PagerRefcount(pPager) ); + return rc; +} + +#else /* ifndef SQLITE_OMIT_AUTOVACUUM */ +# define setChildPtrmaps(x) SQLITE_OK +#endif + +/* +** This routine does the first phase of a two-phase commit. This routine +** causes a rollback journal to be created (if it does not already exist) +** and populated with enough information so that if a power loss occurs +** the database can be restored to its original state by playing back +** the journal. Then the contents of the journal are flushed out to +** the disk. After the journal is safely on oxide, the changes to the +** database are written into the database file and flushed to oxide. +** At the end of this call, the rollback journal still exists on the +** disk and we are still holding all locks, so the transaction has not +** committed. See sqlite3BtreeCommitPhaseTwo() for the second phase of the +** commit process. +** +** This call is a no-op if no write-transaction is currently active on pBt. +** +** Otherwise, sync the database file for the btree pBt. zMaster points to +** the name of a master journal file that should be written into the +** individual journal file, or is NULL, indicating no master journal file +** (single database transaction). +** +** When this is called, the master journal should already have been +** created, populated with this journal pointer and synced to disk. +** +** Once this is routine has returned, the only thing required to commit +** the write-transaction for this database file is to delete the journal. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){ + int rc = SQLITE_OK; + if( p->inTrans==TRANS_WRITE ){ + BtShared *pBt = p->pBt; + sqlite3BtreeEnter(p); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum ){ + rc = autoVacuumCommit(pBt); + if( rc!=SQLITE_OK ){ + sqlite3BtreeLeave(p); + return rc; + } + } + if( pBt->bDoTruncate ){ + sqlite3PagerTruncateImage(pBt->pPager, pBt->nPage); + } +#endif + rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, 0); + sqlite3BtreeLeave(p); + } + return rc; +} + +/* +** This function is called from both BtreeCommitPhaseTwo() and BtreeRollback() +** at the conclusion of a transaction. +*/ +static void btreeEndTransaction(Btree *p){ + BtShared *pBt = p->pBt; + assert( sqlite3BtreeHoldsMutex(p) ); + +#ifndef SQLITE_OMIT_AUTOVACUUM + pBt->bDoTruncate = 0; +#endif + if( p->inTrans>TRANS_NONE && p->nVdbeRead>1 ){ + /* If there are other active statements that belong to this database + ** handle, downgrade to a read-only transaction. The other statements + ** may still be reading from the database. */ + downgradeAllSharedCacheTableLocks(p); + p->inTrans = TRANS_READ; + }else{ + /* If the handle had any kind of transaction open, decrement the + ** transaction count of the shared btree. If the transaction count + ** reaches 0, set the shared state to TRANS_NONE. The unlockBtreeIfUnused() + ** call below will unlock the pager. */ + if( p->inTrans!=TRANS_NONE ){ + clearAllSharedCacheTableLocks(p); + pBt->nTransaction--; + if( 0==pBt->nTransaction ){ + pBt->inTransaction = TRANS_NONE; + } + } + + /* Set the current transaction state to TRANS_NONE and unlock the + ** pager if this call closed the only read or write transaction. */ + p->inTrans = TRANS_NONE; + unlockBtreeIfUnused(pBt); + } + + btreeIntegrity(p); +} + +/* +** Commit the transaction currently in progress. +** +** This routine implements the second phase of a 2-phase commit. The +** sqlite3BtreeCommitPhaseOne() routine does the first phase and should +** be invoked prior to calling this routine. The sqlite3BtreeCommitPhaseOne() +** routine did all the work of writing information out to disk and flushing the +** contents so that they are written onto the disk platter. All this +** routine has to do is delete or truncate or zero the header in the +** the rollback journal (which causes the transaction to commit) and +** drop locks. +** +** Normally, if an error occurs while the pager layer is attempting to +** finalize the underlying journal file, this function returns an error and +** the upper layer will attempt a rollback. However, if the second argument +** is non-zero then this b-tree transaction is part of a multi-file +** transaction. In this case, the transaction has already been committed +** (by deleting a master journal file) and the caller will ignore this +** functions return code. So, even if an error occurs in the pager layer, +** reset the b-tree objects internal state to indicate that the write +** transaction has been closed. This is quite safe, as the pager will have +** transitioned to the error state. +** +** This will release the write lock on the database file. If there +** are no active cursors, it also releases the read lock. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){ + + if( p->inTrans==TRANS_NONE ) return SQLITE_OK; + sqlite3BtreeEnter(p); + btreeIntegrity(p); + + /* If the handle has a write-transaction open, commit the shared-btrees + ** transaction and set the shared state to TRANS_READ. + */ + if( p->inTrans==TRANS_WRITE ){ + int rc; + BtShared *pBt = p->pBt; + assert( pBt->inTransaction==TRANS_WRITE ); + assert( pBt->nTransaction>0 ); + rc = sqlite3PagerCommitPhaseTwo(pBt->pPager); + if( rc!=SQLITE_OK && bCleanup==0 ){ + sqlite3BtreeLeave(p); + return rc; + } + p->iDataVersion--; /* Compensate for pPager->iDataVersion++; */ + pBt->inTransaction = TRANS_READ; + btreeClearHasContent(pBt); + } + + btreeEndTransaction(p); + sqlite3BtreeLeave(p); + return SQLITE_OK; +} + +/* +** Do both phases of a commit. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCommit(Btree *p){ + int rc; + sqlite3BtreeEnter(p); + rc = sqlite3BtreeCommitPhaseOne(p, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3BtreeCommitPhaseTwo(p, 0); + } + sqlite3BtreeLeave(p); + return rc; +} + +/* +** This routine sets the state to CURSOR_FAULT and the error +** code to errCode for every cursor on any BtShared that pBtree +** references. Or if the writeOnly flag is set to 1, then only +** trip write cursors and leave read cursors unchanged. +** +** Every cursor is a candidate to be tripped, including cursors +** that belong to other database connections that happen to be +** sharing the cache with pBtree. +** +** This routine gets called when a rollback occurs. If the writeOnly +** flag is true, then only write-cursors need be tripped - read-only +** cursors save their current positions so that they may continue +** following the rollback. Or, if writeOnly is false, all cursors are +** tripped. In general, writeOnly is false if the transaction being +** rolled back modified the database schema. In this case b-tree root +** pages may be moved or deleted from the database altogether, making +** it unsafe for read cursors to continue. +** +** If the writeOnly flag is true and an error is encountered while +** saving the current position of a read-only cursor, all cursors, +** including all read-cursors are tripped. +** +** SQLITE_OK is returned if successful, or if an error occurs while +** saving a cursor position, an SQLite error code. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode, int writeOnly){ + BtCursor *p; + int rc = SQLITE_OK; + + assert( (writeOnly==0 || writeOnly==1) && BTCF_WriteFlag==1 ); + if( pBtree ){ + sqlite3BtreeEnter(pBtree); + for(p=pBtree->pBt->pCursor; p; p=p->pNext){ + int i; + if( writeOnly && (p->curFlags & BTCF_WriteFlag)==0 ){ + if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){ + rc = saveCursorPosition(p); + if( rc!=SQLITE_OK ){ + (void)sqlite3BtreeTripAllCursors(pBtree, rc, 0); + break; + } + } + }else{ + sqlite3BtreeClearCursor(p); + p->eState = CURSOR_FAULT; + p->skipNext = errCode; + } + for(i=0; i<=p->iPage; i++){ + releasePage(p->apPage[i]); + p->apPage[i] = 0; + } + } + sqlite3BtreeLeave(pBtree); + } + return rc; +} + +/* +** Rollback the transaction in progress. +** +** If tripCode is not SQLITE_OK then cursors will be invalidated (tripped). +** Only write cursors are tripped if writeOnly is true but all cursors are +** tripped if writeOnly is false. Any attempt to use +** a tripped cursor will result in an error. +** +** This will release the write lock on the database file. If there +** are no active cursors, it also releases the read lock. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeRollback(Btree *p, int tripCode, int writeOnly){ + int rc; + BtShared *pBt = p->pBt; + MemPage *pPage1; + + assert( writeOnly==1 || writeOnly==0 ); + assert( tripCode==SQLITE_ABORT_ROLLBACK || tripCode==SQLITE_OK ); + sqlite3BtreeEnter(p); + if( tripCode==SQLITE_OK ){ + rc = tripCode = saveAllCursors(pBt, 0, 0); + if( rc ) writeOnly = 0; + }else{ + rc = SQLITE_OK; + } + if( tripCode ){ + int rc2 = sqlite3BtreeTripAllCursors(p, tripCode, writeOnly); + assert( rc==SQLITE_OK || (writeOnly==0 && rc2==SQLITE_OK) ); + if( rc2!=SQLITE_OK ) rc = rc2; + } + btreeIntegrity(p); + + if( p->inTrans==TRANS_WRITE ){ + int rc2; + + assert( TRANS_WRITE==pBt->inTransaction ); + rc2 = sqlite3PagerRollback(pBt->pPager); + if( rc2!=SQLITE_OK ){ + rc = rc2; + } + + /* The rollback may have destroyed the pPage1->aData value. So + ** call btreeGetPage() on page 1 again to make + ** sure pPage1->aData is set correctly. */ + if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){ + int nPage = get4byte(28+(u8*)pPage1->aData); + testcase( nPage==0 ); + if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage); + testcase( pBt->nPage!=nPage ); + pBt->nPage = nPage; + releasePage(pPage1); + } + assert( countValidCursors(pBt, 1)==0 ); + pBt->inTransaction = TRANS_READ; + btreeClearHasContent(pBt); + } + + btreeEndTransaction(p); + sqlite3BtreeLeave(p); + return rc; +} + +/* +** Start a statement subtransaction. The subtransaction can be rolled +** back independently of the main transaction. You must start a transaction +** before starting a subtransaction. The subtransaction is ended automatically +** if the main transaction commits or rolls back. +** +** Statement subtransactions are used around individual SQL statements +** that are contained within a BEGIN...COMMIT block. If a constraint +** error occurs within the statement, the effect of that one statement +** can be rolled back without having to rollback the entire transaction. +** +** A statement sub-transaction is implemented as an anonymous savepoint. The +** value passed as the second parameter is the total number of savepoints, +** including the new anonymous savepoint, open on the B-Tree. i.e. if there +** are no active savepoints and no other statement-transactions open, +** iStatement is 1. This anonymous savepoint can be released or rolled back +** using the sqlite3BtreeSavepoint() function. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeBeginStmt(Btree *p, int iStatement){ + int rc; + BtShared *pBt = p->pBt; + sqlite3BtreeEnter(p); + assert( p->inTrans==TRANS_WRITE ); + assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); + assert( iStatement>0 ); + assert( iStatement>p->nSavepoint ); + assert( pBt->inTransaction==TRANS_WRITE ); + /* At the pager level, a statement transaction is a savepoint with + ** an index greater than all savepoints created explicitly using + ** SQL statements. It is illegal to open, release or rollback any + ** such savepoints while the statement transaction savepoint is active. + */ + rc = sqlite3PagerOpenSavepoint(pBt->pPager, iStatement); + sqlite3BtreeLeave(p); + return rc; +} + +/* +** The second argument to this function, op, is always SAVEPOINT_ROLLBACK +** or SAVEPOINT_RELEASE. This function either releases or rolls back the +** savepoint identified by parameter iSavepoint, depending on the value +** of op. +** +** Normally, iSavepoint is greater than or equal to zero. However, if op is +** SAVEPOINT_ROLLBACK, then iSavepoint may also be -1. In this case the +** contents of the entire transaction are rolled back. This is different +** from a normal transaction rollback, as no locks are released and the +** transaction remains open. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){ + int rc = SQLITE_OK; + if( p && p->inTrans==TRANS_WRITE ){ + BtShared *pBt = p->pBt; + assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK ); + assert( iSavepoint>=0 || (iSavepoint==-1 && op==SAVEPOINT_ROLLBACK) ); + sqlite3BtreeEnter(p); + rc = sqlite3PagerSavepoint(pBt->pPager, op, iSavepoint); + if( rc==SQLITE_OK ){ + if( iSavepoint<0 && (pBt->btsFlags & BTS_INITIALLY_EMPTY)!=0 ){ + pBt->nPage = 0; + } + rc = newDatabase(pBt); + pBt->nPage = get4byte(28 + pBt->pPage1->aData); + + /* The database size was written into the offset 28 of the header + ** when the transaction started, so we know that the value at offset + ** 28 is nonzero. */ + assert( pBt->nPage>0 ); + } + sqlite3BtreeLeave(p); + } + return rc; +} + +/* +** Create a new cursor for the BTree whose root is on the page +** iTable. If a read-only cursor is requested, it is assumed that +** the caller already has at least a read-only transaction open +** on the database already. If a write-cursor is requested, then +** the caller is assumed to have an open write transaction. +** +** If wrFlag==0, then the cursor can only be used for reading. +** If wrFlag==1, then the cursor can be used for reading or for +** writing if other conditions for writing are also met. These +** are the conditions that must be met in order for writing to +** be allowed: +** +** 1: The cursor must have been opened with wrFlag==1 +** +** 2: Other database connections that share the same pager cache +** but which are not in the READ_UNCOMMITTED state may not have +** cursors open with wrFlag==0 on the same table. Otherwise +** the changes made by this write cursor would be visible to +** the read cursors in the other database connection. +** +** 3: The database must be writable (not on read-only media) +** +** 4: There must be an active transaction. +** +** No checking is done to make sure that page iTable really is the +** root page of a b-tree. If it is not, then the cursor acquired +** will not work correctly. +*/ +static int btreeCursor( + Btree *p, /* The btree */ + int iTable, /* Root page of table to open */ + int wrFlag, /* 1 to write. 0 read-only */ + struct KeyInfo *pKeyInfo, /* First arg to comparison function */ + BtCursor *pCur /* Space for new cursor */ +){ + BtShared *pBt = p->pBt; /* Shared b-tree handle */ + BtCursor *pX; /* Looping over other all cursors */ + + assert( sqlite3BtreeHoldsMutex(p) ); + assert( wrFlag==0 || wrFlag==1 ); + + /* The following assert statements verify that if this is a sharable + ** b-tree database, the connection is holding the required table locks, + ** and that no other connection has any open cursor that conflicts with + ** this lock. */ + assert( hasSharedCacheTableLock(p, iTable, pKeyInfo!=0, wrFlag+1) ); + assert( wrFlag==0 || !hasReadConflicts(p, iTable) ); + + /* Assert that the caller has opened the required transaction. */ + assert( p->inTrans>TRANS_NONE ); + assert( wrFlag==0 || p->inTrans==TRANS_WRITE ); + assert( pBt->pPage1 && pBt->pPage1->aData ); + assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 ); + + if( wrFlag ){ + allocateTempSpace(pBt); + if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM; + } + if( iTable==1 && btreePagecount(pBt)==0 ){ + assert( wrFlag==0 ); + iTable = 0; + } + + /* Now that no other errors can occur, finish filling in the BtCursor + ** variables and link the cursor into the BtShared list. */ + pCur->pgnoRoot = (Pgno)iTable; + pCur->iPage = -1; + pCur->pKeyInfo = pKeyInfo; + pCur->pBtree = p; + pCur->pBt = pBt; + assert( wrFlag==0 || wrFlag==BTCF_WriteFlag ); + pCur->curFlags = wrFlag; + pCur->curPagerFlags = wrFlag ? 0 : PAGER_GET_READONLY; + /* If there are two or more cursors on the same btree, then all such + ** cursors *must* have the BTCF_Multiple flag set. */ + for(pX=pBt->pCursor; pX; pX=pX->pNext){ + if( pX->pgnoRoot==(Pgno)iTable ){ + pX->curFlags |= BTCF_Multiple; + pCur->curFlags |= BTCF_Multiple; + } + } + pCur->pNext = pBt->pCursor; + pBt->pCursor = pCur; + pCur->eState = CURSOR_INVALID; + return SQLITE_OK; +} +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCursor( + Btree *p, /* The btree */ + int iTable, /* Root page of table to open */ + int wrFlag, /* 1 to write. 0 read-only */ + int N, int X, /* index of N key columns and X extra columns */ + BtCursor **ppCur /* Write new cursor here */ +){ + int rc; + + *ppCur = 0; + if( iTable<1 ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + KeyInfo *pKeyInfo = NULL; + if (N > 0 || X > 0) { + pKeyInfo = sqlite3DbMallocZero(0, + sizeof(KeyInfo) + (N+X)*(sizeof(CollSeq*)+1)); + if( pKeyInfo ){ + pKeyInfo->aSortOrder = (u8*)&pKeyInfo->aColl[N+X]; + pKeyInfo->nField = (u16)N; + pKeyInfo->nXField = (u16)X; + pKeyInfo->enc = p->enc; + pKeyInfo->pBtree = p; + pKeyInfo->nRef = 1; + }else{ + p->mallocFailed = 1; + return SQLITE_NOMEM; + } + } + + BtCursor *pCur = sqlite3MallocZero(sizeof(BtCursor)); + if (!pCur) { + p->mallocFailed = 1; + sqlite3_free(pKeyInfo); + return SQLITE_NOMEM; + } + + sqlite3BtreeEnter(p); + rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); + sqlite3BtreeLeave(p); + + if (rc != SQLITE_OK) { + sqlite3_free(pKeyInfo); + sqlite3_free(pCur); + return rc; + } + + *ppCur = pCur; + } + return rc; +} + +/* +** Return the size of a BtCursor object in bytes. +** +** This interfaces is needed so that users of cursors can preallocate +** sufficient storage to hold a cursor. The BtCursor object is opaque +** to users so they cannot do the sizeof() themselves - they must call +** this routine. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCursorSize(void){ + return ROUND8(sizeof(BtCursor)); +} + +/* +** Close a cursor. The read lock on the database file is released +** when the last cursor is closed. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCloseCursor(BtCursor *pCur){ + Btree *pBtree = pCur->pBtree; + if( pBtree ){ + int i; + BtShared *pBt = pCur->pBt; + sqlite3BtreeEnter(pBtree); + sqlite3BtreeClearCursor(pCur); + assert( pBt->pCursor!=0 ); + if( pBt->pCursor==pCur ){ + pBt->pCursor = pCur->pNext; + }else{ + BtCursor *pPrev = pBt->pCursor; + do{ + if( pPrev->pNext==pCur ){ + pPrev->pNext = pCur->pNext; + break; + } + pPrev = pPrev->pNext; + }while( ALWAYS(pPrev) ); + } + for(i=0; i<=pCur->iPage; i++){ + releasePage(pCur->apPage[i]); + } + unlockBtreeIfUnused(pBt); + sqlite3_free(pCur->aOverflow); + /* sqlite3_free(pCur); */ + sqlite3BtreeLeave(pBtree); + } + + if( pCur->pKeyInfo ) { + assert( pCur->pKeyInfo->nRef>0 ); + pCur->pKeyInfo->nRef--; + if( pCur->pKeyInfo->nRef==0 ) sqlite3DbFree(0, pCur->pKeyInfo); + } + + sqlite3_free(pCur); + return SQLITE_OK; +} + +/* +** Make sure the BtCursor* given in the argument has a valid +** BtCursor.info structure. If it is not already valid, call +** btreeParseCell() to fill it in. +** +** BtCursor.info is a cache of the information in the current cell. +** Using this cache reduces the number of calls to btreeParseCell(). +*/ +#ifndef NDEBUG + static void assertCellInfo(BtCursor *pCur){ + CellInfo info; + int iPage = pCur->iPage; + memset(&info, 0, sizeof(info)); + btreeParseCell(pCur->apPage[iPage], pCur->aiIdx[iPage], &info); + assert( CORRUPT_DB || memcmp(&info, &pCur->info, sizeof(info))==0 ); + } +#else + #define assertCellInfo(x) +#endif +static SQLITE_NOINLINE void getCellInfo(BtCursor *pCur){ + if( pCur->info.nSize==0 ){ + int iPage = pCur->iPage; + pCur->curFlags |= BTCF_ValidNKey; + btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); + }else{ + assertCellInfo(pCur); + } +} + +#ifndef NDEBUG /* The next routine used only within assert() statements */ +/* +** Return true if the given BtCursor is valid. A valid cursor is one +** that is currently pointing to a row in a (non-empty) table. +** This is a verification routine is used only within assert() statements. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCursorIsValid(BtCursor *pCur){ + return pCur && pCur->eState==CURSOR_VALID; +} +#endif /* NDEBUG */ + +/* +** Set *pSize to the size of the buffer needed to hold the value of +** the key for the current entry. If the cursor is not pointing +** to a valid entry, *pSize is set to 0. +** +** For a table with the INTKEY flag set, this routine returns the key +** itself, not the number of bytes in the key. +** +** The caller must position the cursor prior to invoking this routine. +** +** This routine cannot fail. It always returns SQLITE_OK. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){ + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + getCellInfo(pCur); + *pSize = pCur->info.nKey; + return SQLITE_OK; +} + +/* +** Set *pSize to the number of bytes of data in the entry the +** cursor currently points to. +** +** The caller must guarantee that the cursor is pointing to a non-NULL +** valid entry. In other words, the calling procedure must guarantee +** that the cursor has Cursor.eState==CURSOR_VALID. +** +** Failure is not possible. This function always returns SQLITE_OK. +** It might just as well be a procedure (returning void) but we continue +** to return an integer result code for historical reasons. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){ + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + assert( pCur->iPage>=0 ); + assert( pCur->iPage<BTCURSOR_MAX_DEPTH ); + assert( pCur->apPage[pCur->iPage]->intKeyLeaf==1 ); + getCellInfo(pCur); + *pSize = pCur->info.nPayload; + return SQLITE_OK; +} + +/* +** Given the page number of an overflow page in the database (parameter +** ovfl), this function finds the page number of the next page in the +** linked list of overflow pages. If possible, it uses the auto-vacuum +** pointer-map data instead of reading the content of page ovfl to do so. +** +** If an error occurs an SQLite error code is returned. Otherwise: +** +** The page number of the next overflow page in the linked list is +** written to *pPgnoNext. If page ovfl is the last page in its linked +** list, *pPgnoNext is set to zero. +** +** If ppPage is not NULL, and a reference to the MemPage object corresponding +** to page number pOvfl was obtained, then *ppPage is set to point to that +** reference. It is the responsibility of the caller to call releasePage() +** on *ppPage to free the reference. In no reference was obtained (because +** the pointer-map was used to obtain the value for *pPgnoNext), then +** *ppPage is set to zero. +*/ +static int getOverflowPage( + BtShared *pBt, /* The database file */ + Pgno ovfl, /* Current overflow page number */ + MemPage **ppPage, /* OUT: MemPage handle (may be NULL) */ + Pgno *pPgnoNext /* OUT: Next overflow page number */ +){ + Pgno next = 0; + MemPage *pPage = 0; + int rc = SQLITE_OK; + + assert( sqlite3_mutex_held(pBt->mutex) ); + assert(pPgnoNext); + +#ifndef SQLITE_OMIT_AUTOVACUUM + /* Try to find the next page in the overflow list using the + ** autovacuum pointer-map pages. Guess that the next page in + ** the overflow list is page number (ovfl+1). If that guess turns + ** out to be wrong, fall back to loading the data of page + ** number ovfl to determine the next page number. + */ + if( pBt->autoVacuum ){ + Pgno pgno; + Pgno iGuess = ovfl+1; + u8 eType; + + while( PTRMAP_ISPAGE(pBt, iGuess) || iGuess==PENDING_BYTE_PAGE(pBt) ){ + iGuess++; + } + + if( iGuess<=btreePagecount(pBt) ){ + rc = ptrmapGet(pBt, iGuess, &eType, &pgno); + if( rc==SQLITE_OK && eType==PTRMAP_OVERFLOW2 && pgno==ovfl ){ + next = iGuess; + rc = SQLITE_DONE; + } + } + } +#endif + + assert( next==0 || rc==SQLITE_DONE ); + if( rc==SQLITE_OK ){ + rc = btreeGetPage(pBt, ovfl, &pPage, (ppPage==0) ? PAGER_GET_READONLY : 0); + assert( rc==SQLITE_OK || pPage==0 ); + if( rc==SQLITE_OK ){ + next = get4byte(pPage->aData); + } + } + + *pPgnoNext = next; + if( ppPage ){ + *ppPage = pPage; + }else{ + releasePage(pPage); + } + return (rc==SQLITE_DONE ? SQLITE_OK : rc); +} + +/* +** Copy data from a buffer to a page, or from a page to a buffer. +** +** pPayload is a pointer to data stored on database page pDbPage. +** If argument eOp is false, then nByte bytes of data are copied +** from pPayload to the buffer pointed at by pBuf. If eOp is true, +** then sqlite3PagerWrite() is called on pDbPage and nByte bytes +** of data are copied from the buffer pBuf to pPayload. +** +** SQLITE_OK is returned on success, otherwise an error code. +*/ +static int copyPayload( + void *pPayload, /* Pointer to page data */ + void *pBuf, /* Pointer to buffer */ + int nByte, /* Number of bytes to copy */ + int eOp, /* 0 -> copy from page, 1 -> copy to page */ + DbPage *pDbPage /* Page containing pPayload */ +){ + if( eOp ){ + /* Copy data from buffer to page (a write operation) */ + int rc = sqlite3PagerWrite(pDbPage); + if( rc!=SQLITE_OK ){ + return rc; + } + memcpy(pPayload, pBuf, nByte); + }else{ + /* Copy data from page to buffer (a read operation) */ + memcpy(pBuf, pPayload, nByte); + } + return SQLITE_OK; +} + +/* +** This function is used to read or overwrite payload information +** for the entry that the pCur cursor is pointing to. The eOp +** argument is interpreted as follows: +** +** 0: The operation is a read. Populate the overflow cache. +** 1: The operation is a write. Populate the overflow cache. +** 2: The operation is a read. Do not populate the overflow cache. +** +** A total of "amt" bytes are read or written beginning at "offset". +** Data is read to or from the buffer pBuf. +** +** The content being read or written might appear on the main page +** or be scattered out on multiple overflow pages. +** +** If the current cursor entry uses one or more overflow pages and the +** eOp argument is not 2, this function may allocate space for and lazily +** populates the overflow page-list cache array (BtCursor.aOverflow). +** Subsequent calls use this cache to make seeking to the supplied offset +** more efficient. +** +** Once an overflow page-list cache has been allocated, it may be +** invalidated if some other cursor writes to the same table, or if +** the cursor is moved to a different row. Additionally, in auto-vacuum +** mode, the following events may invalidate an overflow page-list cache. +** +** * An incremental vacuum, +** * A commit in auto_vacuum="full" mode, +** * Creating a table (may require moving an overflow page). +*/ +static int accessPayload( + BtCursor *pCur, /* Cursor pointing to entry to read from */ + u32 offset, /* Begin reading this far into payload */ + u32 amt, /* Read this many bytes */ + unsigned char *pBuf, /* Write the bytes into this buffer */ + int eOp /* zero to read. non-zero to write. */ +){ + unsigned char *aPayload; + int rc = SQLITE_OK; + int iIdx = 0; + MemPage *pPage = pCur->apPage[pCur->iPage]; /* Btree page of current entry */ + BtShared *pBt = pCur->pBt; /* Btree this cursor belongs to */ +#ifdef SQLITE_DIRECT_OVERFLOW_READ + unsigned char * const pBufStart = pBuf; + int bEnd; /* True if reading to end of data */ +#endif + + assert( pPage ); + assert( pCur->eState==CURSOR_VALID ); + assert( pCur->aiIdx[pCur->iPage]<pPage->nCell ); + assert( cursorHoldsMutex(pCur) ); + assert( eOp!=2 || offset==0 ); /* Always start from beginning for eOp==2 */ + + getCellInfo(pCur); + aPayload = pCur->info.pPayload; +#ifdef SQLITE_DIRECT_OVERFLOW_READ + bEnd = offset+amt==pCur->info.nPayload; +#endif + assert( offset+amt <= pCur->info.nPayload ); + + if( &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize] ){ + /* Trying to read or write past the end of the data is an error */ + return SQLITE_CORRUPT_BKPT; + } + + /* Check if data must be read/written to/from the btree page itself. */ + if( offset<pCur->info.nLocal ){ + int a = amt; + if( a+offset>pCur->info.nLocal ){ + a = pCur->info.nLocal - offset; + } + rc = copyPayload(&aPayload[offset], pBuf, a, (eOp & 0x01), pPage->pDbPage); + offset = 0; + pBuf += a; + amt -= a; + }else{ + offset -= pCur->info.nLocal; + } + + + if( rc==SQLITE_OK && amt>0 ){ + const u32 ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */ + Pgno nextPage; + + nextPage = get4byte(&aPayload[pCur->info.nLocal]); + + /* If the BtCursor.aOverflow[] has not been allocated, allocate it now. + ** Except, do not allocate aOverflow[] for eOp==2. + ** + ** The aOverflow[] array is sized at one entry for each overflow page + ** in the overflow chain. The page number of the first overflow page is + ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array + ** means "not yet known" (the cache is lazily populated). + */ + if( eOp!=2 && (pCur->curFlags & BTCF_ValidOvfl)==0 ){ + int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize; + if( nOvfl>pCur->nOvflAlloc ){ + Pgno *aNew = (Pgno*)sqlite3Realloc( + pCur->aOverflow, nOvfl*2*sizeof(Pgno) + ); + if( aNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + pCur->nOvflAlloc = nOvfl*2; + pCur->aOverflow = aNew; + } + } + if( rc==SQLITE_OK ){ + memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno)); + pCur->curFlags |= BTCF_ValidOvfl; + } + } + + /* If the overflow page-list cache has been allocated and the + ** entry for the first required overflow page is valid, skip + ** directly to it. + */ + if( (pCur->curFlags & BTCF_ValidOvfl)!=0 + && pCur->aOverflow[offset/ovflSize] + ){ + iIdx = (offset/ovflSize); + nextPage = pCur->aOverflow[iIdx]; + offset = (offset%ovflSize); + } + + for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){ + + /* If required, populate the overflow page-list cache. */ + if( (pCur->curFlags & BTCF_ValidOvfl)!=0 ){ + assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage); + pCur->aOverflow[iIdx] = nextPage; + } + + if( offset>=ovflSize ){ + /* The only reason to read this page is to obtain the page + ** number for the next page in the overflow chain. The page + ** data is not required. So first try to lookup the overflow + ** page-list cache, if any, then fall back to the getOverflowPage() + ** function. + ** + ** Note that the aOverflow[] array must be allocated because eOp!=2 + ** here. If eOp==2, then offset==0 and this branch is never taken. + */ + assert( eOp!=2 ); + assert( pCur->curFlags & BTCF_ValidOvfl ); + assert( pCur->pBtree->db==pBt->db ); + if( pCur->aOverflow[iIdx+1] ){ + nextPage = pCur->aOverflow[iIdx+1]; + }else{ + rc = getOverflowPage(pBt, nextPage, 0, &nextPage); + } + offset -= ovflSize; + }else{ + /* Need to read this page properly. It contains some of the + ** range of data that is being read (eOp==0) or written (eOp!=0). + */ +#ifdef SQLITE_DIRECT_OVERFLOW_READ + sqlite3_file *fd; +#endif + int a = amt; + if( a + offset > ovflSize ){ + a = ovflSize - offset; + } + +#ifdef SQLITE_DIRECT_OVERFLOW_READ + /* If all the following are true: + ** + ** 1) this is a read operation, and + ** 2) data is required from the start of this overflow page, and + ** 3) the database is file-backed, and + ** 4) there is no open write-transaction, and + ** 5) the database is not a WAL database, + ** 6) all data from the page is being read. + ** 7) at least 4 bytes have already been read into the output buffer + ** + ** then data can be read directly from the database file into the + ** output buffer, bypassing the page-cache altogether. This speeds + ** up loading large records that span many overflow pages. + */ + if( (eOp&0x01)==0 /* (1) */ + && offset==0 /* (2) */ + && (bEnd || a==ovflSize) /* (6) */ + && pBt->inTransaction==TRANS_READ /* (4) */ + && (fd = sqlite3PagerFile(pBt->pPager))->pMethods /* (3) */ + && pBt->pPage1->aData[19]==0x01 /* (5) */ + && &pBuf[-4]>=pBufStart /* (7) */ + ){ + u8 aSave[4]; + u8 *aWrite = &pBuf[-4]; + assert( aWrite>=pBufStart ); /* hence (7) */ + memcpy(aSave, aWrite, 4); + rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1)); + nextPage = get4byte(aWrite); + memcpy(aWrite, aSave, 4); + }else +#endif + + { + DbPage *pDbPage; + rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage, + ((eOp&0x01)==0 ? PAGER_GET_READONLY : 0) + ); + if( rc==SQLITE_OK ){ + aPayload = sqlite3PagerGetData(pDbPage); + nextPage = get4byte(aPayload); + rc = copyPayload(&aPayload[offset+4], pBuf, a, (eOp&0x01), pDbPage); + sqlite3PagerUnref(pDbPage); + offset = 0; + } + } + amt -= a; + pBuf += a; + } + } + } + + if( rc==SQLITE_OK && amt>0 ){ + return SQLITE_CORRUPT_BKPT; + } + return rc; +} + +/* +** Read part of the key associated with cursor pCur. Exactly +** "amt" bytes will be transferred into pBuf[]. The transfer +** begins at "offset". +** +** The caller must ensure that pCur is pointing to a valid row +** in the table. +** +** Return SQLITE_OK on success or an error code if anything goes +** wrong. An error is returned if "offset+amt" is larger than +** the available payload. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] ); + assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); + return accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0); +} + +/* +** Read part of the data associated with cursor pCur. Exactly +** "amt" bytes will be transfered into pBuf[]. The transfer +** begins at "offset". +** +** Return SQLITE_OK on success or an error code if anything goes +** wrong. An error is returned if "offset+amt" is larger than +** the available payload. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ + int rc; + +#ifndef SQLITE_OMIT_INCRBLOB + if ( pCur->eState==CURSOR_INVALID ){ + return SQLITE_ABORT; + } +#endif + + assert( cursorHoldsMutex(pCur) ); + rc = restoreCursorPosition(pCur); + if( rc==SQLITE_OK ){ + assert( pCur->eState==CURSOR_VALID ); + assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] ); + assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); + rc = accessPayload(pCur, offset, amt, pBuf, 0); + } + return rc; +} + +/* +** Return a pointer to payload information from the entry that the +** pCur cursor is pointing to. The pointer is to the beginning of +** the key if index btrees (pPage->intKey==0) and is the data for +** table btrees (pPage->intKey==1). The number of bytes of available +** key/data is written into *pAmt. If *pAmt==0, then the value +** returned will not be a valid pointer. +** +** This routine is an optimization. It is common for the entire key +** and data to fit on the local page and for there to be no overflow +** pages. When that is so, this routine can be used to access the +** key and data without making a copy. If the key and/or data spills +** onto overflow pages, then accessPayload() must be used to reassemble +** the key/data and copy it into a preallocated buffer. +** +** The pointer returned by this routine looks directly into the cached +** page of the database. The data might change or move the next time +** any btree routine is called. +*/ +static const void *fetchPayload( + BtCursor *pCur, /* Cursor pointing to entry to read from */ + u32 *pAmt /* Write the number of available bytes here */ +){ + u32 amt; + assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]); + assert( pCur->eState==CURSOR_VALID ); + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); + assert( cursorHoldsMutex(pCur) ); + assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); + assert( pCur->info.nSize>0 ); + assert( pCur->info.pPayload>pCur->apPage[pCur->iPage]->aData || CORRUPT_DB ); + assert( pCur->info.pPayload<pCur->apPage[pCur->iPage]->aDataEnd ||CORRUPT_DB); + amt = (int)(pCur->apPage[pCur->iPage]->aDataEnd - pCur->info.pPayload); + if( pCur->info.nLocal<amt ) amt = pCur->info.nLocal; + *pAmt = amt; + return (void*)pCur->info.pPayload; +} + + +/* +** For the entry that cursor pCur is point to, return as +** many bytes of the key or data as are available on the local +** b-tree page. Write the number of available bytes into *pAmt. +** +** The pointer returned is ephemeral. The key/data may move +** or be destroyed on the next call to any Btree routine, +** including calls from other threads against the same cache. +** Hence, a mutex on the BtShared should be held prior to calling +** this routine. +** +** These routines is used to get quick access to key and data +** in the common case where no overflow pages are used. +*/ +SQLITE_API const void *SQLITE_STDCALL sqlite3BtreeKeyFetch(BtCursor *pCur, u32 *pAmt){ + return fetchPayload(pCur, pAmt); +} +SQLITE_API const void *SQLITE_STDCALL sqlite3BtreeDataFetch(BtCursor *pCur, u32 *pAmt){ + return fetchPayload(pCur, pAmt); +} + + +/* +** Move the cursor down to a new child page. The newPgno argument is the +** page number of the child page to move to. +** +** This function returns SQLITE_CORRUPT if the page-header flags field of +** the new child page does not match the flags field of the parent (i.e. +** if an intkey page appears to be the parent of a non-intkey page, or +** vice-versa). +*/ +static int moveToChild(BtCursor *pCur, u32 newPgno){ + BtShared *pBt = pCur->pBt; + + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + assert( pCur->iPage<BTCURSOR_MAX_DEPTH ); + assert( pCur->iPage>=0 ); + if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ + return SQLITE_CORRUPT_BKPT; + } + pCur->info.nSize = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); + pCur->iPage++; + pCur->aiIdx[pCur->iPage] = 0; + return getAndInitPage(pBt, newPgno, &pCur->apPage[pCur->iPage], + pCur, pCur->curPagerFlags); +} + +#if SQLITE_DEBUG +/* +** Page pParent is an internal (non-leaf) tree page. This function +** asserts that page number iChild is the left-child if the iIdx'th +** cell in page pParent. Or, if iIdx is equal to the total number of +** cells in pParent, that page number iChild is the right-child of +** the page. +*/ +static void assertParentIndex(MemPage *pParent, int iIdx, Pgno iChild){ + if( CORRUPT_DB ) return; /* The conditions tested below might not be true + ** in a corrupt database */ + assert( iIdx<=pParent->nCell ); + if( iIdx==pParent->nCell ){ + assert( get4byte(&pParent->aData[pParent->hdrOffset+8])==iChild ); + }else{ + assert( get4byte(findCell(pParent, iIdx))==iChild ); + } +} +#else +# define assertParentIndex(x,y,z) +#endif + +/* +** Move the cursor up to the parent page. +** +** pCur->idx is set to the cell index that contains the pointer +** to the page we are coming from. If we are coming from the +** right-most child page then pCur->idx is set to one more than +** the largest cell index. +*/ +static void moveToParent(BtCursor *pCur){ + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + assert( pCur->iPage>0 ); + assert( pCur->apPage[pCur->iPage] ); + assertParentIndex( + pCur->apPage[pCur->iPage-1], + pCur->aiIdx[pCur->iPage-1], + pCur->apPage[pCur->iPage]->pgno + ); + testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell ); + pCur->info.nSize = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); + releasePageNotNull(pCur->apPage[pCur->iPage--]); +} + +/* +** Move the cursor to point to the root page of its b-tree structure. +** +** If the table has a virtual root page, then the cursor is moved to point +** to the virtual root page instead of the actual root page. A table has a +** virtual root page when the actual root page contains no cells and a +** single child page. This can only happen with the table rooted at page 1. +** +** If the b-tree structure is empty, the cursor state is set to +** CURSOR_INVALID. Otherwise, the cursor is set to point to the first +** cell located on the root (or virtual root) page and the cursor state +** is set to CURSOR_VALID. +** +** If this function returns successfully, it may be assumed that the +** page-header flags indicate that the [virtual] root-page is the expected +** kind of b-tree page (i.e. if when opening the cursor the caller did not +** specify a KeyInfo structure the flags byte is set to 0x05 or 0x0D, +** indicating a table b-tree, or if the caller did specify a KeyInfo +** structure the flags byte is set to 0x02 or 0x0A, indicating an index +** b-tree). +*/ +static int moveToRoot(BtCursor *pCur){ + MemPage *pRoot; + int rc = SQLITE_OK; + + assert( cursorHoldsMutex(pCur) ); + assert( CURSOR_INVALID < CURSOR_REQUIRESEEK ); + assert( CURSOR_VALID < CURSOR_REQUIRESEEK ); + assert( CURSOR_FAULT > CURSOR_REQUIRESEEK ); + if( pCur->eState>=CURSOR_REQUIRESEEK ){ + if( pCur->eState==CURSOR_FAULT ){ + assert( pCur->skipNext!=SQLITE_OK ); + return pCur->skipNext; + } + sqlite3BtreeClearCursor(pCur); + } + + if( pCur->iPage>=0 ){ + while( pCur->iPage ){ + assert( pCur->apPage[pCur->iPage]!=0 ); + releasePageNotNull(pCur->apPage[pCur->iPage--]); + } + }else if( pCur->pgnoRoot==0 ){ + pCur->eState = CURSOR_INVALID; + return SQLITE_OK; + }else{ + assert( pCur->iPage==(-1) ); + rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0], + 0, pCur->curPagerFlags); + if( rc!=SQLITE_OK ){ + pCur->eState = CURSOR_INVALID; + return rc; + } + pCur->iPage = 0; + pCur->curIntKey = pCur->apPage[0]->intKey; + } + pRoot = pCur->apPage[0]; + assert( pRoot->pgno==pCur->pgnoRoot ); + + /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor + ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is + ** NULL, the caller expects a table b-tree. If this is not the case, + ** return an SQLITE_CORRUPT error. + ** + ** Earlier versions of SQLite assumed that this test could not fail + ** if the root page was already loaded when this function was called (i.e. + ** if pCur->iPage>=0). But this is not so if the database is corrupted + ** in such a way that page pRoot is linked into a second b-tree table + ** (or the freelist). */ + assert( pRoot->intKey==1 || pRoot->intKey==0 ); + if( pRoot->isInit==0 || (pCur->pKeyInfo==0)!=pRoot->intKey ){ + return SQLITE_CORRUPT_BKPT; + } + + pCur->aiIdx[0] = 0; + pCur->info.nSize = 0; + pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidNKey|BTCF_ValidOvfl); + + if( pRoot->nCell>0 ){ + pCur->eState = CURSOR_VALID; + }else if( !pRoot->leaf ){ + Pgno subpage; + if( pRoot->pgno!=1 ) return SQLITE_CORRUPT_BKPT; + subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]); + pCur->eState = CURSOR_VALID; + rc = moveToChild(pCur, subpage); + }else{ + pCur->eState = CURSOR_INVALID; + } + return rc; +} + +/* +** Move the cursor down to the left-most leaf entry beneath the +** entry to which it is currently pointing. +** +** The left-most leaf is the one with the smallest key - the first +** in ascending order. +*/ +static int moveToLeftmost(BtCursor *pCur){ + Pgno pgno; + int rc = SQLITE_OK; + MemPage *pPage; + + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){ + assert( pCur->aiIdx[pCur->iPage]<pPage->nCell ); + pgno = get4byte(findCell(pPage, pCur->aiIdx[pCur->iPage])); + rc = moveToChild(pCur, pgno); + } + return rc; +} + +/* +** Move the cursor down to the right-most leaf entry beneath the +** page to which it is currently pointing. Notice the difference +** between moveToLeftmost() and moveToRightmost(). moveToLeftmost() +** finds the left-most entry beneath the *entry* whereas moveToRightmost() +** finds the right-most entry beneath the *page*. +** +** The right-most entry is the one with the largest key - the last +** key in ascending order. +*/ +static int moveToRightmost(BtCursor *pCur){ + Pgno pgno; + int rc = SQLITE_OK; + MemPage *pPage = 0; + + assert( cursorHoldsMutex(pCur) ); + assert( pCur->eState==CURSOR_VALID ); + while( !(pPage = pCur->apPage[pCur->iPage])->leaf ){ + pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); + pCur->aiIdx[pCur->iPage] = pPage->nCell; + rc = moveToChild(pCur, pgno); + if( rc ) return rc; + } + pCur->aiIdx[pCur->iPage] = pPage->nCell-1; + assert( pCur->info.nSize==0 ); + assert( (pCur->curFlags & BTCF_ValidNKey)==0 ); + return SQLITE_OK; +} + +/* Move the cursor to the first entry in the table. Return SQLITE_OK +** on success. Set *pRes to 0 if the cursor actually points to something +** or set *pRes to 1 if the table is empty. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeFirst(BtCursor *pCur, int *pRes){ + int rc; + + assert( cursorHoldsMutex(pCur) ); + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); + rc = moveToRoot(pCur); + if( rc==SQLITE_OK ){ + if( pCur->eState==CURSOR_INVALID ){ + assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); + *pRes = 1; + }else{ + assert( pCur->apPage[pCur->iPage]->nCell>0 ); + *pRes = 0; + rc = moveToLeftmost(pCur); + } + } + return rc; +} + +/* Move the cursor to the last entry in the table. Return SQLITE_OK +** on success. Set *pRes to 0 if the cursor actually points to something +** or set *pRes to 1 if the table is empty. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeLast(BtCursor *pCur, int *pRes){ + int rc; + + assert( cursorHoldsMutex(pCur) ); + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); + + /* If the cursor already points to the last entry, this is a no-op. */ + if( CURSOR_VALID==pCur->eState && (pCur->curFlags & BTCF_AtLast)!=0 ){ +#ifdef SQLITE_DEBUG + /* This block serves to assert() that the cursor really does point + ** to the last entry in the b-tree. */ + int ii; + for(ii=0; ii<pCur->iPage; ii++){ + assert( pCur->aiIdx[ii]==pCur->apPage[ii]->nCell ); + } + assert( pCur->aiIdx[pCur->iPage]==pCur->apPage[pCur->iPage]->nCell-1 ); + assert( pCur->apPage[pCur->iPage]->leaf ); +#endif + return SQLITE_OK; + } + + rc = moveToRoot(pCur); + if( rc==SQLITE_OK ){ + if( CURSOR_INVALID==pCur->eState ){ + assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); + *pRes = 1; + }else{ + assert( pCur->eState==CURSOR_VALID ); + *pRes = 0; + rc = moveToRightmost(pCur); + if( rc==SQLITE_OK ){ + pCur->curFlags |= BTCF_AtLast; + }else{ + pCur->curFlags &= ~BTCF_AtLast; + } + + } + } + return rc; +} + +SQLITE_API void SQLITE_STDCALL sqlite3BtreeInitUnpackedRecord( + UnpackedRecord *pUnKey, + BtCursor* pCur, + int nField, + int default_rc, + Mem* pMem) +{ + pUnKey->pKeyInfo = pCur->pKeyInfo; + pUnKey->nField = nField; + pUnKey->default_rc = default_rc; + pUnKey->aMem = pMem; +} + +/* Move the cursor so that it points to an entry near the key +** specified by pIdxKey or intKey. Return a success code. +** +** For INTKEY tables, the intKey parameter is used. pIdxKey +** must be NULL. For index tables, pIdxKey is used and intKey +** is ignored. +** +** If an exact match is not found, then the cursor is always +** left pointing at a leaf page which would hold the entry if it +** were present. The cursor might point to an entry that comes +** before or after the key. +** +** An integer is written into *pRes which is the result of +** comparing the key with the entry to which the cursor is +** pointing. The meaning of the integer written into +** *pRes is as follows: +** +** *pRes<0 The cursor is left pointing at an entry that +** is smaller than intKey/pIdxKey or if the table is empty +** and the cursor is therefore left point to nothing. +** +** *pRes==0 The cursor is left pointing at an entry that +** exactly matches intKey/pIdxKey. +** +** *pRes>0 The cursor is left pointing at an entry that +** is larger than intKey/pIdxKey. +** +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeMovetoUnpacked( + BtCursor *pCur, /* The cursor to be moved */ + UnpackedRecord *pIdxKey, /* Unpacked index key */ + i64 intKey, /* The table key */ + int biasRight, /* If true, bias the search to the high end */ + int *pRes /* Write search results here */ +){ + int rc; + RecordCompare xRecordCompare; + + assert( cursorHoldsMutex(pCur) ); + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); + assert( pRes ); + assert( (pIdxKey==0)==(pCur->pKeyInfo==0) ); + + /* If the cursor is already positioned at the point we are trying + ** to move to, then just return without doing any work */ + if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 + && pCur->curIntKey + ){ + if( pCur->info.nKey==intKey ){ + *pRes = 0; + return SQLITE_OK; + } + if( (pCur->curFlags & BTCF_AtLast)!=0 && pCur->info.nKey<intKey ){ + *pRes = -1; + return SQLITE_OK; + } + } + + if( pIdxKey ){ + xRecordCompare = sqlite3VdbeFindCompare(pIdxKey); + pIdxKey->errCode = 0; + assert( pIdxKey->default_rc==1 + || pIdxKey->default_rc==0 + || pIdxKey->default_rc==-1 + ); + }else{ + xRecordCompare = 0; /* All keys are integers */ + } + + rc = moveToRoot(pCur); + if( rc ){ + return rc; + } + assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage] ); + assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->isInit ); + assert( pCur->eState==CURSOR_INVALID || pCur->apPage[pCur->iPage]->nCell>0 ); + if( pCur->eState==CURSOR_INVALID ){ + *pRes = -1; + assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); + return SQLITE_OK; + } + assert( pCur->apPage[0]->intKey==pCur->curIntKey ); + assert( pCur->curIntKey || pIdxKey ); + for(;;){ + int lwr, upr, idx, c; + Pgno chldPg; + MemPage *pPage = pCur->apPage[pCur->iPage]; + u8 *pCell; /* Pointer to current cell in pPage */ + + /* pPage->nCell must be greater than zero. If this is the root-page + ** the cursor would have been INVALID above and this for(;;) loop + ** not run. If this is not the root-page, then the moveToChild() routine + ** would have already detected db corruption. Similarly, pPage must + ** be the right kind (index or table) of b-tree page. Otherwise + ** a moveToChild() or moveToRoot() call would have detected corruption. */ + assert( pPage->nCell>0 ); + assert( pPage->intKey==(pIdxKey==0) ); + lwr = 0; + upr = pPage->nCell-1; + assert( biasRight==0 || biasRight==1 ); + idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */ + pCur->aiIdx[pCur->iPage] = (u16)idx; + if( xRecordCompare==0 ){ + for(;;){ + i64 nCellKey; + pCell = findCellPastPtr(pPage, idx); + if( pPage->intKeyLeaf ){ + while( 0x80 <= *(pCell++) ){ + if( pCell>=pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT; + } + } + getVarint(pCell, (u64*)&nCellKey); + if( nCellKey<intKey ){ + lwr = idx+1; + if( lwr>upr ){ c = -1; break; } + }else if( nCellKey>intKey ){ + upr = idx-1; + if( lwr>upr ){ c = +1; break; } + }else{ + assert( nCellKey==intKey ); + pCur->curFlags |= BTCF_ValidNKey; + pCur->info.nKey = nCellKey; + pCur->aiIdx[pCur->iPage] = (u16)idx; + if( !pPage->leaf ){ + lwr = idx; + goto moveto_next_layer; + }else{ + *pRes = 0; + rc = SQLITE_OK; + goto moveto_finish; + } + } + assert( lwr+upr>=0 ); + idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */ + } + }else{ + for(;;){ + int nCell; /* Size of the pCell cell in bytes */ + pCell = findCellPastPtr(pPage, idx); + + /* The maximum supported page-size is 65536 bytes. This means that + ** the maximum number of record bytes stored on an index B-Tree + ** page is less than 16384 bytes and may be stored as a 2-byte + ** varint. This information is used to attempt to avoid parsing + ** the entire cell by checking for the cases where the record is + ** stored entirely within the b-tree page by inspecting the first + ** 2 bytes of the cell. + */ + nCell = pCell[0]; + if( nCell<=pPage->max1bytePayload ){ + /* This branch runs if the record-size field of the cell is a + ** single byte varint and the record fits entirely on the main + ** b-tree page. */ + testcase( pCell+nCell+1==pPage->aDataEnd ); + c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey); + }else if( !(pCell[1] & 0x80) + && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal + ){ + /* The record-size field is a 2 byte varint and the record + ** fits entirely on the main b-tree page. */ + testcase( pCell+nCell+2==pPage->aDataEnd ); + c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey); + }else{ + /* The record flows over onto one or more overflow pages. In + ** this case the whole cell needs to be parsed, a buffer allocated + ** and accessPayload() used to retrieve the record into the + ** buffer before VdbeRecordCompare() can be called. + ** + ** If the record is corrupt, the xRecordCompare routine may read + ** up to two varints past the end of the buffer. An extra 18 + ** bytes of padding is allocated at the end of the buffer in + ** case this happens. */ + void *pCellKey; + u8 * const pCellBody = pCell - pPage->childPtrSize; + pPage->xParseCell(pPage, pCellBody, &pCur->info); + nCell = (int)pCur->info.nKey; + testcase( nCell<0 ); /* True if key size is 2^32 or more */ + testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */ + testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */ + testcase( nCell==2 ); /* Minimum legal index key size */ + if( nCell<2 ){ + rc = SQLITE_CORRUPT_BKPT; + goto moveto_finish; + } + pCellKey = sqlite3Malloc( nCell+18 ); + if( pCellKey==0 ){ + rc = SQLITE_NOMEM; + goto moveto_finish; + } + pCur->aiIdx[pCur->iPage] = (u16)idx; + rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 2); + if( rc ){ + sqlite3_free(pCellKey); + goto moveto_finish; + } + c = xRecordCompare(nCell, pCellKey, pIdxKey); + sqlite3_free(pCellKey); + } + assert( + (pIdxKey->errCode!=SQLITE_CORRUPT || c==0) + && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->mallocFailed) + ); + if( c<0 ){ + lwr = idx+1; + }else if( c>0 ){ + upr = idx-1; + }else{ + assert( c==0 ); + *pRes = 0; + rc = SQLITE_OK; + pCur->aiIdx[pCur->iPage] = (u16)idx; + if( pIdxKey->errCode ) rc = SQLITE_CORRUPT; + goto moveto_finish; + } + if( lwr>upr ) break; + assert( lwr+upr>=0 ); + idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2 */ + } + } + assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) ); + assert( pPage->isInit ); + if( pPage->leaf ){ + assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); + pCur->aiIdx[pCur->iPage] = (u16)idx; + *pRes = c; + rc = SQLITE_OK; + goto moveto_finish; + } +moveto_next_layer: + if( lwr>=pPage->nCell ){ + chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); + }else{ + chldPg = get4byte(findCell(pPage, lwr)); + } + pCur->aiIdx[pCur->iPage] = (u16)lwr; + rc = moveToChild(pCur, chldPg); + if( rc ) break; + } +moveto_finish: + pCur->info.nSize = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); + return rc; +} + + +/* +** Return TRUE if the cursor is not pointing at an entry of the table. +** +** TRUE will be returned after a call to sqlite3BtreeNext() moves +** past the last entry in the table or sqlite3BtreePrev() moves past +** the first entry. TRUE is also returned if the table is empty. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeEof(BtCursor *pCur){ + /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries + ** have been deleted? This API will need to change to return an error code + ** as well as the boolean result value. + */ + return (CURSOR_VALID!=pCur->eState); +} + +/* +** Advance the cursor to the next entry in the database. If +** successful then set *pRes=0. If the cursor +** was already pointing to the last entry in the database before +** this routine was called, then set *pRes=1. +** +** The main entry point is sqlite3BtreeNext(). That routine is optimized +** for the common case of merely incrementing the cell counter BtCursor.aiIdx +** to the next cell on the current page. The (slower) btreeNext() helper +** routine is called when it is necessary to move to a different page or +** to restore the cursor. +** +** The calling function will set *pRes to 0 or 1. The initial *pRes value +** will be 1 if the cursor being stepped corresponds to an SQL index and +** if this routine could have been skipped if that SQL index had been +** a unique index. Otherwise the caller will have set *pRes to zero. +** Zero is the common case. The btree implementation is free to use the +** initial *pRes value as a hint to improve performance, but the current +** SQLite btree implementation does not. (Note that the comdb2 btree +** implementation does use this hint, however.) +*/ +static SQLITE_NOINLINE int btreeNext(BtCursor *pCur, int *pRes){ + int rc; + int idx; + MemPage *pPage; + + assert( cursorHoldsMutex(pCur) ); + assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); + assert( *pRes==0 ); + if( pCur->eState!=CURSOR_VALID ){ + assert( (pCur->curFlags & BTCF_ValidOvfl)==0 ); + rc = restoreCursorPosition(pCur); + if( rc!=SQLITE_OK ){ + return rc; + } + if( CURSOR_INVALID==pCur->eState ){ + *pRes = 1; + return SQLITE_OK; + } + if( pCur->skipNext ){ + assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT ); + pCur->eState = CURSOR_VALID; + if( pCur->skipNext>0 ){ + pCur->skipNext = 0; + return SQLITE_OK; + } + pCur->skipNext = 0; + } + } + + pPage = pCur->apPage[pCur->iPage]; + idx = ++pCur->aiIdx[pCur->iPage]; + assert( pPage->isInit ); + + /* If the database file is corrupt, it is possible for the value of idx + ** to be invalid here. This can only occur if a second cursor modifies + ** the page while cursor pCur is holding a reference to it. Which can + ** only happen if the database is corrupt in such a way as to link the + ** page into more than one b-tree structure. */ + testcase( idx>pPage->nCell ); + + if( idx>=pPage->nCell ){ + if( !pPage->leaf ){ + rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); + if( rc ) return rc; + return moveToLeftmost(pCur); + } + do{ + if( pCur->iPage==0 ){ + *pRes = 1; + pCur->eState = CURSOR_INVALID; + return SQLITE_OK; + } + moveToParent(pCur); + pPage = pCur->apPage[pCur->iPage]; + }while( pCur->aiIdx[pCur->iPage]>=pPage->nCell ); + if( pPage->intKey ){ + return sqlite3BtreeNext(pCur, pRes); + }else{ + return SQLITE_OK; + } + } + if( pPage->leaf ){ + return SQLITE_OK; + }else{ + return moveToLeftmost(pCur); + } +} +SQLITE_API int SQLITE_STDCALL sqlite3BtreeNext(BtCursor *pCur, int *pRes){ + MemPage *pPage; + assert( cursorHoldsMutex(pCur) ); + assert( pRes!=0 ); + assert( *pRes==0 || *pRes==1 ); + assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); + pCur->info.nSize = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); + *pRes = 0; + if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur, pRes); + pPage = pCur->apPage[pCur->iPage]; + if( (++pCur->aiIdx[pCur->iPage])>=pPage->nCell ){ + pCur->aiIdx[pCur->iPage]--; + return btreeNext(pCur, pRes); + } + if( pPage->leaf ){ + return SQLITE_OK; + }else{ + return moveToLeftmost(pCur); + } +} + +/* +** Step the cursor to the back to the previous entry in the database. If +** successful then set *pRes=0. If the cursor +** was already pointing to the first entry in the database before +** this routine was called, then set *pRes=1. +** +** The main entry point is sqlite3BtreePrevious(). That routine is optimized +** for the common case of merely decrementing the cell counter BtCursor.aiIdx +** to the previous cell on the current page. The (slower) btreePrevious() +** helper routine is called when it is necessary to move to a different page +** or to restore the cursor. +** +** The calling function will set *pRes to 0 or 1. The initial *pRes value +** will be 1 if the cursor being stepped corresponds to an SQL index and +** if this routine could have been skipped if that SQL index had been +** a unique index. Otherwise the caller will have set *pRes to zero. +** Zero is the common case. The btree implementation is free to use the +** initial *pRes value as a hint to improve performance, but the current +** SQLite btree implementation does not. (Note that the comdb2 btree +** implementation does use this hint, however.) +*/ +static SQLITE_NOINLINE int btreePrevious(BtCursor *pCur, int *pRes){ + int rc; + MemPage *pPage; + + assert( cursorHoldsMutex(pCur) ); + assert( pRes!=0 ); + assert( *pRes==0 ); + assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); + assert( (pCur->curFlags & (BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey))==0 ); + assert( pCur->info.nSize==0 ); + if( pCur->eState!=CURSOR_VALID ){ + rc = restoreCursorPosition(pCur); + if( rc!=SQLITE_OK ){ + return rc; + } + if( CURSOR_INVALID==pCur->eState ){ + *pRes = 1; + return SQLITE_OK; + } + if( pCur->skipNext ){ + assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT ); + pCur->eState = CURSOR_VALID; + if( pCur->skipNext<0 ){ + pCur->skipNext = 0; + return SQLITE_OK; + } + pCur->skipNext = 0; + } + } + + pPage = pCur->apPage[pCur->iPage]; + assert( pPage->isInit ); + if( !pPage->leaf ){ + int idx = pCur->aiIdx[pCur->iPage]; + rc = moveToChild(pCur, get4byte(findCell(pPage, idx))); + if( rc ) return rc; + rc = moveToRightmost(pCur); + }else{ + while( pCur->aiIdx[pCur->iPage]==0 ){ + if( pCur->iPage==0 ){ + pCur->eState = CURSOR_INVALID; + *pRes = 1; + return SQLITE_OK; + } + moveToParent(pCur); + } + assert( pCur->info.nSize==0 ); + assert( (pCur->curFlags & (BTCF_ValidNKey|BTCF_ValidOvfl))==0 ); + + pCur->aiIdx[pCur->iPage]--; + pPage = pCur->apPage[pCur->iPage]; + if( pPage->intKey && !pPage->leaf ){ + rc = sqlite3BtreePrevious(pCur, pRes); + }else{ + rc = SQLITE_OK; + } + } + return rc; +} +SQLITE_API int SQLITE_STDCALL sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ + assert( cursorHoldsMutex(pCur) ); + assert( pRes!=0 ); + assert( *pRes==0 || *pRes==1 ); + assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); + *pRes = 0; + pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey); + pCur->info.nSize = 0; + if( pCur->eState!=CURSOR_VALID + || pCur->aiIdx[pCur->iPage]==0 + || pCur->apPage[pCur->iPage]->leaf==0 + ){ + return btreePrevious(pCur, pRes); + } + pCur->aiIdx[pCur->iPage]--; + return SQLITE_OK; +} + +/* +** Allocate a new page from the database file. +** +** The new page is marked as dirty. (In other words, sqlite3PagerWrite() +** has already been called on the new page.) The new page has also +** been referenced and the calling routine is responsible for calling +** sqlite3PagerUnref() on the new page when it is done. +** +** SQLITE_OK is returned on success. Any other return value indicates +** an error. *ppPage is set to NULL in the event of an error. +** +** If the "nearby" parameter is not 0, then an effort is made to +** locate a page close to the page number "nearby". This can be used in an +** attempt to keep related pages close to each other in the database file, +** which in turn can make database access faster. +** +** If the eMode parameter is BTALLOC_EXACT and the nearby page exists +** anywhere on the free-list, then it is guaranteed to be returned. If +** eMode is BTALLOC_LT then the page returned will be less than or equal +** to nearby if any such page exists. If eMode is BTALLOC_ANY then there +** are no restrictions on which page is returned. +*/ +static int allocateBtreePage( + BtShared *pBt, /* The btree */ + MemPage **ppPage, /* Store pointer to the allocated page here */ + Pgno *pPgno, /* Store the page number here */ + Pgno nearby, /* Search for a page near this one */ + u8 eMode /* BTALLOC_EXACT, BTALLOC_LT, or BTALLOC_ANY */ +){ + MemPage *pPage1; + int rc; + u32 n; /* Number of pages on the freelist */ + u32 k; /* Number of leaves on the trunk of the freelist */ + MemPage *pTrunk = 0; + MemPage *pPrevTrunk = 0; + Pgno mxPage; /* Total size of the database file */ + + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); + pPage1 = pBt->pPage1; + mxPage = btreePagecount(pBt); + /* EVIDENCE-OF: R-05119-02637 The 4-byte big-endian integer at offset 36 + ** stores stores the total number of pages on the freelist. */ + n = get4byte(&pPage1->aData[36]); + testcase( n==mxPage-1 ); + if( n>=mxPage ){ + return SQLITE_CORRUPT_BKPT; + } + if( n>0 ){ + /* There are pages on the freelist. Reuse one of those pages. */ + Pgno iTrunk; + u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ + u32 nSearch = 0; /* Count of the number of search attempts */ + + /* If eMode==BTALLOC_EXACT and a query of the pointer-map + ** shows that the page 'nearby' is somewhere on the free-list, then + ** the entire-list will be searched for that page. + */ +#ifndef SQLITE_OMIT_AUTOVACUUM + if( eMode==BTALLOC_EXACT ){ + if( nearby<=mxPage ){ + u8 eType; + assert( nearby>0 ); + assert( pBt->autoVacuum ); + rc = ptrmapGet(pBt, nearby, &eType, 0); + if( rc ) return rc; + if( eType==PTRMAP_FREEPAGE ){ + searchList = 1; + } + } + }else if( eMode==BTALLOC_LE ){ + searchList = 1; + } +#endif + + /* Decrement the free-list count by 1. Set iTrunk to the index of the + ** first free-list trunk page. iPrevTrunk is initially 1. + */ + rc = sqlite3PagerWrite(pPage1->pDbPage); + if( rc ) return rc; + put4byte(&pPage1->aData[36], n-1); + + /* The code within this loop is run only once if the 'searchList' variable + ** is not true. Otherwise, it runs once for each trunk-page on the + ** free-list until the page 'nearby' is located (eMode==BTALLOC_EXACT) + ** or until a page less than 'nearby' is located (eMode==BTALLOC_LT) + */ + do { + pPrevTrunk = pTrunk; + if( pPrevTrunk ){ + /* EVIDENCE-OF: R-01506-11053 The first integer on a freelist trunk page + ** is the page number of the next freelist trunk page in the list or + ** zero if this is the last freelist trunk page. */ + iTrunk = get4byte(&pPrevTrunk->aData[0]); + }else{ + /* EVIDENCE-OF: R-59841-13798 The 4-byte big-endian integer at offset 32 + ** stores the page number of the first page of the freelist, or zero if + ** the freelist is empty. */ + iTrunk = get4byte(&pPage1->aData[32]); + } + testcase( iTrunk==mxPage ); + if( iTrunk>mxPage || nSearch++ > n ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0); + } + if( rc ){ + pTrunk = 0; + goto end_allocate_page; + } + assert( pTrunk!=0 ); + assert( pTrunk->aData!=0 ); + /* EVIDENCE-OF: R-13523-04394 The second integer on a freelist trunk page + ** is the number of leaf page pointers to follow. */ + k = get4byte(&pTrunk->aData[4]); + if( k==0 && !searchList ){ + /* The trunk has no leaves and the list is not being searched. + ** So extract the trunk page itself and use it as the newly + ** allocated page */ + assert( pPrevTrunk==0 ); + rc = sqlite3PagerWrite(pTrunk->pDbPage); + if( rc ){ + goto end_allocate_page; + } + *pPgno = iTrunk; + memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); + *ppPage = pTrunk; + pTrunk = 0; + TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); + }else if( k>(u32)(pBt->usableSize/4 - 2) ){ + /* Value of k is out of range. Database corruption */ + rc = SQLITE_CORRUPT_BKPT; + goto end_allocate_page; +#ifndef SQLITE_OMIT_AUTOVACUUM + }else if( searchList + && (nearby==iTrunk || (iTrunk<nearby && eMode==BTALLOC_LE)) + ){ + /* The list is being searched and this trunk page is the page + ** to allocate, regardless of whether it has leaves. + */ + *pPgno = iTrunk; + *ppPage = pTrunk; + searchList = 0; + rc = sqlite3PagerWrite(pTrunk->pDbPage); + if( rc ){ + goto end_allocate_page; + } + if( k==0 ){ + if( !pPrevTrunk ){ + memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); + }else{ + rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); + if( rc!=SQLITE_OK ){ + goto end_allocate_page; + } + memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4); + } + }else{ + /* The trunk page is required by the caller but it contains + ** pointers to free-list leaves. The first leaf becomes a trunk + ** page in this case. + */ + MemPage *pNewTrunk; + Pgno iNewTrunk = get4byte(&pTrunk->aData[8]); + if( iNewTrunk>mxPage ){ + rc = SQLITE_CORRUPT_BKPT; + goto end_allocate_page; + } + testcase( iNewTrunk==mxPage ); + rc = btreeGetUnusedPage(pBt, iNewTrunk, &pNewTrunk, 0); + if( rc!=SQLITE_OK ){ + goto end_allocate_page; + } + rc = sqlite3PagerWrite(pNewTrunk->pDbPage); + if( rc!=SQLITE_OK ){ + releasePage(pNewTrunk); + goto end_allocate_page; + } + memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4); + put4byte(&pNewTrunk->aData[4], k-1); + memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4); + releasePage(pNewTrunk); + if( !pPrevTrunk ){ + assert( sqlite3PagerIswriteable(pPage1->pDbPage) ); + put4byte(&pPage1->aData[32], iNewTrunk); + }else{ + rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); + if( rc ){ + goto end_allocate_page; + } + put4byte(&pPrevTrunk->aData[0], iNewTrunk); + } + } + pTrunk = 0; + TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); +#endif + }else if( k>0 ){ + /* Extract a leaf from the trunk */ + u32 closest; + Pgno iPage; + unsigned char *aData = pTrunk->aData; + if( nearby>0 ){ + u32 i; + closest = 0; + if( eMode==BTALLOC_LE ){ + for(i=0; i<k; i++){ + iPage = get4byte(&aData[8+i*4]); + if( iPage<=nearby ){ + closest = i; + break; + } + } + }else{ + int dist; + dist = sqlite3AbsInt32(get4byte(&aData[8]) - nearby); + for(i=1; i<k; i++){ + int d2 = sqlite3AbsInt32(get4byte(&aData[8+i*4]) - nearby); + if( d2<dist ){ + closest = i; + dist = d2; + } + } + } + }else{ + closest = 0; + } + + iPage = get4byte(&aData[8+closest*4]); + testcase( iPage==mxPage ); + if( iPage>mxPage ){ + rc = SQLITE_CORRUPT_BKPT; + goto end_allocate_page; + } + testcase( iPage==mxPage ); + if( !searchList + || (iPage==nearby || (iPage<nearby && eMode==BTALLOC_LE)) + ){ + int noContent; + *pPgno = iPage; + TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d" + ": %d more free pages\n", + *pPgno, closest+1, k, pTrunk->pgno, n-1)); + rc = sqlite3PagerWrite(pTrunk->pDbPage); + if( rc ) goto end_allocate_page; + if( closest<k-1 ){ + memcpy(&aData[8+closest*4], &aData[4+k*4], 4); + } + put4byte(&aData[4], k-1); + noContent = !btreeGetHasContent(pBt, *pPgno)? PAGER_GET_NOCONTENT : 0; + rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, noContent); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite((*ppPage)->pDbPage); + if( rc!=SQLITE_OK ){ + releasePage(*ppPage); + *ppPage = 0; + } + } + searchList = 0; + } + } + releasePage(pPrevTrunk); + pPrevTrunk = 0; + }while( searchList ); + }else{ + /* There are no pages on the freelist, so append a new page to the + ** database image. + ** + ** Normally, new pages allocated by this block can be requested from the + ** pager layer with the 'no-content' flag set. This prevents the pager + ** from trying to read the pages content from disk. However, if the + ** current transaction has already run one or more incremental-vacuum + ** steps, then the page we are about to allocate may contain content + ** that is required in the event of a rollback. In this case, do + ** not set the no-content flag. This causes the pager to load and journal + ** the current page content before overwriting it. + ** + ** Note that the pager will not actually attempt to load or journal + ** content for any page that really does lie past the end of the database + ** file on disk. So the effects of disabling the no-content optimization + ** here are confined to those pages that lie between the end of the + ** database image and the end of the database file. + */ + int bNoContent = (0==IfNotOmitAV(pBt->bDoTruncate))? PAGER_GET_NOCONTENT:0; + + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + if( rc ) return rc; + pBt->nPage++; + if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++; + +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, pBt->nPage) ){ + /* If *pPgno refers to a pointer-map page, allocate two new pages + ** at the end of the file instead of one. The first allocated page + ** becomes a new pointer-map page, the second is used by the caller. + */ + MemPage *pPg = 0; + TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); + assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); + rc = btreeGetUnusedPage(pBt, pBt->nPage, &pPg, bNoContent); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pPg->pDbPage); + releasePage(pPg); + } + if( rc ) return rc; + pBt->nPage++; + if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; } + } +#endif + put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage); + *pPgno = pBt->nPage; + + assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); + rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, bNoContent); + if( rc ) return rc; + rc = sqlite3PagerWrite((*ppPage)->pDbPage); + if( rc!=SQLITE_OK ){ + releasePage(*ppPage); + *ppPage = 0; + } + TRACE(("ALLOCATE: %d from end of file\n", *pPgno)); + } + + assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); + +end_allocate_page: + releasePage(pTrunk); + releasePage(pPrevTrunk); + assert( rc!=SQLITE_OK || sqlite3PagerPageRefcount((*ppPage)->pDbPage)<=1 ); + assert( rc!=SQLITE_OK || (*ppPage)->isInit==0 ); + return rc; +} + +/* +** This function is used to add page iPage to the database file free-list. +** It is assumed that the page is not already a part of the free-list. +** +** The value passed as the second argument to this function is optional. +** If the caller happens to have a pointer to the MemPage object +** corresponding to page iPage handy, it may pass it as the second value. +** Otherwise, it may pass NULL. +** +** If a pointer to a MemPage object is passed as the second argument, +** its reference count is not altered by this function. +*/ +static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ + MemPage *pTrunk = 0; /* Free-list trunk page */ + Pgno iTrunk = 0; /* Page number of free-list trunk page */ + MemPage *pPage1 = pBt->pPage1; /* Local reference to page 1 */ + MemPage *pPage; /* Page being freed. May be NULL. */ + int rc; /* Return Code */ + int nFree; /* Initial number of pages on free-list */ + + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( CORRUPT_DB || iPage>1 ); + assert( !pMemPage || pMemPage->pgno==iPage ); + + if( iPage<2 ) return SQLITE_CORRUPT_BKPT; + if( pMemPage ){ + pPage = pMemPage; + sqlite3PagerRef(pPage->pDbPage); + }else{ + pPage = btreePageLookup(pBt, iPage); + } + + /* Increment the free page count on pPage1 */ + rc = sqlite3PagerWrite(pPage1->pDbPage); + if( rc ) goto freepage_out; + nFree = get4byte(&pPage1->aData[36]); + put4byte(&pPage1->aData[36], nFree+1); + + if( pBt->btsFlags & BTS_SECURE_DELETE ){ + /* If the secure_delete option is enabled, then + ** always fully overwrite deleted information with zeros. + */ + if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) ) + || ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0) + ){ + goto freepage_out; + } + memset(pPage->aData, 0, pPage->pBt->pageSize); + } + + /* If the database supports auto-vacuum, write an entry in the pointer-map + ** to indicate that the page is free. + */ + if( ISAUTOVACUUM ){ + ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc); + if( rc ) goto freepage_out; + } + + /* Now manipulate the actual database free-list structure. There are two + ** possibilities. If the free-list is currently empty, or if the first + ** trunk page in the free-list is full, then this page will become a + ** new free-list trunk page. Otherwise, it will become a leaf of the + ** first trunk page in the current free-list. This block tests if it + ** is possible to add the page as a new free-list leaf. + */ + if( nFree!=0 ){ + u32 nLeaf; /* Initial number of leaf cells on trunk page */ + + iTrunk = get4byte(&pPage1->aData[32]); + rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); + if( rc!=SQLITE_OK ){ + goto freepage_out; + } + + nLeaf = get4byte(&pTrunk->aData[4]); + assert( pBt->usableSize>32 ); + if( nLeaf > (u32)pBt->usableSize/4 - 2 ){ + rc = SQLITE_CORRUPT_BKPT; + goto freepage_out; + } + if( nLeaf < (u32)pBt->usableSize/4 - 8 ){ + /* In this case there is room on the trunk page to insert the page + ** being freed as a new leaf. + ** + ** Note that the trunk page is not really full until it contains + ** usableSize/4 - 2 entries, not usableSize/4 - 8 entries as we have + ** coded. But due to a coding error in versions of SQLite prior to + ** 3.6.0, databases with freelist trunk pages holding more than + ** usableSize/4 - 8 entries will be reported as corrupt. In order + ** to maintain backwards compatibility with older versions of SQLite, + ** we will continue to restrict the number of entries to usableSize/4 - 8 + ** for now. At some point in the future (once everyone has upgraded + ** to 3.6.0 or later) we should consider fixing the conditional above + ** to read "usableSize/4-2" instead of "usableSize/4-8". + ** + ** EVIDENCE-OF: R-19920-11576 However, newer versions of SQLite still + ** avoid using the last six entries in the freelist trunk page array in + ** order that database files created by newer versions of SQLite can be + ** read by older versions of SQLite. + */ + rc = sqlite3PagerWrite(pTrunk->pDbPage); + if( rc==SQLITE_OK ){ + put4byte(&pTrunk->aData[4], nLeaf+1); + put4byte(&pTrunk->aData[8+nLeaf*4], iPage); + if( pPage && (pBt->btsFlags & BTS_SECURE_DELETE)==0 ){ + sqlite3PagerDontWrite(pPage->pDbPage); + } + rc = btreeSetHasContent(pBt, iPage); + } + TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno)); + goto freepage_out; + } + } + + /* If control flows to this point, then it was not possible to add the + ** the page being freed as a leaf page of the first trunk in the free-list. + ** Possibly because the free-list is empty, or possibly because the + ** first trunk in the free-list is full. Either way, the page being freed + ** will become the new first trunk page in the free-list. + */ + if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){ + goto freepage_out; + } + rc = sqlite3PagerWrite(pPage->pDbPage); + if( rc!=SQLITE_OK ){ + goto freepage_out; + } + put4byte(pPage->aData, iTrunk); + put4byte(&pPage->aData[4], 0); + put4byte(&pPage1->aData[32], iPage); + TRACE(("FREE-PAGE: %d new trunk page replacing %d\n", pPage->pgno, iTrunk)); + +freepage_out: + if( pPage ){ + pPage->isInit = 0; + } + releasePage(pPage); + releasePage(pTrunk); + return rc; +} +static void freePage(MemPage *pPage, int *pRC){ + if( (*pRC)==SQLITE_OK ){ + *pRC = freePage2(pPage->pBt, pPage, pPage->pgno); + } +} + +/* +** Free any overflow pages associated with the given Cell. Write the +** local Cell size (the number of bytes on the original page, omitting +** overflow) into *pnSize. +*/ +static int clearCell( + MemPage *pPage, /* The page that contains the Cell */ + unsigned char *pCell, /* First byte of the Cell */ + u16 *pnSize /* Write the size of the Cell here */ +){ + BtShared *pBt = pPage->pBt; + CellInfo info; + Pgno ovflPgno; + int rc; + int nOvfl; + u32 ovflPageSize; + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + pPage->xParseCell(pPage, pCell, &info); + *pnSize = info.nSize; + if( info.iOverflow==0 ){ + return SQLITE_OK; /* No overflow pages. Return without doing anything */ + } + if( pCell+info.iOverflow+3 > pPage->aData+pPage->maskPage ){ + return SQLITE_CORRUPT_BKPT; /* Cell extends past end of page */ + } + ovflPgno = get4byte(&pCell[info.iOverflow]); + assert( pBt->usableSize > 4 ); + ovflPageSize = pBt->usableSize - 4; + nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize; + assert( nOvfl>0 || + (CORRUPT_DB && (info.nPayload + ovflPageSize)<ovflPageSize) + ); + while( nOvfl-- ){ + Pgno iNext = 0; + MemPage *pOvfl = 0; + if( ovflPgno<2 || ovflPgno>btreePagecount(pBt) ){ + /* 0 is not a legal page number and page 1 cannot be an + ** overflow page. Therefore if ovflPgno<2 or past the end of the + ** file the database must be corrupt. */ + return SQLITE_CORRUPT_BKPT; + } + if( nOvfl ){ + rc = getOverflowPage(pBt, ovflPgno, &pOvfl, &iNext); + if( rc ) return rc; + } + + if( ( pOvfl || ((pOvfl = btreePageLookup(pBt, ovflPgno))!=0) ) + && sqlite3PagerPageRefcount(pOvfl->pDbPage)!=1 + ){ + /* There is no reason any cursor should have an outstanding reference + ** to an overflow page belonging to a cell that is being deleted/updated. + ** So if there exists more than one reference to this page, then it + ** must not really be an overflow page and the database must be corrupt. + ** It is helpful to detect this before calling freePage2(), as + ** freePage2() may zero the page contents if secure-delete mode is + ** enabled. If this 'overflow' page happens to be a page that the + ** caller is iterating through or using in some other way, this + ** can be problematic. + */ + rc = SQLITE_CORRUPT_BKPT; + }else{ + rc = freePage2(pBt, pOvfl, ovflPgno); + } + + if( pOvfl ){ + sqlite3PagerUnref(pOvfl->pDbPage); + } + if( rc ) return rc; + ovflPgno = iNext; + } + return SQLITE_OK; +} + +/* +** Create the byte sequence used to represent a cell on page pPage +** and write that byte sequence into pCell[]. Overflow pages are +** allocated and filled in as necessary. The calling procedure +** is responsible for making sure sufficient space has been allocated +** for pCell[]. +** +** Note that pCell does not necessary need to point to the pPage->aData +** area. pCell might point to some temporary storage. The cell will +** be constructed in this temporary area then copied into pPage->aData +** later. +*/ +static int fillInCell( + MemPage *pPage, /* The page that contains the cell */ + unsigned char *pCell, /* Complete text of the cell */ + const void *pKey, i64 nKey, /* The key */ + const void *pData,int nData, /* The data */ + int nZero, /* Extra zero bytes to append to pData */ + int *pnSize /* Write cell size here */ +){ + int nPayload; + const u8 *pSrc; + int nSrc, n, rc; + int spaceLeft; + MemPage *pOvfl = 0; + MemPage *pToRelease = 0; + unsigned char *pPrior; + unsigned char *pPayload; + BtShared *pBt = pPage->pBt; + Pgno pgnoOvfl = 0; + int nHeader; + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + + /* pPage is not necessarily writeable since pCell might be auxiliary + ** buffer space that is separate from the pPage buffer area */ + assert( pCell<pPage->aData || pCell>=&pPage->aData[pBt->pageSize] + || sqlite3PagerIswriteable(pPage->pDbPage) ); + + /* Fill in the header. */ + nHeader = pPage->childPtrSize; + nPayload = nData + nZero; + if( pPage->intKeyLeaf ){ + nHeader += putVarint32(&pCell[nHeader], nPayload); + }else{ + assert( nData==0 ); + assert( nZero==0 ); + } + nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey); + + /* Fill in the payload size */ + if( pPage->intKey ){ + pSrc = pData; + nSrc = nData; + nData = 0; + }else{ + assert( nKey<=0x7fffffff && pKey!=0 ); + nPayload = (int)nKey; + pSrc = pKey; + nSrc = (int)nKey; + } + if( nPayload<=pPage->maxLocal ){ + n = nHeader + nPayload; + testcase( n==3 ); + testcase( n==4 ); + if( n<4 ) n = 4; + *pnSize = n; + spaceLeft = nPayload; + pPrior = pCell; + }else{ + int mn = pPage->minLocal; + n = mn + (nPayload - mn) % (pPage->pBt->usableSize - 4); + testcase( n==pPage->maxLocal ); + testcase( n==pPage->maxLocal+1 ); + if( n > pPage->maxLocal ) n = mn; + spaceLeft = n; + *pnSize = n + nHeader + 4; + pPrior = &pCell[nHeader+n]; + } + pPayload = &pCell[nHeader]; + + /* At this point variables should be set as follows: + ** + ** nPayload Total payload size in bytes + ** pPayload Begin writing payload here + ** spaceLeft Space available at pPayload. If nPayload>spaceLeft, + ** that means content must spill into overflow pages. + ** *pnSize Size of the local cell (not counting overflow pages) + ** pPrior Where to write the pgno of the first overflow page + ** + ** Use a call to btreeParseCellPtr() to verify that the values above + ** were computed correctly. + */ +#if SQLITE_DEBUG + { + CellInfo info; + pPage->xParseCell(pPage, pCell, &info); + assert( nHeader=(int)(info.pPayload - pCell) ); + assert( info.nKey==nKey ); + assert( *pnSize == info.nSize ); + assert( spaceLeft == info.nLocal ); + assert( pPrior == &pCell[info.iOverflow] ); + } +#endif + + /* Write the payload into the local Cell and any extra into overflow pages */ + while( nPayload>0 ){ + if( spaceLeft==0 ){ +#ifndef SQLITE_OMIT_AUTOVACUUM + Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */ + if( pBt->autoVacuum ){ + do{ + pgnoOvfl++; + } while( + PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt) + ); + } +#endif + rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, 0); +#ifndef SQLITE_OMIT_AUTOVACUUM + /* If the database supports auto-vacuum, and the second or subsequent + ** overflow page is being allocated, add an entry to the pointer-map + ** for that page now. + ** + ** If this is the first overflow page, then write a partial entry + ** to the pointer-map. If we write nothing to this pointer-map slot, + ** then the optimistic overflow chain processing in clearCell() + ** may misinterpret the uninitialized values and delete the + ** wrong pages from the database. + */ + if( pBt->autoVacuum && rc==SQLITE_OK ){ + u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1); + ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap, &rc); + if( rc ){ + releasePage(pOvfl); + } + } +#endif + if( rc ){ + releasePage(pToRelease); + return rc; + } + + /* If pToRelease is not zero than pPrior points into the data area + ** of pToRelease. Make sure pToRelease is still writeable. */ + assert( pToRelease==0 || sqlite3PagerIswriteable(pToRelease->pDbPage) ); + + /* If pPrior is part of the data area of pPage, then make sure pPage + ** is still writeable */ + assert( pPrior<pPage->aData || pPrior>=&pPage->aData[pBt->pageSize] + || sqlite3PagerIswriteable(pPage->pDbPage) ); + + put4byte(pPrior, pgnoOvfl); + releasePage(pToRelease); + pToRelease = pOvfl; + pPrior = pOvfl->aData; + put4byte(pPrior, 0); + pPayload = &pOvfl->aData[4]; + spaceLeft = pBt->usableSize - 4; + } + n = nPayload; + if( n>spaceLeft ) n = spaceLeft; + + /* If pToRelease is not zero than pPayload points into the data area + ** of pToRelease. Make sure pToRelease is still writeable. */ + assert( pToRelease==0 || sqlite3PagerIswriteable(pToRelease->pDbPage) ); + + /* If pPayload is part of the data area of pPage, then make sure pPage + ** is still writeable */ + assert( pPayload<pPage->aData || pPayload>=&pPage->aData[pBt->pageSize] + || sqlite3PagerIswriteable(pPage->pDbPage) ); + + if( nSrc>0 ){ + if( n>nSrc ) n = nSrc; + assert( pSrc ); + memcpy(pPayload, pSrc, n); + }else{ + memset(pPayload, 0, n); + } + nPayload -= n; + pPayload += n; + pSrc += n; + nSrc -= n; + spaceLeft -= n; + if( nSrc==0 ){ + nSrc = nData; + pSrc = pData; + } + } + releasePage(pToRelease); + return SQLITE_OK; +} + +/* +** Remove the i-th cell from pPage. This routine effects pPage only. +** The cell content is not freed or deallocated. It is assumed that +** the cell content has been copied someplace else. This routine just +** removes the reference to the cell from pPage. +** +** "sz" must be the number of bytes in the cell. +*/ +static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ + u32 pc; /* Offset to cell content of cell being deleted */ + u8 *data; /* pPage->aData */ + u8 *ptr; /* Used to move bytes around within data[] */ + int rc; /* The return code */ + int hdr; /* Beginning of the header. 0 most pages. 100 page 1 */ + + if( *pRC ) return; + + assert( idx>=0 && idx<pPage->nCell ); + assert( CORRUPT_DB || sz==cellSize(pPage, idx) ); + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + data = pPage->aData; + ptr = &pPage->aCellIdx[2*idx]; + pc = get2byte(ptr); + hdr = pPage->hdrOffset; + testcase( pc==get2byte(&data[hdr+5]) ); + testcase( pc+sz==pPage->pBt->usableSize ); + if( pc < (u32)get2byte(&data[hdr+5]) || pc+sz > pPage->pBt->usableSize ){ + *pRC = SQLITE_CORRUPT_BKPT; + return; + } + rc = freeSpace(pPage, pc, sz); + if( rc ){ + *pRC = rc; + return; + } + pPage->nCell--; + if( pPage->nCell==0 ){ + memset(&data[hdr+1], 0, 4); + data[hdr+7] = 0; + put2byte(&data[hdr+5], pPage->pBt->usableSize); + pPage->nFree = pPage->pBt->usableSize - pPage->hdrOffset + - pPage->childPtrSize - 8; + }else{ + memmove(ptr, ptr+2, 2*(pPage->nCell - idx)); + put2byte(&data[hdr+3], pPage->nCell); + pPage->nFree += 2; + } +} + +/* +** Insert a new cell on pPage at cell index "i". pCell points to the +** content of the cell. +** +** If the cell content will fit on the page, then put it there. If it +** will not fit, then make a copy of the cell content into pTemp if +** pTemp is not null. Regardless of pTemp, allocate a new entry +** in pPage->apOvfl[] and make it point to the cell content (either +** in pTemp or the original pCell) and also record its index. +** Allocating a new entry in pPage->aCell[] implies that +** pPage->nOverflow is incremented. +*/ +static void insertCell( + MemPage *pPage, /* Page into which we are copying */ + int i, /* New cell becomes the i-th cell of the page */ + u8 *pCell, /* Content of the new cell */ + int sz, /* Bytes of content in pCell */ + u8 *pTemp, /* Temp storage space for pCell, if needed */ + Pgno iChild, /* If non-zero, replace first 4 bytes with this value */ + int *pRC /* Read and write return code from here */ +){ + int idx = 0; /* Where to write new cell content in data[] */ + int j; /* Loop counter */ + u8 *data; /* The content of the whole page */ + u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */ + + if( *pRC ) return; + + assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); + assert( MX_CELL(pPage->pBt)<=10921 ); + assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB ); + assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) ); + assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + /* The cell should normally be sized correctly. However, when moving a + ** malformed cell from a leaf page to an interior page, if the cell size + ** wanted to be less than 4 but got rounded up to 4 on the leaf, then size + ** might be less than 8 (leaf-size + pointer) on the interior node. Hence + ** the term after the || in the following assert(). */ + assert( sz==pPage->xCellSize(pPage, pCell) || (sz==8 && iChild>0) ); + if( pPage->nOverflow || sz+2>pPage->nFree ){ + if( pTemp ){ + memcpy(pTemp, pCell, sz); + pCell = pTemp; + } + if( iChild ){ + put4byte(pCell, iChild); + } + j = pPage->nOverflow++; + assert( j<(int)(sizeof(pPage->apOvfl)/sizeof(pPage->apOvfl[0])) ); + pPage->apOvfl[j] = pCell; + pPage->aiOvfl[j] = (u16)i; + + /* When multiple overflows occur, they are always sequential and in + ** sorted order. This invariants arise because multiple overflows can + ** only occur when inserting divider cells into the parent page during + ** balancing, and the dividers are adjacent and sorted. + */ + assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */ + assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */ + }else{ + int rc = sqlite3PagerWrite(pPage->pDbPage); + if( rc!=SQLITE_OK ){ + *pRC = rc; + return; + } + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + data = pPage->aData; + assert( &data[pPage->cellOffset]==pPage->aCellIdx ); + rc = allocateSpace(pPage, sz, &idx); + if( rc ){ *pRC = rc; return; } + /* The allocateSpace() routine guarantees the following properties + ** if it returns successfully */ + assert( idx >= 0 ); + assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB ); + assert( idx+sz <= (int)pPage->pBt->usableSize ); + pPage->nFree -= (u16)(2 + sz); + memcpy(&data[idx], pCell, sz); + if( iChild ){ + put4byte(&data[idx], iChild); + } + pIns = pPage->aCellIdx + i*2; + memmove(pIns+2, pIns, 2*(pPage->nCell - i)); + put2byte(pIns, idx); + pPage->nCell++; + /* increment the cell count */ + if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++; + assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell ); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pPage->pBt->autoVacuum ){ + /* The cell may contain a pointer to an overflow page. If so, write + ** the entry for the overflow page into the pointer map. + */ + ptrmapPutOvflPtr(pPage, pCell, pRC); + } +#endif + } +} + +/* +** A CellArray object contains a cache of pointers and sizes for a +** consecutive sequence of cells that might be held multiple pages. +*/ +typedef struct CellArray CellArray; +struct CellArray { + int nCell; /* Number of cells in apCell[] */ + MemPage *pRef; /* Reference page */ + u8 **apCell; /* All cells begin balanced */ + u16 *szCell; /* Local size of all cells in apCell[] */ +}; + +/* +** Make sure the cell sizes at idx, idx+1, ..., idx+N-1 have been +** computed. +*/ +static void populateCellCache(CellArray *p, int idx, int N){ + assert( idx>=0 && idx+N<=p->nCell ); + while( N>0 ){ + assert( p->apCell[idx]!=0 ); + if( p->szCell[idx]==0 ){ + p->szCell[idx] = p->pRef->xCellSize(p->pRef, p->apCell[idx]); + }else{ + assert( CORRUPT_DB || + p->szCell[idx]==p->pRef->xCellSize(p->pRef, p->apCell[idx]) ); + } + idx++; + N--; + } +} + +/* +** Return the size of the Nth element of the cell array +*/ +static SQLITE_NOINLINE u16 computeCellSize(CellArray *p, int N){ + assert( N>=0 && N<p->nCell ); + assert( p->szCell[N]==0 ); + p->szCell[N] = p->pRef->xCellSize(p->pRef, p->apCell[N]); + return p->szCell[N]; +} +static u16 cachedCellSize(CellArray *p, int N){ + assert( N>=0 && N<p->nCell ); + if( p->szCell[N] ) return p->szCell[N]; + return computeCellSize(p, N); +} + +/* +** Array apCell[] contains pointers to nCell b-tree page cells. The +** szCell[] array contains the size in bytes of each cell. This function +** replaces the current contents of page pPg with the contents of the cell +** array. +** +** Some of the cells in apCell[] may currently be stored in pPg. This +** function works around problems caused by this by making a copy of any +** such cells before overwriting the page data. +** +** The MemPage.nFree field is invalidated by this function. It is the +** responsibility of the caller to set it correctly. +*/ +static int rebuildPage( + MemPage *pPg, /* Edit this page */ + int nCell, /* Final number of cells on page */ + u8 **apCell, /* Array of cells */ + u16 *szCell /* Array of cell sizes */ +){ + const int hdr = pPg->hdrOffset; /* Offset of header on pPg */ + u8 * const aData = pPg->aData; /* Pointer to data for pPg */ + const int usableSize = pPg->pBt->usableSize; + u8 * const pEnd = &aData[usableSize]; + int i; + u8 *pCellptr = pPg->aCellIdx; + u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager); + u8 *pData; + + i = get2byte(&aData[hdr+5]); + memcpy(&pTmp[i], &aData[i], usableSize - i); + + pData = pEnd; + for(i=0; i<nCell; i++){ + u8 *pCell = apCell[i]; + if( pCell>aData && pCell<pEnd ){ + pCell = &pTmp[pCell - aData]; + } + pData -= szCell[i]; + put2byte(pCellptr, (pData - aData)); + pCellptr += 2; + if( pData < pCellptr ) return SQLITE_CORRUPT_BKPT; + memcpy(pData, pCell, szCell[i]); + assert( szCell[i]==pPg->xCellSize(pPg, pCell) || CORRUPT_DB ); + testcase( szCell[i]!=pPg->xCellSize(pPg,pCell) ); + } + + /* The pPg->nFree field is now set incorrectly. The caller will fix it. */ + pPg->nCell = nCell; + pPg->nOverflow = 0; + + put2byte(&aData[hdr+1], 0); + put2byte(&aData[hdr+3], pPg->nCell); + put2byte(&aData[hdr+5], pData - aData); + aData[hdr+7] = 0x00; + return SQLITE_OK; +} + +/* +** Array apCell[] contains nCell pointers to b-tree cells. Array szCell +** contains the size in bytes of each such cell. This function attempts to +** add the cells stored in the array to page pPg. If it cannot (because +** the page needs to be defragmented before the cells will fit), non-zero +** is returned. Otherwise, if the cells are added successfully, zero is +** returned. +** +** Argument pCellptr points to the first entry in the cell-pointer array +** (part of page pPg) to populate. After cell apCell[0] is written to the +** page body, a 16-bit offset is written to pCellptr. And so on, for each +** cell in the array. It is the responsibility of the caller to ensure +** that it is safe to overwrite this part of the cell-pointer array. +** +** When this function is called, *ppData points to the start of the +** content area on page pPg. If the size of the content area is extended, +** *ppData is updated to point to the new start of the content area +** before returning. +** +** Finally, argument pBegin points to the byte immediately following the +** end of the space required by this page for the cell-pointer area (for +** all cells - not just those inserted by the current call). If the content +** area must be extended to before this point in order to accomodate all +** cells in apCell[], then the cells do not fit and non-zero is returned. +*/ +static int pageInsertArray( + MemPage *pPg, /* Page to add cells to */ + u8 *pBegin, /* End of cell-pointer array */ + u8 **ppData, /* IN/OUT: Page content -area pointer */ + u8 *pCellptr, /* Pointer to cell-pointer area */ + int iFirst, /* Index of first cell to add */ + int nCell, /* Number of cells to add to pPg */ + CellArray *pCArray /* Array of cells */ +){ + int i; + u8 *aData = pPg->aData; + u8 *pData = *ppData; + int iEnd = iFirst + nCell; + assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */ + for(i=iFirst; i<iEnd; i++){ + int sz, rc; + u8 *pSlot; + sz = cachedCellSize(pCArray, i); + if( (aData[1]==0 && aData[2]==0) || (pSlot = pageFindSlot(pPg,sz,&rc))==0 ){ + pData -= sz; + if( pData<pBegin ) return 1; + pSlot = pData; + } + /* pSlot and pCArray->apCell[i] will never overlap on a well-formed + ** database. But they might for a corrupt database. Hence use memmove() + ** since memcpy() sends SIGABORT with overlapping buffers on OpenBSD */ + assert( (pSlot+sz)<=pCArray->apCell[i] + || pSlot>=(pCArray->apCell[i]+sz) + || CORRUPT_DB ); + memmove(pSlot, pCArray->apCell[i], sz); + put2byte(pCellptr, (pSlot - aData)); + pCellptr += 2; + } + *ppData = pData; + return 0; +} + +/* +** Array apCell[] contains nCell pointers to b-tree cells. Array szCell +** contains the size in bytes of each such cell. This function adds the +** space associated with each cell in the array that is currently stored +** within the body of pPg to the pPg free-list. The cell-pointers and other +** fields of the page are not updated. +** +** This function returns the total number of cells added to the free-list. +*/ +static int pageFreeArray( + MemPage *pPg, /* Page to edit */ + int iFirst, /* First cell to delete */ + int nCell, /* Cells to delete */ + CellArray *pCArray /* Array of cells */ +){ + u8 * const aData = pPg->aData; + u8 * const pEnd = &aData[pPg->pBt->usableSize]; + u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize]; + int nRet = 0; + int i; + int iEnd = iFirst + nCell; + u8 *pFree = 0; + int szFree = 0; + + for(i=iFirst; i<iEnd; i++){ + u8 *pCell = pCArray->apCell[i]; + if( pCell>=pStart && pCell<pEnd ){ + int sz; + /* No need to use cachedCellSize() here. The sizes of all cells that + ** are to be freed have already been computing while deciding which + ** cells need freeing */ + sz = pCArray->szCell[i]; assert( sz>0 ); + if( pFree!=(pCell + sz) ){ + if( pFree ){ + assert( pFree>aData && (pFree - aData)<65536 ); + freeSpace(pPg, (u16)(pFree - aData), szFree); + } + pFree = pCell; + szFree = sz; + if( pFree+sz>pEnd ) return 0; + }else{ + pFree = pCell; + szFree += sz; + } + nRet++; + } + } + if( pFree ){ + assert( pFree>aData && (pFree - aData)<65536 ); + freeSpace(pPg, (u16)(pFree - aData), szFree); + } + return nRet; +} + +/* +** apCell[] and szCell[] contains pointers to and sizes of all cells in the +** pages being balanced. The current page, pPg, has pPg->nCell cells starting +** with apCell[iOld]. After balancing, this page should hold nNew cells +** starting at apCell[iNew]. +** +** This routine makes the necessary adjustments to pPg so that it contains +** the correct cells after being balanced. +** +** The pPg->nFree field is invalid when this function returns. It is the +** responsibility of the caller to set it correctly. +*/ +static int editPage( + MemPage *pPg, /* Edit this page */ + int iOld, /* Index of first cell currently on page */ + int iNew, /* Index of new first cell on page */ + int nNew, /* Final number of cells on page */ + CellArray *pCArray /* Array of cells and sizes */ +){ + u8 * const aData = pPg->aData; + const int hdr = pPg->hdrOffset; + u8 *pBegin = &pPg->aCellIdx[nNew * 2]; + int nCell = pPg->nCell; /* Cells stored on pPg */ + u8 *pData; + u8 *pCellptr; + int i; + int iOldEnd = iOld + pPg->nCell + pPg->nOverflow; + int iNewEnd = iNew + nNew; + +#ifdef SQLITE_DEBUG + u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager); + memcpy(pTmp, aData, pPg->pBt->usableSize); +#endif + + /* Remove cells from the start and end of the page */ + if( iOld<iNew ){ + int nShift = pageFreeArray(pPg, iOld, iNew-iOld, pCArray); + memmove(pPg->aCellIdx, &pPg->aCellIdx[nShift*2], nCell*2); + nCell -= nShift; + } + if( iNewEnd < iOldEnd ){ + nCell -= pageFreeArray(pPg, iNewEnd, iOldEnd - iNewEnd, pCArray); + } + + pData = &aData[get2byteNotZero(&aData[hdr+5])]; + if( pData<pBegin ) goto editpage_fail; + + /* Add cells to the start of the page */ + if( iNew<iOld ){ + int nAdd = MIN(nNew,iOld-iNew); + assert( (iOld-iNew)<nNew || nCell==0 || CORRUPT_DB ); + pCellptr = pPg->aCellIdx; + memmove(&pCellptr[nAdd*2], pCellptr, nCell*2); + if( pageInsertArray( + pPg, pBegin, &pData, pCellptr, + iNew, nAdd, pCArray + ) ) goto editpage_fail; + nCell += nAdd; + } + + /* Add any overflow cells */ + for(i=0; i<pPg->nOverflow; i++){ + int iCell = (iOld + pPg->aiOvfl[i]) - iNew; + if( iCell>=0 && iCell<nNew ){ + pCellptr = &pPg->aCellIdx[iCell * 2]; + memmove(&pCellptr[2], pCellptr, (nCell - iCell) * 2); + nCell++; + if( pageInsertArray( + pPg, pBegin, &pData, pCellptr, + iCell+iNew, 1, pCArray + ) ) goto editpage_fail; + } + } + + /* Append cells to the end of the page */ + pCellptr = &pPg->aCellIdx[nCell*2]; + if( pageInsertArray( + pPg, pBegin, &pData, pCellptr, + iNew+nCell, nNew-nCell, pCArray + ) ) goto editpage_fail; + + pPg->nCell = nNew; + pPg->nOverflow = 0; + + put2byte(&aData[hdr+3], pPg->nCell); + put2byte(&aData[hdr+5], pData - aData); + +#ifdef SQLITE_DEBUG + for(i=0; i<nNew && !CORRUPT_DB; i++){ + u8 *pCell = pCArray->apCell[i+iNew]; + int iOff = get2byteAligned(&pPg->aCellIdx[i*2]); + if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){ + pCell = &pTmp[pCell - aData]; + } + assert( 0==memcmp(pCell, &aData[iOff], + pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) ); + } +#endif + + return SQLITE_OK; + editpage_fail: + /* Unable to edit this page. Rebuild it from scratch instead. */ + populateCellCache(pCArray, iNew, nNew); + return rebuildPage(pPg, nNew, &pCArray->apCell[iNew], &pCArray->szCell[iNew]); +} + +/* +** The following parameters determine how many adjacent pages get involved +** in a balancing operation. NN is the number of neighbors on either side +** of the page that participate in the balancing operation. NB is the +** total number of pages that participate, including the target page and +** NN neighbors on either side. +** +** The minimum value of NN is 1 (of course). Increasing NN above 1 +** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance +** in exchange for a larger degradation in INSERT and UPDATE performance. +** The value of NN appears to give the best results overall. +*/ +#define NN 1 /* Number of neighbors on either side of pPage */ +#define NB (NN*2+1) /* Total pages involved in the balance */ + + +#ifndef SQLITE_OMIT_QUICKBALANCE +/* +** This version of balance() handles the common special case where +** a new entry is being inserted on the extreme right-end of the +** tree, in other words, when the new entry will become the largest +** entry in the tree. +** +** Instead of trying to balance the 3 right-most leaf pages, just add +** a new page to the right-hand side and put the one new entry in +** that page. This leaves the right side of the tree somewhat +** unbalanced. But odds are that we will be inserting new entries +** at the end soon afterwards so the nearly empty page will quickly +** fill up. On average. +** +** pPage is the leaf page which is the right-most page in the tree. +** pParent is its parent. pPage must have a single overflow entry +** which is also the right-most entry on the page. +** +** The pSpace buffer is used to store a temporary copy of the divider +** cell that will be inserted into pParent. Such a cell consists of a 4 +** byte page number followed by a variable length integer. In other +** words, at most 13 bytes. Hence the pSpace buffer must be at +** least 13 bytes in size. +*/ +static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){ + BtShared *const pBt = pPage->pBt; /* B-Tree Database */ + MemPage *pNew; /* Newly allocated page */ + int rc; /* Return Code */ + Pgno pgnoNew; /* Page number of pNew */ + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( sqlite3PagerIswriteable(pParent->pDbPage) ); + assert( pPage->nOverflow==1 ); + + /* This error condition is now caught prior to reaching this function */ + if( NEVER(pPage->nCell==0) ) return SQLITE_CORRUPT_BKPT; + + /* Allocate a new page. This page will become the right-sibling of + ** pPage. Make the parent page writable, so that the new divider cell + ** may be inserted. If both these operations are successful, proceed. + */ + rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0); + + if( rc==SQLITE_OK ){ + + u8 *pOut = &pSpace[4]; + u8 *pCell = pPage->apOvfl[0]; + u16 szCell = pPage->xCellSize(pPage, pCell); + u8 *pStop; + + assert( sqlite3PagerIswriteable(pNew->pDbPage) ); + assert( pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) ); + zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF); + rc = rebuildPage(pNew, 1, &pCell, &szCell); + if( NEVER(rc) ) return rc; + pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell; + + /* If this is an auto-vacuum database, update the pointer map + ** with entries for the new page, and any pointer from the + ** cell on the page to an overflow page. If either of these + ** operations fails, the return code is set, but the contents + ** of the parent page are still manipulated by thh code below. + ** That is Ok, at this point the parent page is guaranteed to + ** be marked as dirty. Returning an error code will cause a + ** rollback, undoing any changes made to the parent page. + */ + if( ISAUTOVACUUM ){ + ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno, &rc); + if( szCell>pNew->minLocal ){ + ptrmapPutOvflPtr(pNew, pCell, &rc); + } + } + + /* Create a divider cell to insert into pParent. The divider cell + ** consists of a 4-byte page number (the page number of pPage) and + ** a variable length key value (which must be the same value as the + ** largest key on pPage). + ** + ** To find the largest key value on pPage, first find the right-most + ** cell on pPage. The first two fields of this cell are the + ** record-length (a variable length integer at most 32-bits in size) + ** and the key value (a variable length integer, may have any value). + ** The first of the while(...) loops below skips over the record-length + ** field. The second while(...) loop copies the key value from the + ** cell on pPage into the pSpace buffer. + */ + pCell = findCell(pPage, pPage->nCell-1); + pStop = &pCell[9]; + while( (*(pCell++)&0x80) && pCell<pStop ); + pStop = &pCell[9]; + while( ((*(pOut++) = *(pCell++))&0x80) && pCell<pStop ); + + /* Insert the new divider cell into pParent. */ + insertCell(pParent, pParent->nCell, pSpace, (int)(pOut-pSpace), + 0, pPage->pgno, &rc); + + /* Set the right-child pointer of pParent to point to the new page. */ + put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew); + + /* Release the reference to the new page. */ + releasePage(pNew); + } + + return rc; +} +#endif /* SQLITE_OMIT_QUICKBALANCE */ + +#if 0 +/* +** This function does not contribute anything to the operation of SQLite. +** it is sometimes activated temporarily while debugging code responsible +** for setting pointer-map entries. +*/ +static int ptrmapCheckPages(MemPage **apPage, int nPage){ + int i, j; + for(i=0; i<nPage; i++){ + Pgno n; + u8 e; + MemPage *pPage = apPage[i]; + BtShared *pBt = pPage->pBt; + assert( pPage->isInit ); + + for(j=0; j<pPage->nCell; j++){ + CellInfo info; + u8 *z; + + z = findCell(pPage, j); + pPage->xParseCell(pPage, z, &info); + if( info.iOverflow ){ + Pgno ovfl = get4byte(&z[info.iOverflow]); + ptrmapGet(pBt, ovfl, &e, &n); + assert( n==pPage->pgno && e==PTRMAP_OVERFLOW1 ); + } + if( !pPage->leaf ){ + Pgno child = get4byte(z); + ptrmapGet(pBt, child, &e, &n); + assert( n==pPage->pgno && e==PTRMAP_BTREE ); + } + } + if( !pPage->leaf ){ + Pgno child = get4byte(&pPage->aData[pPage->hdrOffset+8]); + ptrmapGet(pBt, child, &e, &n); + assert( n==pPage->pgno && e==PTRMAP_BTREE ); + } + } + return 1; +} +#endif + +/* +** This function is used to copy the contents of the b-tree node stored +** on page pFrom to page pTo. If page pFrom was not a leaf page, then +** the pointer-map entries for each child page are updated so that the +** parent page stored in the pointer map is page pTo. If pFrom contained +** any cells with overflow page pointers, then the corresponding pointer +** map entries are also updated so that the parent page is page pTo. +** +** If pFrom is currently carrying any overflow cells (entries in the +** MemPage.apOvfl[] array), they are not copied to pTo. +** +** Before returning, page pTo is reinitialized using btreeInitPage(). +** +** The performance of this function is not critical. It is only used by +** the balance_shallower() and balance_deeper() procedures, neither of +** which are called often under normal circumstances. +*/ +static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){ + if( (*pRC)==SQLITE_OK ){ + BtShared * const pBt = pFrom->pBt; + u8 * const aFrom = pFrom->aData; + u8 * const aTo = pTo->aData; + int const iFromHdr = pFrom->hdrOffset; + int const iToHdr = ((pTo->pgno==1) ? 100 : 0); + int rc; + int iData; + + + assert( pFrom->isInit ); + assert( pFrom->nFree>=iToHdr ); + assert( get2byte(&aFrom[iFromHdr+5]) <= (int)pBt->usableSize ); + + /* Copy the b-tree node content from page pFrom to page pTo. */ + iData = get2byte(&aFrom[iFromHdr+5]); + memcpy(&aTo[iData], &aFrom[iData], pBt->usableSize-iData); + memcpy(&aTo[iToHdr], &aFrom[iFromHdr], pFrom->cellOffset + 2*pFrom->nCell); + + /* Reinitialize page pTo so that the contents of the MemPage structure + ** match the new data. The initialization of pTo can actually fail under + ** fairly obscure circumstances, even though it is a copy of initialized + ** page pFrom. + */ + pTo->isInit = 0; + rc = btreeInitPage(pTo); + if( rc!=SQLITE_OK ){ + *pRC = rc; + return; + } + + /* If this is an auto-vacuum database, update the pointer-map entries + ** for any b-tree or overflow pages that pTo now contains the pointers to. + */ + if( ISAUTOVACUUM ){ + *pRC = setChildPtrmaps(pTo); + } + } +} + +/* +** This routine redistributes cells on the iParentIdx'th child of pParent +** (hereafter "the page") and up to 2 siblings so that all pages have about the +** same amount of free space. Usually a single sibling on either side of the +** page are used in the balancing, though both siblings might come from one +** side if the page is the first or last child of its parent. If the page +** has fewer than 2 siblings (something which can only happen if the page +** is a root page or a child of a root page) then all available siblings +** participate in the balancing. +** +** The number of siblings of the page might be increased or decreased by +** one or two in an effort to keep pages nearly full but not over full. +** +** Note that when this routine is called, some of the cells on the page +** might not actually be stored in MemPage.aData[]. This can happen +** if the page is overfull. This routine ensures that all cells allocated +** to the page and its siblings fit into MemPage.aData[] before returning. +** +** In the course of balancing the page and its siblings, cells may be +** inserted into or removed from the parent page (pParent). Doing so +** may cause the parent page to become overfull or underfull. If this +** happens, it is the responsibility of the caller to invoke the correct +** balancing routine to fix this problem (see the balance() routine). +** +** If this routine fails for any reason, it might leave the database +** in a corrupted state. So if this routine fails, the database should +** be rolled back. +** +** The third argument to this function, aOvflSpace, is a pointer to a +** buffer big enough to hold one page. If while inserting cells into the parent +** page (pParent) the parent page becomes overfull, this buffer is +** used to store the parent's overflow cells. Because this function inserts +** a maximum of four divider cells into the parent page, and the maximum +** size of a cell stored within an internal node is always less than 1/4 +** of the page-size, the aOvflSpace[] buffer is guaranteed to be large +** enough for all overflow cells. +** +** If aOvflSpace is set to a null pointer, this function returns +** SQLITE_NOMEM. +*/ +#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM) +#pragma optimize("", off) +#endif +static int balance_nonroot( + MemPage *pParent, /* Parent page of siblings being balanced */ + int iParentIdx, /* Index of "the page" in pParent */ + u8 *aOvflSpace, /* page-size bytes of space for parent ovfl */ + int isRoot, /* True if pParent is a root-page */ + int bBulk /* True if this call is part of a bulk load */ +){ + BtShared *pBt; /* The whole database */ + int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */ + int nNew = 0; /* Number of pages in apNew[] */ + int nOld; /* Number of pages in apOld[] */ + int i, j, k; /* Loop counters */ + int nxDiv; /* Next divider slot in pParent->aCell[] */ + int rc = SQLITE_OK; /* The return code */ + u16 leafCorrection; /* 4 if pPage is a leaf. 0 if not */ + int leafData; /* True if pPage is a leaf of a LEAFDATA tree */ + int usableSpace; /* Bytes in pPage beyond the header */ + int pageFlags; /* Value of pPage->aData[0] */ + int iSpace1 = 0; /* First unused byte of aSpace1[] */ + int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */ + int szScratch; /* Size of scratch memory requested */ + MemPage *apOld[NB]; /* pPage and up to two siblings */ + MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */ + u8 *pRight; /* Location in parent of right-sibling pointer */ + u8 *apDiv[NB-1]; /* Divider cells in pParent */ + int cntNew[NB+2]; /* Index in b.paCell[] of cell after i-th page */ + int cntOld[NB+2]; /* Old index in b.apCell[] */ + int szNew[NB+2]; /* Combined size of cells placed on i-th page */ + u8 *aSpace1; /* Space for copies of dividers cells */ + Pgno pgno; /* Temp var to store a page number in */ + u8 abDone[NB+2]; /* True after i'th new page is populated */ + Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */ + Pgno aPgOrder[NB+2]; /* Copy of aPgno[] used for sorting pages */ + u16 aPgFlags[NB+2]; /* flags field of new pages before shuffling */ + CellArray b; /* Parsed information on cells being balanced */ + + memset(abDone, 0, sizeof(abDone)); + b.nCell = 0; + b.apCell = 0; + pBt = pParent->pBt; + assert( sqlite3_mutex_held(pBt->mutex) ); + assert( sqlite3PagerIswriteable(pParent->pDbPage) ); + +#if 0 + TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno)); +#endif + + /* At this point pParent may have at most one overflow cell. And if + ** this overflow cell is present, it must be the cell with + ** index iParentIdx. This scenario comes about when this function + ** is called (indirectly) from sqlite3BtreeDelete(). + */ + assert( pParent->nOverflow==0 || pParent->nOverflow==1 ); + assert( pParent->nOverflow==0 || pParent->aiOvfl[0]==iParentIdx ); + + if( !aOvflSpace ){ + return SQLITE_NOMEM; + } + + /* Find the sibling pages to balance. Also locate the cells in pParent + ** that divide the siblings. An attempt is made to find NN siblings on + ** either side of pPage. More siblings are taken from one side, however, + ** if there are fewer than NN siblings on the other side. If pParent + ** has NB or fewer children then all children of pParent are taken. + ** + ** This loop also drops the divider cells from the parent page. This + ** way, the remainder of the function does not have to deal with any + ** overflow cells in the parent page, since if any existed they will + ** have already been removed. + */ + i = pParent->nOverflow + pParent->nCell; + if( i<2 ){ + nxDiv = 0; + }else{ + assert( bBulk==0 || bBulk==1 ); + if( iParentIdx==0 ){ + nxDiv = 0; + }else if( iParentIdx==i ){ + nxDiv = i-2+bBulk; + }else{ + nxDiv = iParentIdx-1; + } + i = 2-bBulk; + } + nOld = i+1; + if( (i+nxDiv-pParent->nOverflow)==pParent->nCell ){ + pRight = &pParent->aData[pParent->hdrOffset+8]; + }else{ + pRight = findCell(pParent, i+nxDiv-pParent->nOverflow); + } + pgno = get4byte(pRight); + while( 1 ){ + rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0); + if( rc ){ + memset(apOld, 0, (i+1)*sizeof(MemPage*)); + goto balance_cleanup; + } + nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow; + if( (i--)==0 ) break; + + if( i+nxDiv==pParent->aiOvfl[0] && pParent->nOverflow ){ + apDiv[i] = pParent->apOvfl[0]; + pgno = get4byte(apDiv[i]); + szNew[i] = pParent->xCellSize(pParent, apDiv[i]); + pParent->nOverflow = 0; + }else{ + apDiv[i] = findCell(pParent, i+nxDiv-pParent->nOverflow); + pgno = get4byte(apDiv[i]); + szNew[i] = pParent->xCellSize(pParent, apDiv[i]); + + /* Drop the cell from the parent page. apDiv[i] still points to + ** the cell within the parent, even though it has been dropped. + ** This is safe because dropping a cell only overwrites the first + ** four bytes of it, and this function does not need the first + ** four bytes of the divider cell. So the pointer is safe to use + ** later on. + ** + ** But not if we are in secure-delete mode. In secure-delete mode, + ** the dropCell() routine will overwrite the entire cell with zeroes. + ** In this case, temporarily copy the cell into the aOvflSpace[] + ** buffer. It will be copied out again as soon as the aSpace[] buffer + ** is allocated. */ + if( pBt->btsFlags & BTS_SECURE_DELETE ){ + int iOff; + + iOff = SQLITE_PTR_TO_INT(apDiv[i]) - SQLITE_PTR_TO_INT(pParent->aData); + if( (iOff+szNew[i])>(int)pBt->usableSize ){ + rc = SQLITE_CORRUPT_BKPT; + memset(apOld, 0, (i+1)*sizeof(MemPage*)); + goto balance_cleanup; + }else{ + memcpy(&aOvflSpace[iOff], apDiv[i], szNew[i]); + apDiv[i] = &aOvflSpace[apDiv[i]-pParent->aData]; + } + } + dropCell(pParent, i+nxDiv-pParent->nOverflow, szNew[i], &rc); + } + } + + /* Make nMaxCells a multiple of 4 in order to preserve 8-byte + ** alignment */ + nMaxCells = (nMaxCells + 3)&~3; + + /* + ** Allocate space for memory structures + */ + szScratch = + nMaxCells*sizeof(u8*) /* b.apCell */ + + nMaxCells*sizeof(u16) /* b.szCell */ + + pBt->pageSize; /* aSpace1 */ + + /* EVIDENCE-OF: R-28375-38319 SQLite will never request a scratch buffer + ** that is more than 6 times the database page size. */ + assert( szScratch<=6*(int)pBt->pageSize ); + b.apCell = sqlite3ScratchMalloc( szScratch ); + if( b.apCell==0 ){ + rc = SQLITE_NOMEM; + goto balance_cleanup; + } + b.szCell = (u16*)&b.apCell[nMaxCells]; + aSpace1 = (u8*)&b.szCell[nMaxCells]; + assert( EIGHT_BYTE_ALIGNMENT(aSpace1) ); + + /* + ** Load pointers to all cells on sibling pages and the divider cells + ** into the local b.apCell[] array. Make copies of the divider cells + ** into space obtained from aSpace1[]. The divider cells have already + ** been removed from pParent. + ** + ** If the siblings are on leaf pages, then the child pointers of the + ** divider cells are stripped from the cells before they are copied + ** into aSpace1[]. In this way, all cells in b.apCell[] are without + ** child pointers. If siblings are not leaves, then all cell in + ** b.apCell[] include child pointers. Either way, all cells in b.apCell[] + ** are alike. + ** + ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf. + ** leafData: 1 if pPage holds key+data and pParent holds only keys. + */ + b.pRef = apOld[0]; + leafCorrection = b.pRef->leaf*4; + leafData = b.pRef->intKeyLeaf; + for(i=0; i<nOld; i++){ + MemPage *pOld = apOld[i]; + int limit = pOld->nCell; + u8 *aData = pOld->aData; + u16 maskPage = pOld->maskPage; + u8 *piCell = aData + pOld->cellOffset; + u8 *piEnd; + + /* Verify that all sibling pages are of the same "type" (table-leaf, + ** table-interior, index-leaf, or index-interior). + */ + if( pOld->aData[0]!=apOld[0]->aData[0] ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } + + /* Load b.apCell[] with pointers to all cells in pOld. If pOld + ** constains overflow cells, include them in the b.apCell[] array + ** in the correct spot. + ** + ** Note that when there are multiple overflow cells, it is always the + ** case that they are sequential and adjacent. This invariant arises + ** because multiple overflows can only occurs when inserting divider + ** cells into a parent on a prior balance, and divider cells are always + ** adjacent and are inserted in order. There is an assert() tagged + ** with "NOTE 1" in the overflow cell insertion loop to prove this + ** invariant. + ** + ** This must be done in advance. Once the balance starts, the cell + ** offset section of the btree page will be overwritten and we will no + ** long be able to find the cells if a pointer to each cell is not saved + ** first. + */ + memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*limit); + if( pOld->nOverflow>0 ){ + memset(&b.szCell[b.nCell+limit], 0, sizeof(b.szCell[0])*pOld->nOverflow); + limit = pOld->aiOvfl[0]; + for(j=0; j<limit; j++){ + b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell)); + piCell += 2; + b.nCell++; + } + for(k=0; k<pOld->nOverflow; k++){ + assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */ + b.apCell[b.nCell] = pOld->apOvfl[k]; + b.nCell++; + } + } + piEnd = aData + pOld->cellOffset + 2*pOld->nCell; + while( piCell<piEnd ){ + assert( b.nCell<nMaxCells ); + b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell)); + piCell += 2; + b.nCell++; + } + + cntOld[i] = b.nCell; + if( i<nOld-1 && !leafData){ + u16 sz = (u16)szNew[i]; + u8 *pTemp; + assert( b.nCell<nMaxCells ); + b.szCell[b.nCell] = sz; + pTemp = &aSpace1[iSpace1]; + iSpace1 += sz; + assert( sz<=pBt->maxLocal+23 ); + assert( iSpace1 <= (int)pBt->pageSize ); + memcpy(pTemp, apDiv[i], sz); + b.apCell[b.nCell] = pTemp+leafCorrection; + assert( leafCorrection==0 || leafCorrection==4 ); + b.szCell[b.nCell] = b.szCell[b.nCell] - leafCorrection; + if( !pOld->leaf ){ + assert( leafCorrection==0 ); + assert( pOld->hdrOffset==0 ); + /* The right pointer of the child page pOld becomes the left + ** pointer of the divider cell */ + memcpy(b.apCell[b.nCell], &pOld->aData[8], 4); + }else{ + assert( leafCorrection==4 ); + while( b.szCell[b.nCell]<4 ){ + /* Do not allow any cells smaller than 4 bytes. If a smaller cell + ** does exist, pad it with 0x00 bytes. */ + assert( b.szCell[b.nCell]==3 || CORRUPT_DB ); + assert( b.apCell[b.nCell]==&aSpace1[iSpace1-3] || CORRUPT_DB ); + aSpace1[iSpace1++] = 0x00; + b.szCell[b.nCell]++; + } + } + b.nCell++; + } + } + + /* + ** Figure out the number of pages needed to hold all b.nCell cells. + ** Store this number in "k". Also compute szNew[] which is the total + ** size of all cells on the i-th page and cntNew[] which is the index + ** in b.apCell[] of the cell that divides page i from page i+1. + ** cntNew[k] should equal b.nCell. + ** + ** Values computed by this block: + ** + ** k: The total number of sibling pages + ** szNew[i]: Spaced used on the i-th sibling page. + ** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to + ** the right of the i-th sibling page. + ** usableSpace: Number of bytes of space available on each sibling. + ** + */ + usableSpace = pBt->usableSize - 12 + leafCorrection; + for(i=0; i<nOld; i++){ + MemPage *p = apOld[i]; + szNew[i] = usableSpace - p->nFree; + if( szNew[i]<0 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } + for(j=0; j<p->nOverflow; j++){ + szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]); + } + cntNew[i] = cntOld[i]; + } + k = nOld; + for(i=0; i<k; i++){ + int sz; + while( szNew[i]>usableSpace ){ + if( i+1>=k ){ + k = i+2; + if( k>NB+2 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } + szNew[k-1] = 0; + cntNew[k-1] = b.nCell; + } + sz = 2 + cachedCellSize(&b, cntNew[i]-1); + szNew[i] -= sz; + if( !leafData ){ + if( cntNew[i]<b.nCell ){ + sz = 2 + cachedCellSize(&b, cntNew[i]); + }else{ + sz = 0; + } + } + szNew[i+1] += sz; + cntNew[i]--; + } + while( cntNew[i]<b.nCell ){ + sz = 2 + cachedCellSize(&b, cntNew[i]); + if( szNew[i]+sz>usableSpace ) break; + szNew[i] += sz; + cntNew[i]++; + if( !leafData ){ + if( cntNew[i]<b.nCell ){ + sz = 2 + cachedCellSize(&b, cntNew[i]); + }else{ + sz = 0; + } + } + szNew[i+1] -= sz; + } + if( cntNew[i]>=b.nCell ){ + k = i+1; + }else if( cntNew[i] <= (i>0 ? cntNew[i-1] : 0) ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } + } + + /* + ** The packing computed by the previous block is biased toward the siblings + ** on the left side (siblings with smaller keys). The left siblings are + ** always nearly full, while the right-most sibling might be nearly empty. + ** The next block of code attempts to adjust the packing of siblings to + ** get a better balance. + ** + ** This adjustment is more than an optimization. The packing above might + ** be so out of balance as to be illegal. For example, the right-most + ** sibling might be completely empty. This adjustment is not optional. + */ + for(i=k-1; i>0; i--){ + int szRight = szNew[i]; /* Size of sibling on the right */ + int szLeft = szNew[i-1]; /* Size of sibling on the left */ + int r; /* Index of right-most cell in left sibling */ + int d; /* Index of first cell to the left of right sibling */ + + r = cntNew[i-1] - 1; + d = r + 1 - leafData; + (void)cachedCellSize(&b, d); + do{ + assert( d<nMaxCells ); + assert( r<nMaxCells ); + (void)cachedCellSize(&b, r); + if( szRight!=0 + && (bBulk || szRight+b.szCell[d]+2 > szLeft-(b.szCell[r]+2)) ){ + break; + } + szRight += b.szCell[d] + 2; + szLeft -= b.szCell[r] + 2; + cntNew[i-1] = r; + r--; + d--; + }while( r>=0 ); + szNew[i] = szRight; + szNew[i-1] = szLeft; + if( cntNew[i-1] <= (i>1 ? cntNew[i-2] : 0) ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } + } + + /* Sanity check: For a non-corrupt database file one of the follwing + ** must be true: + ** (1) We found one or more cells (cntNew[0])>0), or + ** (2) pPage is a virtual root page. A virtual root page is when + ** the real root page is page 1 and we are the only child of + ** that page. + */ + assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) || CORRUPT_DB); + TRACE(("BALANCE: old: %d(nc=%d) %d(nc=%d) %d(nc=%d)\n", + apOld[0]->pgno, apOld[0]->nCell, + nOld>=2 ? apOld[1]->pgno : 0, nOld>=2 ? apOld[1]->nCell : 0, + nOld>=3 ? apOld[2]->pgno : 0, nOld>=3 ? apOld[2]->nCell : 0 + )); + + /* + ** Allocate k new pages. Reuse old pages where possible. + */ + pageFlags = apOld[0]->aData[0]; + for(i=0; i<k; i++){ + MemPage *pNew; + if( i<nOld ){ + pNew = apNew[i] = apOld[i]; + apOld[i] = 0; + rc = sqlite3PagerWrite(pNew->pDbPage); + nNew++; + if( rc ) goto balance_cleanup; + }else{ + assert( i>0 ); + rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0); + if( rc ) goto balance_cleanup; + zeroPage(pNew, pageFlags); + apNew[i] = pNew; + nNew++; + cntOld[i] = b.nCell; + + /* Set the pointer-map entry for the new sibling page. */ + if( ISAUTOVACUUM ){ + ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc); + if( rc!=SQLITE_OK ){ + goto balance_cleanup; + } + } + } + } + + /* + ** Reassign page numbers so that the new pages are in ascending order. + ** This helps to keep entries in the disk file in order so that a scan + ** of the table is closer to a linear scan through the file. That in turn + ** helps the operating system to deliver pages from the disk more rapidly. + ** + ** An O(n^2) insertion sort algorithm is used, but since n is never more + ** than (NB+2) (a small constant), that should not be a problem. + ** + ** When NB==3, this one optimization makes the database about 25% faster + ** for large insertions and deletions. + */ + for(i=0; i<nNew; i++){ + aPgOrder[i] = aPgno[i] = apNew[i]->pgno; + aPgFlags[i] = apNew[i]->pDbPage->flags; + for(j=0; j<i; j++){ + if( aPgno[j]==aPgno[i] ){ + /* This branch is taken if the set of sibling pages somehow contains + ** duplicate entries. This can happen if the database is corrupt. + ** It would be simpler to detect this as part of the loop below, but + ** we do the detection here in order to avoid populating the pager + ** cache with two separate objects associated with the same + ** page number. */ + assert( CORRUPT_DB ); + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } + } + } + for(i=0; i<nNew; i++){ + int iBest = 0; /* aPgno[] index of page number to use */ + for(j=1; j<nNew; j++){ + if( aPgOrder[j]<aPgOrder[iBest] ) iBest = j; + } + pgno = aPgOrder[iBest]; + aPgOrder[iBest] = 0xffffffff; + if( iBest!=i ){ + if( iBest>i ){ + sqlite3PagerRekey(apNew[iBest]->pDbPage, pBt->nPage+iBest+1, 0); + } + sqlite3PagerRekey(apNew[i]->pDbPage, pgno, aPgFlags[iBest]); + apNew[i]->pgno = pgno; + } + } + + TRACE(("BALANCE: new: %d(%d nc=%d) %d(%d nc=%d) %d(%d nc=%d) " + "%d(%d nc=%d) %d(%d nc=%d)\n", + apNew[0]->pgno, szNew[0], cntNew[0], + nNew>=2 ? apNew[1]->pgno : 0, nNew>=2 ? szNew[1] : 0, + nNew>=2 ? cntNew[1] - cntNew[0] - !leafData : 0, + nNew>=3 ? apNew[2]->pgno : 0, nNew>=3 ? szNew[2] : 0, + nNew>=3 ? cntNew[2] - cntNew[1] - !leafData : 0, + nNew>=4 ? apNew[3]->pgno : 0, nNew>=4 ? szNew[3] : 0, + nNew>=4 ? cntNew[3] - cntNew[2] - !leafData : 0, + nNew>=5 ? apNew[4]->pgno : 0, nNew>=5 ? szNew[4] : 0, + nNew>=5 ? cntNew[4] - cntNew[3] - !leafData : 0 + )); + + assert( sqlite3PagerIswriteable(pParent->pDbPage) ); + put4byte(pRight, apNew[nNew-1]->pgno); + + /* If the sibling pages are not leaves, ensure that the right-child pointer + ** of the right-most new sibling page is set to the value that was + ** originally in the same field of the right-most old sibling page. */ + if( (pageFlags & PTF_LEAF)==0 && nOld!=nNew ){ + MemPage *pOld = (nNew>nOld ? apNew : apOld)[nOld-1]; + memcpy(&apNew[nNew-1]->aData[8], &pOld->aData[8], 4); + } + + /* Make any required updates to pointer map entries associated with + ** cells stored on sibling pages following the balance operation. Pointer + ** map entries associated with divider cells are set by the insertCell() + ** routine. The associated pointer map entries are: + ** + ** a) if the cell contains a reference to an overflow chain, the + ** entry associated with the first page in the overflow chain, and + ** + ** b) if the sibling pages are not leaves, the child page associated + ** with the cell. + ** + ** If the sibling pages are not leaves, then the pointer map entry + ** associated with the right-child of each sibling may also need to be + ** updated. This happens below, after the sibling pages have been + ** populated, not here. + */ + if( ISAUTOVACUUM ){ + MemPage *pNew = apNew[0]; + u8 *aOld = pNew->aData; + int cntOldNext = pNew->nCell + pNew->nOverflow; + int usableSize = pBt->usableSize; + int iNew = 0; + int iOld = 0; + + for(i=0; i<b.nCell; i++){ + u8 *pCell = b.apCell[i]; + if( i==cntOldNext ){ + MemPage *pOld = (++iOld)<nNew ? apNew[iOld] : apOld[iOld]; + cntOldNext += pOld->nCell + pOld->nOverflow + !leafData; + aOld = pOld->aData; + } + if( i==cntNew[iNew] ){ + pNew = apNew[++iNew]; + if( !leafData ) continue; + } + + /* Cell pCell is destined for new sibling page pNew. Originally, it + ** was either part of sibling page iOld (possibly an overflow cell), + ** or else the divider cell to the left of sibling page iOld. So, + ** if sibling page iOld had the same page number as pNew, and if + ** pCell really was a part of sibling page iOld (not a divider or + ** overflow cell), we can skip updating the pointer map entries. */ + if( iOld>=nNew + || pNew->pgno!=aPgno[iOld] + || pCell<aOld + || pCell>=&aOld[usableSize] + ){ + if( !leafCorrection ){ + ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc); + } + if( cachedCellSize(&b,i)>pNew->minLocal ){ + ptrmapPutOvflPtr(pNew, pCell, &rc); + } + if( rc ) goto balance_cleanup; + } + } + } + + /* Insert new divider cells into pParent. */ + for(i=0; i<nNew-1; i++){ + u8 *pCell; + u8 *pTemp; + int sz; + MemPage *pNew = apNew[i]; + j = cntNew[i]; + + assert( j<nMaxCells ); + assert( b.apCell[j]!=0 ); + pCell = b.apCell[j]; + sz = b.szCell[j] + leafCorrection; + pTemp = &aOvflSpace[iOvflSpace]; + if( !pNew->leaf ){ + memcpy(&pNew->aData[8], pCell, 4); + }else if( leafData ){ + /* If the tree is a leaf-data tree, and the siblings are leaves, + ** then there is no divider cell in b.apCell[]. Instead, the divider + ** cell consists of the integer key for the right-most cell of + ** the sibling-page assembled above only. + */ + CellInfo info; + j--; + pNew->xParseCell(pNew, b.apCell[j], &info); + pCell = pTemp; + sz = 4 + putVarint(&pCell[4], info.nKey); + pTemp = 0; + }else{ + pCell -= 4; + /* Obscure case for non-leaf-data trees: If the cell at pCell was + ** previously stored on a leaf node, and its reported size was 4 + ** bytes, then it may actually be smaller than this + ** (see btreeParseCellPtr(), 4 bytes is the minimum size of + ** any cell). But it is important to pass the correct size to + ** insertCell(), so reparse the cell now. + ** + ** Note that this can never happen in an SQLite data file, as all + ** cells are at least 4 bytes. It only happens in b-trees used + ** to evaluate "IN (SELECT ...)" and similar clauses. + */ + if( b.szCell[j]==4 ){ + assert(leafCorrection==4); + sz = pParent->xCellSize(pParent, pCell); + } + } + iOvflSpace += sz; + assert( sz<=pBt->maxLocal+23 ); + assert( iOvflSpace <= (int)pBt->pageSize ); + insertCell(pParent, nxDiv+i, pCell, sz, pTemp, pNew->pgno, &rc); + if( rc!=SQLITE_OK ) goto balance_cleanup; + assert( sqlite3PagerIswriteable(pParent->pDbPage) ); + } + + /* Now update the actual sibling pages. The order in which they are updated + ** is important, as this code needs to avoid disrupting any page from which + ** cells may still to be read. In practice, this means: + ** + ** (1) If cells are moving left (from apNew[iPg] to apNew[iPg-1]) + ** then it is not safe to update page apNew[iPg] until after + ** the left-hand sibling apNew[iPg-1] has been updated. + ** + ** (2) If cells are moving right (from apNew[iPg] to apNew[iPg+1]) + ** then it is not safe to update page apNew[iPg] until after + ** the right-hand sibling apNew[iPg+1] has been updated. + ** + ** If neither of the above apply, the page is safe to update. + ** + ** The iPg value in the following loop starts at nNew-1 goes down + ** to 0, then back up to nNew-1 again, thus making two passes over + ** the pages. On the initial downward pass, only condition (1) above + ** needs to be tested because (2) will always be true from the previous + ** step. On the upward pass, both conditions are always true, so the + ** upwards pass simply processes pages that were missed on the downward + ** pass. + */ + for(i=1-nNew; i<nNew; i++){ + int iPg = i<0 ? -i : i; + assert( iPg>=0 && iPg<nNew ); + if( abDone[iPg] ) continue; /* Skip pages already processed */ + if( i>=0 /* On the upwards pass, or... */ + || cntOld[iPg-1]>=cntNew[iPg-1] /* Condition (1) is true */ + ){ + int iNew; + int iOld; + int nNewCell; + + /* Verify condition (1): If cells are moving left, update iPg + ** only after iPg-1 has already been updated. */ + assert( iPg==0 || cntOld[iPg-1]>=cntNew[iPg-1] || abDone[iPg-1] ); + + /* Verify condition (2): If cells are moving right, update iPg + ** only after iPg+1 has already been updated. */ + assert( cntNew[iPg]>=cntOld[iPg] || abDone[iPg+1] ); + + if( iPg==0 ){ + iNew = iOld = 0; + nNewCell = cntNew[0]; + }else{ + iOld = iPg<nOld ? (cntOld[iPg-1] + !leafData) : b.nCell; + iNew = cntNew[iPg-1] + !leafData; + nNewCell = cntNew[iPg] - iNew; + } + + rc = editPage(apNew[iPg], iOld, iNew, nNewCell, &b); + if( rc ) goto balance_cleanup; + abDone[iPg]++; + apNew[iPg]->nFree = usableSpace-szNew[iPg]; + assert( apNew[iPg]->nOverflow==0 ); + assert( apNew[iPg]->nCell==nNewCell ); + } + } + + /* All pages have been processed exactly once */ + assert( memcmp(abDone, "\01\01\01\01\01", nNew)==0 ); + + assert( nOld>0 ); + assert( nNew>0 ); + + if( isRoot && pParent->nCell==0 && pParent->hdrOffset<=apNew[0]->nFree ){ + /* The root page of the b-tree now contains no cells. The only sibling + ** page is the right-child of the parent. Copy the contents of the + ** child page into the parent, decreasing the overall height of the + ** b-tree structure by one. This is described as the "balance-shallower" + ** sub-algorithm in some documentation. + ** + ** If this is an auto-vacuum database, the call to copyNodeContent() + ** sets all pointer-map entries corresponding to database image pages + ** for which the pointer is stored within the content being copied. + ** + ** It is critical that the child page be defragmented before being + ** copied into the parent, because if the parent is page 1 then it will + ** by smaller than the child due to the database header, and so all the + ** free space needs to be up front. + */ + assert( nNew==1 || CORRUPT_DB ); + rc = defragmentPage(apNew[0]); + testcase( rc!=SQLITE_OK ); + assert( apNew[0]->nFree == + (get2byte(&apNew[0]->aData[5])-apNew[0]->cellOffset-apNew[0]->nCell*2) + || rc!=SQLITE_OK + ); + copyNodeContent(apNew[0], pParent, &rc); + freePage(apNew[0], &rc); + }else if( ISAUTOVACUUM && !leafCorrection ){ + /* Fix the pointer map entries associated with the right-child of each + ** sibling page. All other pointer map entries have already been taken + ** care of. */ + for(i=0; i<nNew; i++){ + u32 key = get4byte(&apNew[i]->aData[8]); + ptrmapPut(pBt, key, PTRMAP_BTREE, apNew[i]->pgno, &rc); + } + } + + assert( pParent->isInit ); + TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n", + nOld, nNew, b.nCell)); + + /* Free any old pages that were not reused as new pages. + */ + for(i=nNew; i<nOld; i++){ + freePage(apOld[i], &rc); + } + +#if 0 + if( ISAUTOVACUUM && rc==SQLITE_OK && apNew[0]->isInit ){ + /* The ptrmapCheckPages() contains assert() statements that verify that + ** all pointer map pages are set correctly. This is helpful while + ** debugging. This is usually disabled because a corrupt database may + ** cause an assert() statement to fail. */ + ptrmapCheckPages(apNew, nNew); + ptrmapCheckPages(&pParent, 1); + } +#endif + + /* + ** Cleanup before returning. + */ +balance_cleanup: + sqlite3ScratchFree(b.apCell); + for(i=0; i<nOld; i++){ + releasePage(apOld[i]); + } + for(i=0; i<nNew; i++){ + releasePage(apNew[i]); + } + + return rc; +} +#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM) +#pragma optimize("", on) +#endif + + +/* +** This function is called when the root page of a b-tree structure is +** overfull (has one or more overflow pages). +** +** A new child page is allocated and the contents of the current root +** page, including overflow cells, are copied into the child. The root +** page is then overwritten to make it an empty page with the right-child +** pointer pointing to the new page. +** +** Before returning, all pointer-map entries corresponding to pages +** that the new child-page now contains pointers to are updated. The +** entry corresponding to the new right-child pointer of the root +** page is also updated. +** +** If successful, *ppChild is set to contain a reference to the child +** page and SQLITE_OK is returned. In this case the caller is required +** to call releasePage() on *ppChild exactly once. If an error occurs, +** an error code is returned and *ppChild is set to 0. +*/ +static int balance_deeper(MemPage *pRoot, MemPage **ppChild){ + int rc; /* Return value from subprocedures */ + MemPage *pChild = 0; /* Pointer to a new child page */ + Pgno pgnoChild = 0; /* Page number of the new child page */ + BtShared *pBt = pRoot->pBt; /* The BTree */ + + assert( pRoot->nOverflow>0 ); + assert( sqlite3_mutex_held(pBt->mutex) ); + + /* Make pRoot, the root page of the b-tree, writable. Allocate a new + ** page that will become the new right-child of pPage. Copy the contents + ** of the node stored on pRoot into the new child page. + */ + rc = sqlite3PagerWrite(pRoot->pDbPage); + if( rc==SQLITE_OK ){ + rc = allocateBtreePage(pBt,&pChild,&pgnoChild,pRoot->pgno,0); + copyNodeContent(pRoot, pChild, &rc); + if( ISAUTOVACUUM ){ + ptrmapPut(pBt, pgnoChild, PTRMAP_BTREE, pRoot->pgno, &rc); + } + } + if( rc ){ + *ppChild = 0; + releasePage(pChild); + return rc; + } + assert( sqlite3PagerIswriteable(pChild->pDbPage) ); + assert( sqlite3PagerIswriteable(pRoot->pDbPage) ); + assert( pChild->nCell==pRoot->nCell ); + + TRACE(("BALANCE: copy root %d into %d\n", pRoot->pgno, pChild->pgno)); + + /* Copy the overflow cells from pRoot to pChild */ + memcpy(pChild->aiOvfl, pRoot->aiOvfl, + pRoot->nOverflow*sizeof(pRoot->aiOvfl[0])); + memcpy(pChild->apOvfl, pRoot->apOvfl, + pRoot->nOverflow*sizeof(pRoot->apOvfl[0])); + pChild->nOverflow = pRoot->nOverflow; + + /* Zero the contents of pRoot. Then install pChild as the right-child. */ + zeroPage(pRoot, pChild->aData[0] & ~PTF_LEAF); + put4byte(&pRoot->aData[pRoot->hdrOffset+8], pgnoChild); + + *ppChild = pChild; + return SQLITE_OK; +} + +/* +** The page that pCur currently points to has just been modified in +** some way. This function figures out if this modification means the +** tree needs to be balanced, and if so calls the appropriate balancing +** routine. Balancing routines are: +** +** balance_quick() +** balance_deeper() +** balance_nonroot() +*/ +static int balance(BtCursor *pCur){ + int rc = SQLITE_OK; + const int nMin = pCur->pBt->usableSize * 2 / 3; + u8 aBalanceQuickSpace[13]; + u8 *pFree = 0; + + TESTONLY( int balance_quick_called = 0 ); + TESTONLY( int balance_deeper_called = 0 ); + + do { + int iPage = pCur->iPage; + MemPage *pPage = pCur->apPage[iPage]; + + if( iPage==0 ){ + if( pPage->nOverflow ){ + /* The root page of the b-tree is overfull. In this case call the + ** balance_deeper() function to create a new child for the root-page + ** and copy the current contents of the root-page to it. The + ** next iteration of the do-loop will balance the child page. + */ + assert( (balance_deeper_called++)==0 ); + rc = balance_deeper(pPage, &pCur->apPage[1]); + if( rc==SQLITE_OK ){ + pCur->iPage = 1; + pCur->aiIdx[0] = 0; + pCur->aiIdx[1] = 0; + assert( pCur->apPage[1]->nOverflow ); + } + }else{ + break; + } + }else if( pPage->nOverflow==0 && pPage->nFree<=nMin ){ + break; + }else{ + MemPage * const pParent = pCur->apPage[iPage-1]; + int const iIdx = pCur->aiIdx[iPage-1]; + + rc = sqlite3PagerWrite(pParent->pDbPage); + if( rc==SQLITE_OK ){ +#ifndef SQLITE_OMIT_QUICKBALANCE + if( pPage->intKeyLeaf + && pPage->nOverflow==1 + && pPage->aiOvfl[0]==pPage->nCell + && pParent->pgno!=1 + && pParent->nCell==iIdx + ){ + /* Call balance_quick() to create a new sibling of pPage on which + ** to store the overflow cell. balance_quick() inserts a new cell + ** into pParent, which may cause pParent overflow. If this + ** happens, the next iteration of the do-loop will balance pParent + ** use either balance_nonroot() or balance_deeper(). Until this + ** happens, the overflow cell is stored in the aBalanceQuickSpace[] + ** buffer. + ** + ** The purpose of the following assert() is to check that only a + ** single call to balance_quick() is made for each call to this + ** function. If this were not verified, a subtle bug involving reuse + ** of the aBalanceQuickSpace[] might sneak in. + */ + assert( (balance_quick_called++)==0 ); + rc = balance_quick(pParent, pPage, aBalanceQuickSpace); + }else +#endif + { + /* In this case, call balance_nonroot() to redistribute cells + ** between pPage and up to 2 of its sibling pages. This involves + ** modifying the contents of pParent, which may cause pParent to + ** become overfull or underfull. The next iteration of the do-loop + ** will balance the parent page to correct this. + ** + ** If the parent page becomes overfull, the overflow cell or cells + ** are stored in the pSpace buffer allocated immediately below. + ** A subsequent iteration of the do-loop will deal with this by + ** calling balance_nonroot() (balance_deeper() may be called first, + ** but it doesn't deal with overflow cells - just moves them to a + ** different page). Once this subsequent call to balance_nonroot() + ** has completed, it is safe to release the pSpace buffer used by + ** the previous call, as the overflow cell data will have been + ** copied either into the body of a database page or into the new + ** pSpace buffer passed to the latter call to balance_nonroot(). + */ + u8 *pSpace = sqlite3PageMalloc(pCur->pBt->pageSize); + rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1, + pCur->hints&BTREE_BULKLOAD); + if( pFree ){ + /* If pFree is not NULL, it points to the pSpace buffer used + ** by a previous call to balance_nonroot(). Its contents are + ** now stored either on real database pages or within the + ** new pSpace buffer, so it may be safely freed here. */ + sqlite3PageFree(pFree); + } + + /* The pSpace buffer will be freed after the next call to + ** balance_nonroot(), or just before this function returns, whichever + ** comes first. */ + pFree = pSpace; + } + } + + pPage->nOverflow = 0; + + /* The next iteration of the do-loop balances the parent page. */ + releasePage(pPage); + pCur->iPage--; + assert( pCur->iPage>=0 ); + } + }while( rc==SQLITE_OK ); + + if( pFree ){ + sqlite3PageFree(pFree); + } + return rc; +} + + +/* +** Insert a new record into the BTree. The key is given by (pKey,nKey) +** and the data is given by (pData,nData). The cursor is used only to +** define what table the record should be inserted into. The cursor +** is left pointing at a random location. +** +** For an INTKEY table, only the nKey value of the key is used. pKey is +** ignored. For a ZERODATA table, the pData and nData are both ignored. +** +** If the seekResult parameter is non-zero, then a successful call to +** MovetoUnpacked() to seek cursor pCur to (pKey, nKey) has already +** been performed. seekResult is the search result returned (a negative +** number if pCur points at an entry that is smaller than (pKey, nKey), or +** a positive value if pCur points at an entry that is larger than +** (pKey, nKey)). +** +** If the seekResult parameter is non-zero, then the caller guarantees that +** cursor pCur is pointing at the existing copy of a row that is to be +** overwritten. If the seekResult parameter is 0, then cursor pCur may +** point to any entry or to no entry at all and so this function has to seek +** the cursor before the new key can be inserted. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeInsert( + BtCursor *pCur, /* Insert data into the table of this cursor */ + const void *pKey, i64 nKey, /* The key of the new record */ + const void *pData, int nData, /* The data of the new record */ + int nZero, /* Number of extra 0 bytes to append to data */ + int appendBias, /* True if this is likely an append */ + int seekResult /* Result of prior MovetoUnpacked() call */ +){ + int rc; + int loc = seekResult; /* -1: before desired location +1: after */ + int szNew = 0; + int idx; + MemPage *pPage; + Btree *p = pCur->pBtree; + BtShared *pBt = p->pBt; + unsigned char *oldCell; + unsigned char *newCell = 0; + + if( pCur->eState==CURSOR_FAULT ){ + assert( pCur->skipNext!=SQLITE_OK ); + return pCur->skipNext; + } + + assert( cursorHoldsMutex(pCur) ); + assert( (pCur->curFlags & BTCF_WriteFlag)!=0 + && pBt->inTransaction==TRANS_WRITE + && (pBt->btsFlags & BTS_READ_ONLY)==0 ); + assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); + + /* Assert that the caller has been consistent. If this cursor was opened + ** expecting an index b-tree, then the caller should be inserting blob + ** keys with no associated data. If the cursor was opened expecting an + ** intkey table, the caller should be inserting integer keys with a + ** blob of associated data. */ + assert( (pKey==0)==(pCur->pKeyInfo==0) ); + + /* Save the positions of any other cursors open on this table. + ** + ** In some cases, the call to btreeMoveto() below is a no-op. For + ** example, when inserting data into a table with auto-generated integer + ** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the + ** integer key to use. It then calls this function to actually insert the + ** data into the intkey B-Tree. In this case btreeMoveto() recognizes + ** that the cursor is already where it needs to be and returns without + ** doing any work. To avoid thwarting these optimizations, it is important + ** not to clear the cursor here. + */ + if( pCur->curFlags & BTCF_Multiple ){ + rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); + if( rc ) return rc; + } + + if( pCur->pKeyInfo==0 ){ + assert( pKey==0 ); + /* If this is an insert into a table b-tree, invalidate any incrblob + ** cursors open on the row being replaced */ + invalidateIncrblobCursors(p, nKey, 0); + + /* If the cursor is currently on the last row and we are appending a + ** new row onto the end, set the "loc" to avoid an unnecessary + ** btreeMoveto() call */ + if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0 + && pCur->info.nKey==nKey-1 ){ + loc = -1; + }else if( loc==0 ){ + rc = sqlite3BtreeMovetoUnpacked(pCur, 0, nKey, appendBias, &loc); + if( rc ) return rc; + } + }else if( loc==0 ){ + rc = btreeMoveto(pCur, pKey, nKey, appendBias, &loc); + if( rc ) return rc; + } + assert( pCur->eState==CURSOR_VALID || (pCur->eState==CURSOR_INVALID && loc) ); + + pPage = pCur->apPage[pCur->iPage]; + assert( pPage->intKey || nKey>=0 ); + assert( pPage->leaf || !pPage->intKey ); + + TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n", + pCur->pgnoRoot, nKey, nData, pPage->pgno, + loc==0 ? "overwrite" : "new entry")); + assert( pPage->isInit ); + newCell = pBt->pTmpSpace; + assert( newCell!=0 ); + rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew); + if( rc ) goto end_insert; + assert( szNew==pPage->xCellSize(pPage, newCell) ); + assert( szNew <= MX_CELL_SIZE(pBt) ); + idx = pCur->aiIdx[pCur->iPage]; + if( loc==0 ){ + u16 szOld; + assert( idx<pPage->nCell ); + rc = sqlite3PagerWrite(pPage->pDbPage); + if( rc ){ + goto end_insert; + } + oldCell = findCell(pPage, idx); + if( !pPage->leaf ){ + memcpy(newCell, oldCell, 4); + } + rc = clearCell(pPage, oldCell, &szOld); + dropCell(pPage, idx, szOld, &rc); + if( rc ) goto end_insert; + }else if( loc<0 && pPage->nCell>0 ){ + assert( pPage->leaf ); + idx = ++pCur->aiIdx[pCur->iPage]; + }else{ + assert( pPage->leaf ); + } + insertCell(pPage, idx, newCell, szNew, 0, 0, &rc); + assert( rc!=SQLITE_OK || pPage->nCell>0 || pPage->nOverflow>0 ); + + /* If no error has occurred and pPage has an overflow cell, call balance() + ** to redistribute the cells within the tree. Since balance() may move + ** the cursor, zero the BtCursor.info.nSize and BTCF_ValidNKey + ** variables. + ** + ** Previous versions of SQLite called moveToRoot() to move the cursor + ** back to the root page as balance() used to invalidate the contents + ** of BtCursor.apPage[] and BtCursor.aiIdx[]. Instead of doing that, + ** set the cursor state to "invalid". This makes common insert operations + ** slightly faster. + ** + ** There is a subtle but important optimization here too. When inserting + ** multiple records into an intkey b-tree using a single cursor (as can + ** happen while processing an "INSERT INTO ... SELECT" statement), it + ** is advantageous to leave the cursor pointing to the last entry in + ** the b-tree if possible. If the cursor is left pointing to the last + ** entry in the table, and the next row inserted has an integer key + ** larger than the largest existing key, it is possible to insert the + ** row without seeking the cursor. This can be a big performance boost. + */ + pCur->info.nSize = 0; + if( rc==SQLITE_OK && pPage->nOverflow ){ + pCur->curFlags &= ~(BTCF_ValidNKey); + rc = balance(pCur); + + /* Must make sure nOverflow is reset to zero even if the balance() + ** fails. Internal data structure corruption will result otherwise. + ** Also, set the cursor state to invalid. This stops saveCursorPosition() + ** from trying to save the current position of the cursor. */ + pCur->apPage[pCur->iPage]->nOverflow = 0; + pCur->eState = CURSOR_INVALID; + } + assert( pCur->apPage[pCur->iPage]->nOverflow==0 ); + +end_insert: + return rc; +} + +/* +** Delete the entry that the cursor is pointing to. +** +** If the second parameter is zero, then the cursor is left pointing at an +** arbitrary location after the delete. If it is non-zero, then the cursor +** is left in a state such that the next call to BtreeNext() or BtreePrev() +** moves it to the same row as it would if the call to BtreeDelete() had +** been omitted. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeDelete(BtCursor *pCur, int bPreserve){ + Btree *p = pCur->pBtree; + BtShared *pBt = p->pBt; + int rc; /* Return code */ + MemPage *pPage; /* Page to delete cell from */ + unsigned char *pCell; /* Pointer to cell to delete */ + int iCellIdx; /* Index of cell to delete */ + int iCellDepth; /* Depth of node containing pCell */ + u16 szCell; /* Size of the cell being deleted */ + int bSkipnext = 0; /* Leaf cursor in SKIPNEXT state */ + + assert( cursorHoldsMutex(pCur) ); + assert( pBt->inTransaction==TRANS_WRITE ); + assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); + assert( pCur->curFlags & BTCF_WriteFlag ); + assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); + assert( !hasReadConflicts(p, pCur->pgnoRoot) ); + assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); + assert( pCur->eState==CURSOR_VALID ); + + iCellDepth = pCur->iPage; + iCellIdx = pCur->aiIdx[iCellDepth]; + pPage = pCur->apPage[iCellDepth]; + pCell = findCell(pPage, iCellIdx); + + /* If the page containing the entry to delete is not a leaf page, move + ** the cursor to the largest entry in the tree that is smaller than + ** the entry being deleted. This cell will replace the cell being deleted + ** from the internal node. The 'previous' entry is used for this instead + ** of the 'next' entry, as the previous entry is always a part of the + ** sub-tree headed by the child page of the cell being deleted. This makes + ** balancing the tree following the delete operation easier. */ + if( !pPage->leaf ){ + int notUsed = 0; + rc = sqlite3BtreePrevious(pCur, ¬Used); + if( rc ) return rc; + } + + /* Save the positions of any other cursors open on this table before + ** making any modifications. */ + if( pCur->curFlags & BTCF_Multiple ){ + rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); + if( rc ) return rc; + } + + /* If this is a delete operation to remove a row from a table b-tree, + ** invalidate any incrblob cursors open on the row being deleted. */ + if( pCur->pKeyInfo==0 ){ + invalidateIncrblobCursors(p, pCur->info.nKey, 0); + } + + /* If the bPreserve flag is set to true, then the cursor position must + ** be preserved following this delete operation. If the current delete + ** will cause a b-tree rebalance, then this is done by saving the cursor + ** key and leaving the cursor in CURSOR_REQUIRESEEK state before + ** returning. + ** + ** Or, if the current delete will not cause a rebalance, then the cursor + ** will be left in CURSOR_SKIPNEXT state pointing to the entry immediately + ** before or after the deleted entry. In this case set bSkipnext to true. */ + if( bPreserve ){ + if( !pPage->leaf + || (pPage->nFree+cellSizePtr(pPage,pCell)+2)>(int)(pBt->usableSize*2/3) + ){ + /* A b-tree rebalance will be required after deleting this entry. + ** Save the cursor key. */ + rc = saveCursorKey(pCur); + if( rc ) return rc; + }else{ + bSkipnext = 1; + } + } + + /* Make the page containing the entry to be deleted writable. Then free any + ** overflow pages associated with the entry and finally remove the cell + ** itself from within the page. */ + rc = sqlite3PagerWrite(pPage->pDbPage); + if( rc ) return rc; + rc = clearCell(pPage, pCell, &szCell); + dropCell(pPage, iCellIdx, szCell, &rc); + if( rc ) return rc; + + /* If the cell deleted was not located on a leaf page, then the cursor + ** is currently pointing to the largest entry in the sub-tree headed + ** by the child-page of the cell that was just deleted from an internal + ** node. The cell from the leaf node needs to be moved to the internal + ** node to replace the deleted cell. */ + if( !pPage->leaf ){ + MemPage *pLeaf = pCur->apPage[pCur->iPage]; + int nCell; + Pgno n = pCur->apPage[iCellDepth+1]->pgno; + unsigned char *pTmp; + + pCell = findCell(pLeaf, pLeaf->nCell-1); + if( pCell<&pLeaf->aData[4] ) return SQLITE_CORRUPT_BKPT; + nCell = pLeaf->xCellSize(pLeaf, pCell); + assert( MX_CELL_SIZE(pBt) >= nCell ); + pTmp = pBt->pTmpSpace; + assert( pTmp!=0 ); + rc = sqlite3PagerWrite(pLeaf->pDbPage); + insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc); + dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc); + if( rc ) return rc; + } + + /* Balance the tree. If the entry deleted was located on a leaf page, + ** then the cursor still points to that page. In this case the first + ** call to balance() repairs the tree, and the if(...) condition is + ** never true. + ** + ** Otherwise, if the entry deleted was on an internal node page, then + ** pCur is pointing to the leaf page from which a cell was removed to + ** replace the cell deleted from the internal node. This is slightly + ** tricky as the leaf node may be underfull, and the internal node may + ** be either under or overfull. In this case run the balancing algorithm + ** on the leaf node first. If the balance proceeds far enough up the + ** tree that we can be sure that any problem in the internal node has + ** been corrected, so be it. Otherwise, after balancing the leaf node, + ** walk the cursor up the tree to the internal node and balance it as + ** well. */ + rc = balance(pCur); + if( rc==SQLITE_OK && pCur->iPage>iCellDepth ){ + while( pCur->iPage>iCellDepth ){ + releasePage(pCur->apPage[pCur->iPage--]); + } + rc = balance(pCur); + } + + if( rc==SQLITE_OK ){ + if( bSkipnext ){ + assert( bPreserve && pCur->iPage==iCellDepth ); + assert( pPage==pCur->apPage[pCur->iPage] ); + assert( (pPage->nCell>0 || CORRUPT_DB) && iCellIdx<=pPage->nCell ); + pCur->eState = CURSOR_SKIPNEXT; + if( iCellIdx>=pPage->nCell ){ + pCur->skipNext = -1; + pCur->aiIdx[iCellDepth] = pPage->nCell-1; + }else{ + pCur->skipNext = 1; + } + }else{ + rc = moveToRoot(pCur); + if( bPreserve ){ + pCur->eState = CURSOR_REQUIRESEEK; + } + } + } + return rc; +} + +/* +** Create a new BTree table. Write into *piTable the page +** number for the root page of the new table. +** +** The type of type is determined by the flags parameter. Only the +** following values of flags are currently in use. Other values for +** flags might not work: +** +** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys +** BTREE_ZERODATA Used for SQL indices +*/ +static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ + BtShared *pBt = p->pBt; + MemPage *pRoot; + Pgno pgnoRoot; + int rc; + int ptfFlags; /* Page-type flage for the root page of new table */ + + assert( sqlite3BtreeHoldsMutex(p) ); + assert( pBt->inTransaction==TRANS_WRITE ); + assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); + +#ifdef SQLITE_OMIT_AUTOVACUUM + rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); + if( rc ){ + return rc; + } +#else + if( pBt->autoVacuum ){ + Pgno pgnoMove; /* Move a page here to make room for the root-page */ + MemPage *pPageMove; /* The page to move to. */ + + /* Creating a new table may probably require moving an existing database + ** to make room for the new tables root page. In case this page turns + ** out to be an overflow page, delete all overflow page-map caches + ** held by open cursors. + */ + invalidateAllOverflowCache(pBt); + + /* Read the value of meta[3] from the database to determine where the + ** root page of the new table should go. meta[3] is the largest root-page + ** created so far, so the new root-page is (meta[3]+1). + */ + sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &pgnoRoot); + pgnoRoot++; + + /* The new root-page may not be allocated on a pointer-map page, or the + ** PENDING_BYTE page. + */ + while( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) || + pgnoRoot==PENDING_BYTE_PAGE(pBt) ){ + pgnoRoot++; + } + assert( pgnoRoot>=3 || CORRUPT_DB ); + testcase( pgnoRoot<3 ); + + /* Allocate a page. The page that currently resides at pgnoRoot will + ** be moved to the allocated page (unless the allocated page happens + ** to reside at pgnoRoot). + */ + rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, BTALLOC_EXACT); + if( rc!=SQLITE_OK ){ + return rc; + } + + if( pgnoMove!=pgnoRoot ){ + /* pgnoRoot is the page that will be used for the root-page of + ** the new table (assuming an error did not occur). But we were + ** allocated pgnoMove. If required (i.e. if it was not allocated + ** by extending the file), the current page at position pgnoMove + ** is already journaled. + */ + u8 eType = 0; + Pgno iPtrPage = 0; + + /* Save the positions of any open cursors. This is required in + ** case they are holding a reference to an xFetch reference + ** corresponding to page pgnoRoot. */ + rc = saveAllCursors(pBt, 0, 0); + releasePage(pPageMove); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Move the page currently at pgnoRoot to pgnoMove. */ + rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage); + if( eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){ + rc = SQLITE_CORRUPT_BKPT; + } + if( rc!=SQLITE_OK ){ + releasePage(pRoot); + return rc; + } + assert( eType!=PTRMAP_ROOTPAGE ); + assert( eType!=PTRMAP_FREEPAGE ); + rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove, 0); + releasePage(pRoot); + + /* Obtain the page at pgnoRoot */ + if( rc!=SQLITE_OK ){ + return rc; + } + rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + rc = sqlite3PagerWrite(pRoot->pDbPage); + if( rc!=SQLITE_OK ){ + releasePage(pRoot); + return rc; + } + }else{ + pRoot = pPageMove; + } + + /* Update the pointer-map and meta-data with the new root-page number. */ + ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0, &rc); + if( rc ){ + releasePage(pRoot); + return rc; + } + + /* When the new root page was allocated, page 1 was made writable in + ** order either to increase the database filesize, or to decrement the + ** freelist count. Hence, the sqlite3BtreeUpdateMeta() call cannot fail. + */ + assert( sqlite3PagerIswriteable(pBt->pPage1->pDbPage) ); + rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot); + if( NEVER(rc) ){ + releasePage(pRoot); + return rc; + } + + }else{ + rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); + if( rc ) return rc; + } +#endif + assert( sqlite3PagerIswriteable(pRoot->pDbPage) ); + if( createTabFlags & BTREE_INTKEY ){ + ptfFlags = PTF_INTKEY | PTF_LEAFDATA | PTF_LEAF; + }else{ + ptfFlags = PTF_ZERODATA | PTF_LEAF; + } + zeroPage(pRoot, ptfFlags); + sqlite3PagerUnref(pRoot->pDbPage); + assert( (pBt->openFlags & BTREE_SINGLE)==0 || pgnoRoot==2 ); + *piTable = (int)pgnoRoot; + return SQLITE_OK; +} +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){ + int rc; + sqlite3BtreeEnter(p); + rc = btreeCreateTable(p, piTable, flags); + sqlite3BtreeLeave(p); + return rc; +} + +/* +** Erase the given database page and all its children. Return +** the page to the freelist. +*/ +static int clearDatabasePage( + BtShared *pBt, /* The BTree that contains the table */ + Pgno pgno, /* Page number to clear */ + int freePageFlag, /* Deallocate page if true */ + int *pnChange /* Add number of Cells freed to this counter */ +){ + MemPage *pPage; + int rc; + unsigned char *pCell; + int i; + int hdr; + u16 szCell; + + assert( sqlite3_mutex_held(pBt->mutex) ); + if( pgno>btreePagecount(pBt) ){ + return SQLITE_CORRUPT_BKPT; + } + rc = getAndInitPage(pBt, pgno, &pPage, 0, 0); + if( rc ) return rc; + if( pPage->bBusy ){ + rc = SQLITE_CORRUPT_BKPT; + goto cleardatabasepage_out; + } + pPage->bBusy = 1; + hdr = pPage->hdrOffset; + for(i=0; i<pPage->nCell; i++){ + pCell = findCell(pPage, i); + if( !pPage->leaf ){ + rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange); + if( rc ) goto cleardatabasepage_out; + } + rc = clearCell(pPage, pCell, &szCell); + if( rc ) goto cleardatabasepage_out; + } + if( !pPage->leaf ){ + rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange); + if( rc ) goto cleardatabasepage_out; + }else if( pnChange ){ + assert( pPage->intKey || CORRUPT_DB ); + testcase( !pPage->intKey ); + *pnChange += pPage->nCell; + } + if( freePageFlag ){ + freePage(pPage, &rc); + }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){ + zeroPage(pPage, pPage->aData[hdr] | PTF_LEAF); + } + +cleardatabasepage_out: + pPage->bBusy = 0; + releasePage(pPage); + return rc; +} + +/* +** Delete all information from a single table in the database. iTable is +** the page number of the root of the table. After this routine returns, +** the root page is empty, but still exists. +** +** This routine will fail with SQLITE_LOCKED if there are any open +** read cursors on the table. Open write cursors are moved to the +** root of the table. +** +** If pnChange is not NULL, then table iTable must be an intkey table. The +** integer value pointed to by pnChange is incremented by the number of +** entries in the table. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeClearTable(Btree *p, int iTable, int *pnChange){ + int rc; + BtShared *pBt = p->pBt; + sqlite3BtreeEnter(p); + assert( p->inTrans==TRANS_WRITE ); + + rc = saveAllCursors(pBt, (Pgno)iTable, 0); + + if( SQLITE_OK==rc ){ + /* Invalidate all incrblob cursors open on table iTable (assuming iTable + ** is the root of a table b-tree - if it is not, the following call is + ** a no-op). */ + invalidateIncrblobCursors(p, 0, 1); + rc = clearDatabasePage(pBt, (Pgno)iTable, 0, pnChange); + } + sqlite3BtreeLeave(p); + return rc; +} + +/* +** Delete all information from the single table that pCur is open on. +** +** This routine only work for pCur on an ephemeral table. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeClearTableOfCursor(BtCursor *pCur){ + return sqlite3BtreeClearTable(pCur->pBtree, pCur->pgnoRoot, 0); +} + +/* +** Erase all information in a table and add the root of the table to +** the freelist. Except, the root of the principle table (the one on +** page 1) is never added to the freelist. +** +** This routine will fail with SQLITE_LOCKED if there are any open +** cursors on the table. +** +** If AUTOVACUUM is enabled and the page at iTable is not the last +** root page in the database file, then the last root page +** in the database file is moved into the slot formerly occupied by +** iTable and that last slot formerly occupied by the last root page +** is added to the freelist instead of iTable. In this say, all +** root pages are kept at the beginning of the database file, which +** is necessary for AUTOVACUUM to work right. *piMoved is set to the +** page number that used to be the last root page in the file before +** the move. If no page gets moved, *piMoved is set to 0. +** The last root page is recorded in meta[3] and the value of +** meta[3] is updated by this procedure. +*/ +static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ + int rc; + MemPage *pPage = 0; + BtShared *pBt = p->pBt; + + assert( sqlite3BtreeHoldsMutex(p) ); + assert( p->inTrans==TRANS_WRITE ); + + /* It is illegal to drop a table if any cursors are open on the + ** database. This is because in auto-vacuum mode the backend may + ** need to move another root-page to fill a gap left by the deleted + ** root page. If an open cursor was using this page a problem would + ** occur. + ** + ** This error is caught long before control reaches this point. + */ + if( NEVER(pBt->pCursor) ){ + sqlite3ConnectionBlocked(p->db, pBt->pCursor->pBtree->db); + return SQLITE_LOCKED_SHAREDCACHE; + } + + rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0); + if( rc ) return rc; + rc = sqlite3BtreeClearTable(p, iTable, 0); + if( rc ){ + releasePage(pPage); + return rc; + } + + *piMoved = 0; + + if( iTable>1 ){ +#ifdef SQLITE_OMIT_AUTOVACUUM + freePage(pPage, &rc); + releasePage(pPage); +#else + if( pBt->autoVacuum ){ + Pgno maxRootPgno; + sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &maxRootPgno); + + if( iTable==maxRootPgno ){ + /* If the table being dropped is the table with the largest root-page + ** number in the database, put the root page on the free list. + */ + freePage(pPage, &rc); + releasePage(pPage); + if( rc!=SQLITE_OK ){ + return rc; + } + }else{ + /* The table being dropped does not have the largest root-page + ** number in the database. So move the page that does into the + ** gap left by the deleted root-page. + */ + MemPage *pMove; + releasePage(pPage); + rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0); + releasePage(pMove); + if( rc!=SQLITE_OK ){ + return rc; + } + pMove = 0; + rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); + freePage(pMove, &rc); + releasePage(pMove); + if( rc!=SQLITE_OK ){ + return rc; + } + *piMoved = maxRootPgno; + } + + /* Set the new 'max-root-page' value in the database header. This + ** is the old value less one, less one more if that happens to + ** be a root-page number, less one again if that is the + ** PENDING_BYTE_PAGE. + */ + maxRootPgno--; + while( maxRootPgno==PENDING_BYTE_PAGE(pBt) + || PTRMAP_ISPAGE(pBt, maxRootPgno) ){ + maxRootPgno--; + } + assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) ); + + rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno); + }else{ + freePage(pPage, &rc); + releasePage(pPage); + } +#endif + }else{ + /* If sqlite3BtreeDropTable was called on page 1. + ** This really never should happen except in a corrupt + ** database. + */ + zeroPage(pPage, PTF_INTKEY|PTF_LEAF ); + releasePage(pPage); + } + return rc; +} +SQLITE_API int SQLITE_STDCALL sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){ + int rc; + sqlite3BtreeEnter(p); + rc = btreeDropTable(p, iTable, piMoved); + sqlite3BtreeLeave(p); + return rc; +} + + +/* +** This function may only be called if the b-tree connection already +** has a read or write transaction open on the database. +** +** Read the meta-information out of a database file. Meta[0] +** is the number of free pages currently in the database. Meta[1] +** through meta[15] are available for use by higher layers. Meta[0] +** is read-only, the others are read/write. +** +** The schema layer numbers meta values differently. At the schema +** layer (and the SetCookie and ReadCookie opcodes) the number of +** free pages is not visible. So Cookie[0] is the same as Meta[1]. +** +** This routine treats Meta[BTREE_DATA_VERSION] as a special case. Instead +** of reading the value out of the header, it instead loads the "DataVersion" +** from the pager. The BTREE_DATA_VERSION value is not actually stored in the +** database file. It is a number computed by the pager. But its access +** pattern is the same as header meta values, and so it is convenient to +** read it from this routine. +*/ +SQLITE_API void SQLITE_STDCALL sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){ + BtShared *pBt = p->pBt; + + sqlite3BtreeEnter(p); + assert( p->inTrans>TRANS_NONE ); + assert( SQLITE_OK==querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK) ); + assert( pBt->pPage1 ); + assert( idx>=0 && idx<=15 ); + + if( idx==BTREE_DATA_VERSION ){ + *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iDataVersion; + }else{ + *pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]); + } + + /* If auto-vacuum is disabled in this build and this is an auto-vacuum + ** database, mark the database as read-only. */ +#ifdef SQLITE_OMIT_AUTOVACUUM + if( idx==BTREE_LARGEST_ROOT_PAGE && *pMeta>0 ){ + pBt->btsFlags |= BTS_READ_ONLY; + } +#endif + + sqlite3BtreeLeave(p); +} + +/* +** Write meta-information back into the database. Meta[0] is +** read-only and may not be written. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){ + BtShared *pBt = p->pBt; + unsigned char *pP1; + int rc; + assert( idx>=1 && idx<=15 ); + sqlite3BtreeEnter(p); + assert( p->inTrans==TRANS_WRITE ); + assert( pBt->pPage1!=0 ); + pP1 = pBt->pPage1->aData; + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + if( rc==SQLITE_OK ){ + put4byte(&pP1[36 + idx*4], iMeta); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( idx==BTREE_INCR_VACUUM ){ + assert( pBt->autoVacuum || iMeta==0 ); + assert( iMeta==0 || iMeta==1 ); + pBt->incrVacuum = (u8)iMeta; + } +#endif + } + sqlite3BtreeLeave(p); + return rc; +} + +#ifndef SQLITE_OMIT_BTREECOUNT +/* +** The first argument, pCur, is a cursor opened on some b-tree. Count the +** number of entries in the b-tree and write the result to *pnEntry. +** +** SQLITE_OK is returned if the operation is successfully executed. +** Otherwise, if an error is encountered (i.e. an IO error or database +** corruption) an SQLite error code is returned. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCount(BtCursor *pCur, i64 *pnEntry){ + i64 nEntry = 0; /* Value to return in *pnEntry */ + int rc; /* Return code */ + + if( pCur->pgnoRoot==0 ){ + *pnEntry = 0; + return SQLITE_OK; + } + rc = moveToRoot(pCur); + + /* Unless an error occurs, the following loop runs one iteration for each + ** page in the B-Tree structure (not including overflow pages). + */ + while( rc==SQLITE_OK ){ + int iIdx; /* Index of child node in parent */ + MemPage *pPage; /* Current page of the b-tree */ + + /* If this is a leaf page or the tree is not an int-key tree, then + ** this page contains countable entries. Increment the entry counter + ** accordingly. + */ + pPage = pCur->apPage[pCur->iPage]; + if( pPage->leaf || !pPage->intKey ){ + nEntry += pPage->nCell; + } + + /* pPage is a leaf node. This loop navigates the cursor so that it + ** points to the first interior cell that it points to the parent of + ** the next page in the tree that has not yet been visited. The + ** pCur->aiIdx[pCur->iPage] value is set to the index of the parent cell + ** of the page, or to the number of cells in the page if the next page + ** to visit is the right-child of its parent. + ** + ** If all pages in the tree have been visited, return SQLITE_OK to the + ** caller. + */ + if( pPage->leaf ){ + do { + if( pCur->iPage==0 ){ + /* All pages of the b-tree have been visited. Return successfully. */ + *pnEntry = nEntry; + return moveToRoot(pCur); + } + moveToParent(pCur); + }while ( pCur->aiIdx[pCur->iPage]>=pCur->apPage[pCur->iPage]->nCell ); + + pCur->aiIdx[pCur->iPage]++; + pPage = pCur->apPage[pCur->iPage]; + } + + /* Descend to the child node of the cell that the cursor currently + ** points at. This is the right-child if (iIdx==pPage->nCell). + */ + iIdx = pCur->aiIdx[pCur->iPage]; + if( iIdx==pPage->nCell ){ + rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); + }else{ + rc = moveToChild(pCur, get4byte(findCell(pPage, iIdx))); + } + } + + /* An error has occurred. Return an error code. */ + return rc; +} +#endif + +/* +** Return the pager associated with a BTree. This routine is used for +** testing and debugging only. +*/ +SQLITE_API Pager *SQLITE_STDCALL sqlite3BtreePager(Btree *p){ + return p->pBt->pPager; +} + +#ifndef SQLITE_OMIT_INTEGRITY_CHECK +/* +** Append a message to the error message string. +*/ +static void checkAppendMsg( + IntegrityCk *pCheck, + const char *zFormat, + ... +){ + va_list ap; + if( !pCheck->mxErr ) return; + pCheck->mxErr--; + pCheck->nErr++; + va_start(ap, zFormat); + if( pCheck->errMsg.nChar ){ + sqlite3StrAccumAppend(&pCheck->errMsg, "\n", 1); + } + if( pCheck->zPfx ){ + sqlite3XPrintf(&pCheck->errMsg, 0, pCheck->zPfx, pCheck->v1, pCheck->v2); + } + sqlite3VXPrintf(&pCheck->errMsg, 1, zFormat, ap); + va_end(ap); + if( pCheck->errMsg.accError==STRACCUM_NOMEM ){ + pCheck->mallocFailed = 1; + } +} +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ + +#ifndef SQLITE_OMIT_INTEGRITY_CHECK + +/* +** Return non-zero if the bit in the IntegrityCk.aPgRef[] array that +** corresponds to page iPg is already set. +*/ +static int getPageReferenced(IntegrityCk *pCheck, Pgno iPg){ + assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 ); + return (pCheck->aPgRef[iPg/8] & (1 << (iPg & 0x07))); +} + +/* +** Set the bit in the IntegrityCk.aPgRef[] array that corresponds to page iPg. +*/ +static void setPageReferenced(IntegrityCk *pCheck, Pgno iPg){ + assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 ); + pCheck->aPgRef[iPg/8] |= (1 << (iPg & 0x07)); +} + + +/* +** Add 1 to the reference count for page iPage. If this is the second +** reference to the page, add an error message to pCheck->zErrMsg. +** Return 1 if there are 2 or more references to the page and 0 if +** if this is the first reference to the page. +** +** Also check that the page number is in bounds. +*/ +static int checkRef(IntegrityCk *pCheck, Pgno iPage){ + if( iPage==0 ) return 1; + if( iPage>pCheck->nPage ){ + checkAppendMsg(pCheck, "invalid page number %d", iPage); + return 1; + } + if( getPageReferenced(pCheck, iPage) ){ + checkAppendMsg(pCheck, "2nd reference to page %d", iPage); + return 1; + } + setPageReferenced(pCheck, iPage); + return 0; +} + +#ifndef SQLITE_OMIT_AUTOVACUUM +/* +** Check that the entry in the pointer-map for page iChild maps to +** page iParent, pointer type ptrType. If not, append an error message +** to pCheck. +*/ +static void checkPtrmap( + IntegrityCk *pCheck, /* Integrity check context */ + Pgno iChild, /* Child page number */ + u8 eType, /* Expected pointer map type */ + Pgno iParent /* Expected pointer map parent page number */ +){ + int rc; + u8 ePtrmapType; + Pgno iPtrmapParent; + + rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ) pCheck->mallocFailed = 1; + checkAppendMsg(pCheck, "Failed to read ptrmap key=%d", iChild); + return; + } + + if( ePtrmapType!=eType || iPtrmapParent!=iParent ){ + checkAppendMsg(pCheck, + "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)", + iChild, eType, iParent, ePtrmapType, iPtrmapParent); + } +} +#endif + +/* +** Check the integrity of the freelist or of an overflow page list. +** Verify that the number of pages on the list is N. +*/ +static void checkList( + IntegrityCk *pCheck, /* Integrity checking context */ + int isFreeList, /* True for a freelist. False for overflow page list */ + int iPage, /* Page number for first page in the list */ + int N /* Expected number of pages in the list */ +){ + int i; + int expected = N; + int iFirst = iPage; + while( N-- > 0 && pCheck->mxErr ){ + DbPage *pOvflPage; + unsigned char *pOvflData; + if( iPage<1 ){ + checkAppendMsg(pCheck, + "%d of %d pages missing from overflow list starting at %d", + N+1, expected, iFirst); + break; + } + if( checkRef(pCheck, iPage) ) break; + if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage) ){ + checkAppendMsg(pCheck, "failed to get page %d", iPage); + break; + } + pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage); + if( isFreeList ){ + int n = get4byte(&pOvflData[4]); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pCheck->pBt->autoVacuum ){ + checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0); + } +#endif + if( n>(int)pCheck->pBt->usableSize/4-2 ){ + checkAppendMsg(pCheck, + "freelist leaf count too big on page %d", iPage); + N--; + }else{ + for(i=0; i<n; i++){ + Pgno iFreePage = get4byte(&pOvflData[8+i*4]); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pCheck->pBt->autoVacuum ){ + checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0); + } +#endif + checkRef(pCheck, iFreePage); + } + N -= n; + } + } +#ifndef SQLITE_OMIT_AUTOVACUUM + else{ + /* If this database supports auto-vacuum and iPage is not the last + ** page in this overflow list, check that the pointer-map entry for + ** the following page matches iPage. + */ + if( pCheck->pBt->autoVacuum && N>0 ){ + i = get4byte(pOvflData); + checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage); + } + } +#endif + iPage = get4byte(pOvflData); + sqlite3PagerUnref(pOvflPage); + + if( isFreeList && N<(iPage!=0) ){ + checkAppendMsg(pCheck, "free-page count in header is too small"); + } + } +} +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ + +/* +** An implementation of a min-heap. +** +** aHeap[0] is the number of elements on the heap. aHeap[1] is the +** root element. The daughter nodes of aHeap[N] are aHeap[N*2] +** and aHeap[N*2+1]. +** +** The heap property is this: Every node is less than or equal to both +** of its daughter nodes. A consequence of the heap property is that the +** root node aHeap[1] is always the minimum value currently in the heap. +** +** The btreeHeapInsert() routine inserts an unsigned 32-bit number onto +** the heap, preserving the heap property. The btreeHeapPull() routine +** removes the root element from the heap (the minimum value in the heap) +** and then moves other nodes around as necessary to preserve the heap +** property. +** +** This heap is used for cell overlap and coverage testing. Each u32 +** entry represents the span of a cell or freeblock on a btree page. +** The upper 16 bits are the index of the first byte of a range and the +** lower 16 bits are the index of the last byte of that range. +*/ +static void btreeHeapInsert(u32 *aHeap, u32 x){ + u32 j, i = ++aHeap[0]; + aHeap[i] = x; + while( (j = i/2)>0 && aHeap[j]>aHeap[i] ){ + x = aHeap[j]; + aHeap[j] = aHeap[i]; + aHeap[i] = x; + i = j; + } +} +static int btreeHeapPull(u32 *aHeap, u32 *pOut){ + u32 j, i, x; + if( (x = aHeap[0])==0 ) return 0; + *pOut = aHeap[1]; + aHeap[1] = aHeap[x]; + aHeap[x] = 0xffffffff; + aHeap[0]--; + i = 1; + while( (j = i*2)<=aHeap[0] ){ + if( aHeap[j]>aHeap[j+1] ) j++; + if( aHeap[i]<aHeap[j] ) break; + x = aHeap[i]; + aHeap[i] = aHeap[j]; + aHeap[j] = x; + i = j; + } + return 1; +} + +#ifndef SQLITE_OMIT_INTEGRITY_CHECK +/* +** Do various sanity checks on a single page of a tree. Return +** the tree depth. Root pages return 0. Parents of root pages +** return 1, and so forth. +** +** These checks are done: +** +** 1. Make sure that cells and freeblocks do not overlap +** but combine to completely cover the page. +** 2. Make sure integer cell keys are in order. +** 3. Check the integrity of overflow pages. +** 4. Recursively call checkTreePage on all children. +** 5. Verify that the depth of all children is the same. +*/ +static int checkTreePage( + IntegrityCk *pCheck, /* Context for the sanity check */ + int iPage, /* Page number of the page to check */ + i64 *piMinKey, /* Write minimum integer primary key here */ + i64 maxKey /* Error if integer primary key greater than this */ +){ + MemPage *pPage = 0; /* The page being analyzed */ + int i; /* Loop counter */ + int rc; /* Result code from subroutine call */ + int depth = -1, d2; /* Depth of a subtree */ + int pgno; /* Page number */ + int nFrag; /* Number of fragmented bytes on the page */ + int hdr; /* Offset to the page header */ + int cellStart; /* Offset to the start of the cell pointer array */ + int nCell; /* Number of cells */ + int doCoverageCheck = 1; /* True if cell coverage checking should be done */ + int keyCanBeEqual = 1; /* True if IPK can be equal to maxKey + ** False if IPK must be strictly less than maxKey */ + u8 *data; /* Page content */ + u8 *pCell; /* Cell content */ + u8 *pCellIdx; /* Next element of the cell pointer array */ + BtShared *pBt; /* The BtShared object that owns pPage */ + u32 pc; /* Address of a cell */ + u32 usableSize; /* Usable size of the page */ + u32 contentOffset; /* Offset to the start of the cell content area */ + u32 *heap = 0; /* Min-heap used for checking cell coverage */ + u32 x, prev = 0; /* Next and previous entry on the min-heap */ + const char *saved_zPfx = pCheck->zPfx; + int saved_v1 = pCheck->v1; + int saved_v2 = pCheck->v2; + u8 savedIsInit = 0; + + /* Check that the page exists + */ + pBt = pCheck->pBt; + usableSize = pBt->usableSize; + if( iPage==0 ) return 0; + if( checkRef(pCheck, iPage) ) return 0; + pCheck->zPfx = "Page %d: "; + pCheck->v1 = iPage; + if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ + checkAppendMsg(pCheck, + "unable to get the page. error code=%d", rc); + goto end_of_check; + } + + /* Clear MemPage.isInit to make sure the corruption detection code in + ** btreeInitPage() is executed. */ + savedIsInit = pPage->isInit; + pPage->isInit = 0; + if( (rc = btreeInitPage(pPage))!=0 ){ + assert( rc==SQLITE_CORRUPT ); /* The only possible error from InitPage */ + checkAppendMsg(pCheck, + "btreeInitPage() returns error code %d", rc); + goto end_of_check; + } + data = pPage->aData; + hdr = pPage->hdrOffset; + + /* Set up for cell analysis */ + pCheck->zPfx = "On tree page %d cell %d: "; + contentOffset = get2byteNotZero(&data[hdr+5]); + assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */ + + /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the + ** number of cells on the page. */ + nCell = get2byte(&data[hdr+3]); + assert( pPage->nCell==nCell ); + + /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page + ** immediately follows the b-tree page header. */ + cellStart = hdr + 12 - 4*pPage->leaf; + assert( pPage->aCellIdx==&data[cellStart] ); + pCellIdx = &data[cellStart + 2*(nCell-1)]; + + if( !pPage->leaf ){ + /* Analyze the right-child page of internal pages */ + pgno = get4byte(&data[hdr+8]); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum ){ + pCheck->zPfx = "On page %d at right child: "; + checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); + } +#endif + depth = checkTreePage(pCheck, pgno, &maxKey, maxKey); + keyCanBeEqual = 0; + }else{ + /* For leaf pages, the coverage check will occur in the same loop + ** as the other cell checks, so initialize the heap. */ + heap = pCheck->heap; + heap[0] = 0; + } + + /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte + ** integer offsets to the cell contents. */ + for(i=nCell-1; i>=0 && pCheck->mxErr; i--){ + CellInfo info; + + /* Check cell size */ + pCheck->v2 = i; + assert( pCellIdx==&data[cellStart + i*2] ); + pc = get2byteAligned(pCellIdx); + pCellIdx -= 2; + if( pc<contentOffset || pc>usableSize-4 ){ + checkAppendMsg(pCheck, "Offset %d out of range %d..%d", + pc, contentOffset, usableSize-4); + doCoverageCheck = 0; + continue; + } + pCell = &data[pc]; + pPage->xParseCell(pPage, pCell, &info); + if( pc+info.nSize>usableSize ){ + checkAppendMsg(pCheck, "Extends off end of page"); + doCoverageCheck = 0; + continue; + } + + /* Check for integer primary key out of range */ + if( pPage->intKey ){ + if( keyCanBeEqual ? (info.nKey > maxKey) : (info.nKey >= maxKey) ){ + checkAppendMsg(pCheck, "Rowid %lld out of order", info.nKey); + } + maxKey = info.nKey; + } + + /* Check the content overflow list */ + if( info.nPayload>info.nLocal ){ + int nPage; /* Number of pages on the overflow chain */ + Pgno pgnoOvfl; /* First page of the overflow chain */ + assert( pc + info.iOverflow <= usableSize ); + nPage = (info.nPayload - info.nLocal + usableSize - 5)/(usableSize - 4); + pgnoOvfl = get4byte(&pCell[info.iOverflow]); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum ){ + checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage); + } +#endif + checkList(pCheck, 0, pgnoOvfl, nPage); + } + + if( !pPage->leaf ){ + /* Check sanity of left child page for internal pages */ + pgno = get4byte(pCell); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum ){ + checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); + } +#endif + d2 = checkTreePage(pCheck, pgno, &maxKey, maxKey); + keyCanBeEqual = 0; + if( d2!=depth ){ + checkAppendMsg(pCheck, "Child page depth differs"); + depth = d2; + } + }else{ + /* Populate the coverage-checking heap for leaf pages */ + btreeHeapInsert(heap, (pc<<16)|(pc+info.nSize-1)); + } + } + *piMinKey = maxKey; + + /* Check for complete coverage of the page + */ + pCheck->zPfx = 0; + if( doCoverageCheck && pCheck->mxErr>0 ){ + /* For leaf pages, the min-heap has already been initialized and the + ** cells have already been inserted. But for internal pages, that has + ** not yet been done, so do it now */ + if( !pPage->leaf ){ + heap = pCheck->heap; + heap[0] = 0; + for(i=nCell-1; i>=0; i--){ + u32 size; + pc = get2byteAligned(&data[cellStart+i*2]); + size = pPage->xCellSize(pPage, &data[pc]); + btreeHeapInsert(heap, (pc<<16)|(pc+size-1)); + } + } + /* Add the freeblocks to the min-heap + ** + ** EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header + ** is the offset of the first freeblock, or zero if there are no + ** freeblocks on the page. + */ + i = get2byte(&data[hdr+1]); + while( i>0 ){ + int size, j; + assert( (u32)i<=usableSize-4 ); /* Enforced by btreeInitPage() */ + size = get2byte(&data[i+2]); + assert( (u32)(i+size)<=usableSize ); /* Enforced by btreeInitPage() */ + btreeHeapInsert(heap, (((u32)i)<<16)|(i+size-1)); + /* EVIDENCE-OF: R-58208-19414 The first 2 bytes of a freeblock are a + ** big-endian integer which is the offset in the b-tree page of the next + ** freeblock in the chain, or zero if the freeblock is the last on the + ** chain. */ + j = get2byte(&data[i]); + /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of + ** increasing offset. */ + assert( j==0 || j>i+size ); /* Enforced by btreeInitPage() */ + assert( (u32)j<=usableSize-4 ); /* Enforced by btreeInitPage() */ + i = j; + } + /* Analyze the min-heap looking for overlap between cells and/or + ** freeblocks, and counting the number of untracked bytes in nFrag. + ** + ** Each min-heap entry is of the form: (start_address<<16)|end_address. + ** There is an implied first entry the covers the page header, the cell + ** pointer index, and the gap between the cell pointer index and the start + ** of cell content. + ** + ** The loop below pulls entries from the min-heap in order and compares + ** the start_address against the previous end_address. If there is an + ** overlap, that means bytes are used multiple times. If there is a gap, + ** that gap is added to the fragmentation count. + */ + nFrag = 0; + prev = contentOffset - 1; /* Implied first min-heap entry */ + while( btreeHeapPull(heap,&x) ){ + if( (prev&0xffff)>=(x>>16) ){ + checkAppendMsg(pCheck, + "Multiple uses for byte %u of page %d", x>>16, iPage); + break; + }else{ + nFrag += (x>>16) - (prev&0xffff) - 1; + prev = x; + } + } + nFrag += usableSize - (prev&0xffff) - 1; + /* EVIDENCE-OF: R-43263-13491 The total number of bytes in all fragments + ** is stored in the fifth field of the b-tree page header. + ** EVIDENCE-OF: R-07161-27322 The one-byte integer at offset 7 gives the + ** number of fragmented free bytes within the cell content area. + */ + if( heap[0]==0 && nFrag!=data[hdr+7] ){ + checkAppendMsg(pCheck, + "Fragmentation of %d bytes reported as %d on page %d", + nFrag, data[hdr+7], iPage); + } + } + +end_of_check: + if( !doCoverageCheck ) pPage->isInit = savedIsInit; + releasePage(pPage); + pCheck->zPfx = saved_zPfx; + pCheck->v1 = saved_v1; + pCheck->v2 = saved_v2; + return depth+1; +} +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ + +#ifndef SQLITE_OMIT_INTEGRITY_CHECK +/* +** This routine does a complete check of the given BTree file. aRoot[] is +** an array of pages numbers were each page number is the root page of +** a table. nRoot is the number of entries in aRoot. +** +** A read-only or read-write transaction must be opened before calling +** this function. +** +** Write the number of error seen in *pnErr. Except for some memory +** allocation errors, an error message held in memory obtained from +** malloc is returned if *pnErr is non-zero. If *pnErr==0 then NULL is +** returned. If a memory allocation error occurs, NULL is returned. +*/ +SQLITE_API char *SQLITE_STDCALL sqlite3BtreeIntegrityCheck( + Btree *p, /* The btree to be checked */ + int *aRoot, /* An array of root pages numbers for individual trees */ + int nRoot, /* Number of entries in aRoot[] */ + int mxErr, /* Stop reporting errors after this many */ + int *pnErr /* Write number of errors seen to this variable */ +){ + Pgno i; + IntegrityCk sCheck; + BtShared *pBt = p->pBt; + int savedDbFlags = pBt->pBt->flags; + char zErr[100]; + VVA_ONLY( int nRef ); + + sqlite3BtreeEnter(p); + assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE ); + assert( (nRef = sqlite3PagerRefcount(pBt->pPager))>=0 ); + sCheck.pBt = pBt; + sCheck.pPager = pBt->pPager; + sCheck.nPage = btreePagecount(sCheck.pBt); + sCheck.mxErr = mxErr; + sCheck.nErr = 0; + sCheck.mallocFailed = 0; + sCheck.zPfx = 0; + sCheck.v1 = 0; + sCheck.v2 = 0; + sCheck.aPgRef = 0; + sCheck.heap = 0; + sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH); + if( sCheck.nPage==0 ){ + goto integrity_ck_cleanup; + } + + sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1); + if( !sCheck.aPgRef ){ + sCheck.mallocFailed = 1; + goto integrity_ck_cleanup; + } + sCheck.heap = (u32*)sqlite3PageMalloc( pBt->pageSize ); + if( sCheck.heap==0 ){ + sCheck.mallocFailed = 1; + goto integrity_ck_cleanup; + } + + i = PENDING_BYTE_PAGE(pBt); + if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i); + + /* Check the integrity of the freelist + */ + sCheck.zPfx = "Main freelist: "; + checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), + get4byte(&pBt->pPage1->aData[36])); + sCheck.zPfx = 0; + + /* Check all the tables. + */ + testcase( pBt->pBt->flags & SQLITE_CellSizeCk ); + pBt->pBt->flags &= ~SQLITE_CellSizeCk; + for(i=0; (int)i<nRoot && sCheck.mxErr; i++){ + i64 notUsed; + if( aRoot[i]==0 ) continue; +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum && aRoot[i]>1 ){ + checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0); + } +#endif + checkTreePage(&sCheck, aRoot[i], ¬Used, LARGEST_INT64); + } + pBt->pBt->flags = savedDbFlags; + + /* Make sure every page in the file is referenced + */ + for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){ +#ifdef SQLITE_OMIT_AUTOVACUUM + if( getPageReferenced(&sCheck, i)==0 ){ + checkAppendMsg(&sCheck, "Page %d is never used", i); + } +#else + /* If the database supports auto-vacuum, make sure no tables contain + ** references to pointer-map pages. + */ + if( getPageReferenced(&sCheck, i)==0 && + (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){ + checkAppendMsg(&sCheck, "Page %d is never used", i); + } + if( getPageReferenced(&sCheck, i)!=0 && + (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){ + checkAppendMsg(&sCheck, "Pointer map page %d is referenced", i); + } +#endif + } + + /* Clean up and report errors. + */ +integrity_ck_cleanup: + sqlite3PageFree(sCheck.heap); + sqlite3_free(sCheck.aPgRef); + if( sCheck.mallocFailed ){ + sqlite3StrAccumReset(&sCheck.errMsg); + sCheck.nErr++; + } + *pnErr = sCheck.nErr; + if( sCheck.nErr==0 ) sqlite3StrAccumReset(&sCheck.errMsg); + /* Make sure this analysis did not leave any unref() pages. */ + assert( nRef==sqlite3PagerRefcount(pBt->pPager) ); + sqlite3BtreeLeave(p); + return sqlite3StrAccumFinish(&sCheck.errMsg); +} +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ + +SQLITE_API int SQLITE_STDCALL sqlite3BtreeFileFormat(Btree *p){ + return p->pBt->file_format; +} + +/* +** Return the full pathname of the underlying database file. Return +** an empty string if the database is in-memory or a TEMP database. +** +** The pager filename is invariant as long as the pager is +** open so it is safe to access without the BtShared mutex. +*/ +SQLITE_API const char *SQLITE_STDCALL sqlite3BtreeGetFilename(Btree *p){ + assert( p->pBt->pPager!=0 ); + return sqlite3PagerFilename(p->pBt->pPager, 1); +} + +/* +** Return the pathname of the journal file for this database. The return +** value of this routine is the same regardless of whether the journal file +** has been created or not. +** +** The pager journal filename is invariant as long as the pager is +** open so it is safe to access without the BtShared mutex. +*/ +SQLITE_API const char *SQLITE_STDCALL sqlite3BtreeGetJournalname(Btree *p){ + assert( p->pBt->pPager!=0 ); + return sqlite3PagerJournalname(p->pBt->pPager); +} + +/* +** Return non-zero if a transaction is active. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeIsInTrans(Btree *p){ + assert( p==0 || sqlite3_mutex_held(p->db->mutex) ); + return (p && (p->inTrans==TRANS_WRITE)); +} + +#ifndef SQLITE_OMIT_WAL +/* +** Run a checkpoint on the Btree passed as the first argument. +** +** Return SQLITE_LOCKED if this or any other connection has an open +** transaction on the shared-cache the argument Btree is connected to. +** +** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCheckpoint(Btree *p, int eMode, int *pnLog, int *pnCkpt){ + int rc = SQLITE_OK; + if( p ){ + BtShared *pBt = p->pBt; + sqlite3BtreeEnter(p); + if( pBt->inTransaction!=TRANS_NONE ){ + rc = SQLITE_LOCKED; + }else{ + rc = sqlite3PagerCheckpoint(pBt->pPager, eMode, pnLog, pnCkpt); + } + sqlite3BtreeLeave(p); + } + return rc; +} +#endif + +/* +** Return non-zero if a read (or write) transaction is active. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeIsInReadTrans(Btree *p){ + assert( p ); + assert( sqlite3_mutex_held(p->db->mutex) ); + return p->inTrans!=TRANS_NONE; +} + +SQLITE_API int SQLITE_STDCALL sqlite3BtreeIsInBackup(Btree *p){ + assert( p ); + assert( sqlite3_mutex_held(p->db->mutex) ); + return p->nBackup!=0; +} + +/* +** This function returns a pointer to a blob of memory associated with +** a single shared-btree. The memory is used by client code for its own +** purposes (for example, to store a high-level schema associated with +** the shared-btree). The btree layer manages reference counting issues. +** +** The first time this is called on a shared-btree, nBytes bytes of memory +** are allocated, zeroed, and returned to the caller. For each subsequent +** call the nBytes parameter is ignored and a pointer to the same blob +** of memory returned. +** +** If the nBytes parameter is 0 and the blob of memory has not yet been +** allocated, a null pointer is returned. If the blob has already been +** allocated, it is returned as normal. +** +** Just before the shared-btree is closed, the function passed as the +** xFree argument when the memory allocation was made is invoked on the +** blob of allocated memory. The xFree function should not call sqlite3_free() +** on the memory, the btree layer does that. +*/ +SQLITE_API void *SQLITE_STDCALL sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){ + BtShared *pBt = p->pBt; + sqlite3BtreeEnter(p); + if( !pBt->pSchema && nBytes ){ + pBt->pSchema = sqlite3DbMallocZero(0, nBytes); + pBt->xFreeSchema = xFree; + } + sqlite3BtreeLeave(p); + return pBt->pSchema; +} + +/* +** Return SQLITE_LOCKED_SHAREDCACHE if another user of the same shared +** btree as the argument handle holds an exclusive lock on the +** sqlite_master table. Otherwise SQLITE_OK. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeSchemaLocked(Btree *p){ + int rc; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + rc = querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK); + assert( rc==SQLITE_OK || rc==SQLITE_LOCKED_SHAREDCACHE ); + sqlite3BtreeLeave(p); + return rc; +} + + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* +** Obtain a lock on the table whose root page is iTab. The +** lock is a write lock if isWritelock is true or a read lock +** if it is false. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){ + int rc = SQLITE_OK; + assert( p->inTrans!=TRANS_NONE ); + if( p->sharable ){ + u8 lockType = READ_LOCK + isWriteLock; + assert( READ_LOCK+1==WRITE_LOCK ); + assert( isWriteLock==0 || isWriteLock==1 ); + + sqlite3BtreeEnter(p); + rc = querySharedCacheTableLock(p, iTab, lockType); + if( rc==SQLITE_OK ){ + rc = setSharedCacheTableLock(p, iTab, lockType); + } + sqlite3BtreeLeave(p); + } + return rc; +} +#endif + +#ifndef SQLITE_OMIT_INCRBLOB +/* +** Argument pCsr must be a cursor opened for writing on an +** INTKEY table currently pointing at a valid table entry. +** This function modifies the data stored as part of that entry. +** +** Only the data content may only be modified, it is not possible to +** change the length of the data stored. If this function is called with +** parameters that attempt to write past the end of the existing data, +** no modifications are made and SQLITE_CORRUPT is returned. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ + int rc; + assert( cursorHoldsMutex(pCsr) ); + assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) ); + assert( pCsr->curFlags & BTCF_Incrblob ); + + rc = restoreCursorPosition(pCsr); + if( rc!=SQLITE_OK ){ + return rc; + } + assert( pCsr->eState!=CURSOR_REQUIRESEEK ); + if( pCsr->eState!=CURSOR_VALID ){ + return SQLITE_ABORT; + } + + /* Save the positions of all other cursors open on this table. This is + ** required in case any of them are holding references to an xFetch + ** version of the b-tree page modified by the accessPayload call below. + ** + ** Note that pCsr must be open on a INTKEY table and saveCursorPosition() + ** and hence saveAllCursors() cannot fail on a BTREE_INTKEY table, hence + ** saveAllCursors can only return SQLITE_OK. + */ + VVA_ONLY(rc =) saveAllCursors(pCsr->pBt, pCsr->pgnoRoot, pCsr); + assert( rc==SQLITE_OK ); + + /* Check some assumptions: + ** (a) the cursor is open for writing, + ** (b) there is a read/write transaction open, + ** (c) the connection holds a write-lock on the table (if required), + ** (d) there are no conflicting read-locks, and + ** (e) the cursor points at a valid row of an intKey table. + */ + if( (pCsr->curFlags & BTCF_WriteFlag)==0 ){ + return SQLITE_READONLY; + } + assert( (pCsr->pBt->btsFlags & BTS_READ_ONLY)==0 + && pCsr->pBt->inTransaction==TRANS_WRITE ); + assert( hasSharedCacheTableLock(pCsr->pBtree, pCsr->pgnoRoot, 0, 2) ); + assert( !hasReadConflicts(pCsr->pBtree, pCsr->pgnoRoot) ); + assert( pCsr->apPage[pCsr->iPage]->intKey ); + + return accessPayload(pCsr, offset, amt, (unsigned char *)z, 1); +} + +/* +** Mark this cursor as an incremental blob cursor. +*/ +SQLITE_API void SQLITE_STDCALL sqlite3BtreeIncrblobCursor(BtCursor *pCur){ + pCur->curFlags |= BTCF_Incrblob; + pCur->pBtree->hasIncrblobCur = 1; +} +#endif + +/* +** Set both the "read version" (single byte at byte offset 18) and +** "write version" (single byte at byte offset 19) fields in the database +** header to iVersion. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){ + BtShared *pBt = pBtree->pBt; + int rc; /* Return code */ + + assert( iVersion==1 || iVersion==2 ); + + /* If setting the version fields to 1, do not automatically open the + ** WAL connection, even if the version fields are currently set to 2. + */ + pBt->btsFlags &= ~BTS_NO_WAL; + if( iVersion==1 ) pBt->btsFlags |= BTS_NO_WAL; + + rc = sqlite3BtreeBeginTrans(pBtree, 0); + if( rc==SQLITE_OK ){ + u8 *aData = pBt->pPage1->aData; + if( aData[18]!=(u8)iVersion || aData[19]!=(u8)iVersion ){ + rc = sqlite3BtreeBeginTrans(pBtree, 2); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + if( rc==SQLITE_OK ){ + aData[18] = (u8)iVersion; + aData[19] = (u8)iVersion; + } + } + } + } + + pBt->btsFlags &= ~BTS_NO_WAL; + return rc; +} + +/* +** set the mask of hint flags for cursor pCsr. +*/ +SQLITE_API void SQLITE_STDCALL sqlite3BtreeCursorHints(BtCursor *pCsr, unsigned int mask){ + assert( mask==BTREE_BULKLOAD || mask==BTREE_SEEK_EQ || mask==0 ); + pCsr->hints = mask; +} + +#ifdef SQLITE_DEBUG +/* +** Return true if the cursor has a hint specified. This routine is +** only used from within assert() statements +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeCursorHasHint(BtCursor *pCsr, unsigned int mask){ + return (pCsr->hints & mask)!=0; +} +#endif + +/* +** Return true if the given Btree is read-only. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeIsReadonly(Btree *p){ + return (p->pBt->btsFlags & BTS_READ_ONLY)!=0; +} + +/* +** Return the size of the header added to each page by this module. +*/ +SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); } + +SQLITE_API const char *SQLITE_STDCALL sqlite3BtreeErrName(int rc){ + const char *zName = 0; + int i, origRc = rc; + for(i=0; i<2 && zName==0; i++, rc &= 0xff){ + switch( rc ){ + case SQLITE_OK: zName = "SQLITE_OK"; break; + case SQLITE_ERROR: zName = "SQLITE_ERROR"; break; + case SQLITE_INTERNAL: zName = "SQLITE_INTERNAL"; break; + case SQLITE_PERM: zName = "SQLITE_PERM"; break; + case SQLITE_ABORT: zName = "SQLITE_ABORT"; break; + case SQLITE_ABORT_ROLLBACK: zName = "SQLITE_ABORT_ROLLBACK"; break; + case SQLITE_BUSY: zName = "SQLITE_BUSY"; break; + case SQLITE_BUSY_RECOVERY: zName = "SQLITE_BUSY_RECOVERY"; break; + case SQLITE_BUSY_SNAPSHOT: zName = "SQLITE_BUSY_SNAPSHOT"; break; + case SQLITE_LOCKED: zName = "SQLITE_LOCKED"; break; + case SQLITE_LOCKED_SHAREDCACHE: zName = "SQLITE_LOCKED_SHAREDCACHE";break; + case SQLITE_NOMEM: zName = "SQLITE_NOMEM"; break; + case SQLITE_READONLY: zName = "SQLITE_READONLY"; break; + case SQLITE_READONLY_RECOVERY: zName = "SQLITE_READONLY_RECOVERY"; break; + case SQLITE_READONLY_CANTLOCK: zName = "SQLITE_READONLY_CANTLOCK"; break; + case SQLITE_READONLY_ROLLBACK: zName = "SQLITE_READONLY_ROLLBACK"; break; + case SQLITE_READONLY_DBMOVED: zName = "SQLITE_READONLY_DBMOVED"; break; + case SQLITE_INTERRUPT: zName = "SQLITE_INTERRUPT"; break; + case SQLITE_IOERR: zName = "SQLITE_IOERR"; break; + case SQLITE_IOERR_READ: zName = "SQLITE_IOERR_READ"; break; + case SQLITE_IOERR_SHORT_READ: zName = "SQLITE_IOERR_SHORT_READ"; break; + case SQLITE_IOERR_WRITE: zName = "SQLITE_IOERR_WRITE"; break; + case SQLITE_IOERR_FSYNC: zName = "SQLITE_IOERR_FSYNC"; break; + case SQLITE_IOERR_DIR_FSYNC: zName = "SQLITE_IOERR_DIR_FSYNC"; break; + case SQLITE_IOERR_TRUNCATE: zName = "SQLITE_IOERR_TRUNCATE"; break; + case SQLITE_IOERR_FSTAT: zName = "SQLITE_IOERR_FSTAT"; break; + case SQLITE_IOERR_UNLOCK: zName = "SQLITE_IOERR_UNLOCK"; break; + case SQLITE_IOERR_RDLOCK: zName = "SQLITE_IOERR_RDLOCK"; break; + case SQLITE_IOERR_DELETE: zName = "SQLITE_IOERR_DELETE"; break; + case SQLITE_IOERR_NOMEM: zName = "SQLITE_IOERR_NOMEM"; break; + case SQLITE_IOERR_ACCESS: zName = "SQLITE_IOERR_ACCESS"; break; + case SQLITE_IOERR_CHECKRESERVEDLOCK: + zName = "SQLITE_IOERR_CHECKRESERVEDLOCK"; break; + case SQLITE_IOERR_LOCK: zName = "SQLITE_IOERR_LOCK"; break; + case SQLITE_IOERR_CLOSE: zName = "SQLITE_IOERR_CLOSE"; break; + case SQLITE_IOERR_DIR_CLOSE: zName = "SQLITE_IOERR_DIR_CLOSE"; break; + case SQLITE_IOERR_SHMOPEN: zName = "SQLITE_IOERR_SHMOPEN"; break; + case SQLITE_IOERR_SHMSIZE: zName = "SQLITE_IOERR_SHMSIZE"; break; + case SQLITE_IOERR_SHMLOCK: zName = "SQLITE_IOERR_SHMLOCK"; break; + case SQLITE_IOERR_SHMMAP: zName = "SQLITE_IOERR_SHMMAP"; break; + case SQLITE_IOERR_SEEK: zName = "SQLITE_IOERR_SEEK"; break; + case SQLITE_IOERR_DELETE_NOENT: zName = "SQLITE_IOERR_DELETE_NOENT";break; + case SQLITE_IOERR_MMAP: zName = "SQLITE_IOERR_MMAP"; break; + case SQLITE_IOERR_GETTEMPPATH: zName = "SQLITE_IOERR_GETTEMPPATH"; break; + case SQLITE_IOERR_CONVPATH: zName = "SQLITE_IOERR_CONVPATH"; break; + case SQLITE_CORRUPT: zName = "SQLITE_CORRUPT"; break; + case SQLITE_CORRUPT_VTAB: zName = "SQLITE_CORRUPT_VTAB"; break; + case SQLITE_NOTFOUND: zName = "SQLITE_NOTFOUND"; break; + case SQLITE_FULL: zName = "SQLITE_FULL"; break; + case SQLITE_CANTOPEN: zName = "SQLITE_CANTOPEN"; break; + case SQLITE_CANTOPEN_NOTEMPDIR: zName = "SQLITE_CANTOPEN_NOTEMPDIR";break; + case SQLITE_CANTOPEN_ISDIR: zName = "SQLITE_CANTOPEN_ISDIR"; break; + case SQLITE_CANTOPEN_FULLPATH: zName = "SQLITE_CANTOPEN_FULLPATH"; break; + case SQLITE_CANTOPEN_CONVPATH: zName = "SQLITE_CANTOPEN_CONVPATH"; break; + case SQLITE_PROTOCOL: zName = "SQLITE_PROTOCOL"; break; + case SQLITE_EMPTY: zName = "SQLITE_EMPTY"; break; + case SQLITE_SCHEMA: zName = "SQLITE_SCHEMA"; break; + case SQLITE_TOOBIG: zName = "SQLITE_TOOBIG"; break; + case SQLITE_CONSTRAINT: zName = "SQLITE_CONSTRAINT"; break; + case SQLITE_CONSTRAINT_UNIQUE: zName = "SQLITE_CONSTRAINT_UNIQUE"; break; + case SQLITE_CONSTRAINT_TRIGGER: zName = "SQLITE_CONSTRAINT_TRIGGER";break; + case SQLITE_CONSTRAINT_FOREIGNKEY: + zName = "SQLITE_CONSTRAINT_FOREIGNKEY"; break; + case SQLITE_CONSTRAINT_CHECK: zName = "SQLITE_CONSTRAINT_CHECK"; break; + case SQLITE_CONSTRAINT_PRIMARYKEY: + zName = "SQLITE_CONSTRAINT_PRIMARYKEY";break; + case SQLITE_CONSTRAINT_NOTNULL: zName = "SQLITE_CONSTRAINT_NOTNULL";break; + case SQLITE_CONSTRAINT_COMMITHOOK: + zName = "SQLITE_CONSTRAINT_COMMITHOOK";break; + case SQLITE_CONSTRAINT_VTAB: zName = "SQLITE_CONSTRAINT_VTAB"; break; + case SQLITE_CONSTRAINT_FUNCTION: + zName = "SQLITE_CONSTRAINT_FUNCTION";break; + case SQLITE_CONSTRAINT_ROWID: zName = "SQLITE_CONSTRAINT_ROWID"; break; + case SQLITE_MISMATCH: zName = "SQLITE_MISMATCH"; break; + case SQLITE_MISUSE: zName = "SQLITE_MISUSE"; break; + case SQLITE_NOLFS: zName = "SQLITE_NOLFS"; break; + case SQLITE_AUTH: zName = "SQLITE_AUTH"; break; + case SQLITE_FORMAT: zName = "SQLITE_FORMAT"; break; + case SQLITE_RANGE: zName = "SQLITE_RANGE"; break; + case SQLITE_NOTADB: zName = "SQLITE_NOTADB"; break; + case SQLITE_ROW: zName = "SQLITE_ROW"; break; + case SQLITE_NOTICE: zName = "SQLITE_NOTICE"; break; + case SQLITE_NOTICE_RECOVER_WAL: zName = "SQLITE_NOTICE_RECOVER_WAL";break; + case SQLITE_NOTICE_RECOVER_ROLLBACK: + zName = "SQLITE_NOTICE_RECOVER_ROLLBACK"; break; + case SQLITE_WARNING: zName = "SQLITE_WARNING"; break; + case SQLITE_WARNING_AUTOINDEX: zName = "SQLITE_WARNING_AUTOINDEX"; break; + case SQLITE_DONE: zName = "SQLITE_DONE"; break; + } + } + if( zName==0 ){ + static char zBuf[50]; + sqlite3_snprintf(sizeof(zBuf), zBuf, "SQLITE_UNKNOWN(%d)", origRc); + zName = zBuf; + } + return zName; +} + +/* +** The following functions: +** +** sqlite3BtreeSerialType() +** sqlite3BtreeSerialTypeLen() +** sqlite3BtreeSerialLen() +** sqlite3BtreeSerialPut() +** sqlite3BtreeSerialGet() +** +** encapsulate the code that serializes values for storage in SQLite +** data and index records. Each serialized value consists of a +** 'serial-type' and a blob of data. The serial type is an 8-byte unsigned +** integer, stored as a varint. +** +** In an SQLite index record, the serial type is stored directly before +** the blob of data that it corresponds to. In a table record, all serial +** types are stored at the start of the record, and the blobs of data at +** the end. Hence these functions allow the caller to handle the +** serial-type and data blob separately. +** +** The following table describes the various storage classes for data: +** +** serial type bytes of data type +** -------------- --------------- --------------- +** 0 0 NULL +** 1 1 signed integer +** 2 2 signed integer +** 3 3 signed integer +** 4 4 signed integer +** 5 6 signed integer +** 6 8 signed integer +** 7 8 IEEE float +** 8 0 Integer constant 0 +** 9 0 Integer constant 1 +** 10,11 reserved for expansion +** N>=12 and even (N-12)/2 BLOB +** N>=13 and odd (N-13)/2 text +** +** The 8 and 9 types were added in 3.3.0, file format 4. Prior versions +** of SQLite will not understand those serial types. +*/ + +/* +** Return the serial-type for the value stored in pMem. +*/ +SQLITE_API u32 SQLITE_STDCALL sqlite3BtreeSerialType(Mem *pMem, int file_format){ + int flags = pMem->flags; + u32 n; + + if( flags&MEM_Null ){ + return 0; + } + if( flags&MEM_Int ){ + /* Figure out whether to use 1, 2, 4, 6 or 8 bytes. */ +# define MAX_6BYTE ((((i64)0x00008000)<<32)-1) + i64 i = pMem->u.i; + u64 u; + if( i<0 ){ + u = ~i; + }else{ + u = i; + } + if( u<=127 ){ + return ((i&1)==i && file_format>=4) ? 8+(u32)u : 1; + } + if( u<=32767 ) return 2; + if( u<=8388607 ) return 3; + if( u<=2147483647 ) return 4; + if( u<=MAX_6BYTE ) return 5; + return 6; + } + if( flags&MEM_Real ){ + return 7; + } + assert( pMem->pBtree->mallocFailed || flags&(MEM_Str|MEM_Blob) ); + assert( pMem->n>=0 ); + n = (u32)pMem->n; + if( flags & MEM_Zero ){ + n += pMem->u.nZero; + } + return ((n*2) + 12 + ((flags&MEM_Str)!=0)); +} + +/* +** The sizes for serial types less than 12 +*/ +static const u8 sqlite3SmallTypeSizes[] = { + 0, 1, 2, 3, 4, 6, 8, 8, 0, 0, 0, 0 +}; + +/* +** Return the length of the data corresponding to the supplied serial-type. +*/ +SQLITE_API u32 SQLITE_STDCALL sqlite3BtreeSerialTypeLen(u32 serial_type){ + if( serial_type>=12 ){ + return (serial_type-12)/2; + }else{ + return sqlite3SmallTypeSizes[serial_type]; + } +} + +/* +** If we are on an architecture with mixed-endian floating +** points (ex: ARM7) then swap the lower 4 bytes with the +** upper 4 bytes. Return the result. +** +** For most architectures, this is a no-op. +** +** (later): It is reported to me that the mixed-endian problem +** on ARM7 is an issue with GCC, not with the ARM7 chip. It seems +** that early versions of GCC stored the two words of a 64-bit +** float in the wrong order. And that error has been propagated +** ever since. The blame is not necessarily with GCC, though. +** GCC might have just copying the problem from a prior compiler. +** I am also told that newer versions of GCC that follow a different +** ABI get the byte order right. +** +** Developers using SQLite on an ARM7 should compile and run their +** application using -DSQLITE_DEBUG=1 at least once. With DEBUG +** enabled, some asserts below will ensure that the byte order of +** floating point values is correct. +** +** (2007-08-30) Frank van Vugt has studied this problem closely +** and has send his findings to the SQLite developers. Frank +** writes that some Linux kernels offer floating point hardware +** emulation that uses only 32-bit mantissas instead of a full +** 48-bits as required by the IEEE standard. (This is the +** CONFIG_FPE_FASTFPE option.) On such systems, floating point +** byte swapping becomes very complicated. To avoid problems, +** the necessary byte swapping is carried out using a 64-bit integer +** rather than a 64-bit float. Frank assures us that the code here +** works for him. We, the developers, have no way to independently +** verify this, but Frank seems to know what he is talking about +** so we trust him. +*/ +#ifdef SQLITE_MIXED_ENDIAN_64BIT_FLOAT +static u64 floatSwap(u64 in){ + union { + u64 r; + u32 i[2]; + } u; + u32 t; + + u.r = in; + t = u.i[0]; + u.i[0] = u.i[1]; + u.i[1] = t; + return u.r; +} +# define swapMixedEndianFloat(X) X = floatSwap(X) +#else +# define swapMixedEndianFloat(X) +#endif + +/* +** Write the serialized data blob for the value stored in pMem into +** buf. It is assumed that the caller has allocated sufficient space. +** Return the number of bytes written. +** +** nBuf is the amount of space left in buf[]. The caller is responsible +** for allocating enough space to buf[] to hold the entire field, exclusive +** of the pMem->u.nZero bytes for a MEM_Zero value. +** +** Return the number of bytes actually written into buf[]. The number +** of bytes in the zero-filled tail is included in the return value only +** if those bytes were zeroed in buf[]. +*/ +SQLITE_API u32 SQLITE_STDCALL sqlite3BtreeSerialPut(u8 *buf, Mem *pMem, u32 serial_type){ + u32 len; + + /* Integer and Real */ + if( serial_type<=7 && serial_type>0 ){ + u64 v; + u32 i; + if( serial_type==7 ){ + assert( sizeof(v)==sizeof(pMem->u.r) ); + memcpy(&v, &pMem->u.r, sizeof(v)); + swapMixedEndianFloat(v); + }else{ + v = pMem->u.i; + } + len = i = sqlite3SmallTypeSizes[serial_type]; + assert( i>0 ); + do{ + buf[--i] = (u8)(v&0xFF); + v >>= 8; + }while( i ); + return len; + } + + /* String or blob */ + if( serial_type>=12 ){ + assert( pMem->n + ((pMem->flags & MEM_Zero)?pMem->u.nZero:0) + == (int)sqlite3BtreeSerialTypeLen(serial_type) ); + len = pMem->n; + memcpy(buf, pMem->z, len); + return len; + } + + /* NULL or constants 0 or 1 */ + return 0; +} + +/* Input "x" is a sequence of unsigned characters that represent a +** big-endian integer. Return the equivalent native integer +*/ +#define ONE_BYTE_INT(x) ((i8)(x)[0]) +#define TWO_BYTE_INT(x) (256*(i8)((x)[0])|(x)[1]) +#define THREE_BYTE_INT(x) (65536*(i8)((x)[0])|((x)[1]<<8)|(x)[2]) +#define FOUR_BYTE_UINT(x) (((u32)(x)[0]<<24)|((x)[1]<<16)|((x)[2]<<8)|(x)[3]) +#define FOUR_BYTE_INT(x) (16777216*(i8)((x)[0])|((x)[1]<<16)|((x)[2]<<8)|(x)[3]) + +/* +** Size of struct Mem not including the Mem.zMalloc member or anything that +** follows. +*/ +#define MEMCELLSIZE offsetof(Mem,zMalloc) + +#define VdbeMemDynamic(X) \ + (((X)->flags&MEM_Dyn)!=0) + +/* +** Initialize bulk memory to be a consistent Mem object. +** +** The minimum amount of initialization feasible is performed. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemInit(Mem *pMem, Btree *pBtree, u16 flags){ + assert( (flags & ~MEM_TypeMask)==0 ); + pMem->flags = flags; + pMem->pBtree = pBtree; + pMem->szMalloc = 0; +} + +/* +** If pMem is an object with a valid string representation, this routine +** ensures the internal encoding for the string representation is +** 'desiredEnc', one of SQLITE_UTF8, SQLITE_UTF16LE or SQLITE_UTF16BE. +** +** If pMem is not a string object, or the encoding of the string +** representation is already stored using the requested encoding, then this +** routine is a no-op. +** +** SQLITE_OK is returned if the conversion is successful (or not required). +** SQLITE_NOMEM may be returned if a malloc() fails during conversion +** between formats. +*/ +SQLITE_PRIVATE int sqlite3VdbeChangeEncoding(Mem *pMem, int desiredEnc){ + assert( (pMem->flags&MEM_RowSet)==0 ); + assert( desiredEnc==SQLITE_UTF8 || desiredEnc==SQLITE_UTF16LE + || desiredEnc==SQLITE_UTF16BE ); + if( !(pMem->flags&MEM_Str) || pMem->enc==desiredEnc ){ + return SQLITE_OK; + } + assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); + return SQLITE_ERROR; +} + +/* +** If the memory cell contains a value that must be freed by +** invoking the external callback in Mem.xDel, then this routine +** will free that value. It also sets Mem.flags to MEM_Null. +** +** This is a helper routine for sqlite3VdbeMemSetNull() and +** for sqlite3VdbeMemRelease(). Use those other routines as the +** entry point for releasing Mem resources. +*/ +static SQLITE_NOINLINE void vdbeMemClearExternAndSetNull(Mem *p){ + assert( p->db==0 || sqlite3_mutex_held(p->db->mutex) ); + assert( VdbeMemDynamic(p) ); + if( p->flags&MEM_Dyn ){ + assert( p->xDel!=SQLITE_DYNAMIC && p->xDel!=0 ); + p->xDel((void *)p->z); + } + p->flags = MEM_Null; +} + +/* +** Delete any previous value and set the value stored in *pMem to NULL. +** +** This routine calls the Mem.xDel destructor to dispose of values that +** require the destructor. But it preserves the Mem.zMalloc memory allocation. +** To free all resources, use sqlite3VdbeMemRelease(), which both calls this +** routine to invoke the destructor and deallocates Mem.zMalloc. +** +** Use this routine to reset the Mem prior to insert a new value. +** +** Use sqlite3VdbeMemRelease() to complete erase the Mem prior to abandoning it. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemSetNull(Mem *pMem){ + if( VdbeMemDynamic(pMem) ){ + vdbeMemClearExternAndSetNull(pMem); + }else{ + pMem->flags = MEM_Null; + } +} + +/* +** Make sure pMem->z points to a writable allocation of at least +** min(n,32) bytes. +** +** If the bPreserve argument is true, then copy of the content of +** pMem->z into the new allocation. pMem must be either a string or +** blob if bPreserve is true. If bPreserve is false, any prior content +** in pMem->z is discarded. +*/ +SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3VdbeMemGrow(Mem *pMem, int n, int bPreserve){ + assert( sqlite3VdbeCheckMemInvariants(pMem) ); + assert( (pMem->flags&MEM_RowSet)==0 ); + + /* If the bPreserve flag is set to true, then the memory cell must already + ** contain a valid string or blob value. */ + assert( bPreserve==0 || pMem->flags&(MEM_Blob|MEM_Str) ); + testcase( bPreserve && pMem->z==0 ); + + assert( pMem->szMalloc==0 + || pMem->szMalloc==sqlite3DbMallocSize(pMem->pBtree, pMem->zMalloc) ); + if( pMem->szMalloc<n ){ + if( n<32 ) n = 32; + if( bPreserve && pMem->szMalloc>0 && pMem->z==pMem->zMalloc ){ + pMem->z = pMem->zMalloc = sqlite3DbReallocOrFree(pMem->pBtree, pMem->z, n); + bPreserve = 0; + }else{ + if( pMem->szMalloc>0 ) sqlite3DbFree(pMem->pBtree, pMem->zMalloc); + pMem->zMalloc = sqlite3DbMallocRaw(pMem->pBtree, n); + } + if( pMem->zMalloc==0 ){ + sqlite3VdbeMemSetNull(pMem); + pMem->z = 0; + pMem->szMalloc = 0; + return SQLITE_NOMEM; + }else{ + pMem->szMalloc = sqlite3DbMallocSize(pMem->pBtree, pMem->zMalloc); + } + } + + if( bPreserve && pMem->z && pMem->z!=pMem->zMalloc ){ + memcpy(pMem->zMalloc, pMem->z, pMem->n); + } + if( (pMem->flags&MEM_Dyn)!=0 ){ + assert( pMem->xDel!=0 && pMem->xDel!=SQLITE_DYNAMIC ); + pMem->xDel((void *)(pMem->z)); + } + + pMem->z = pMem->zMalloc; + pMem->flags &= ~(MEM_Dyn|MEM_Ephem|MEM_Static); + return SQLITE_OK; +} + +/* +** Change the pMem->zMalloc allocation to be at least szNew bytes. +** If pMem->zMalloc already meets or exceeds the requested size, this +** routine is a no-op. +** +** Any prior string or blob content in the pMem object may be discarded. +** The pMem->xDel destructor is called, if it exists. Though MEM_Str +** and MEM_Blob values may be discarded, MEM_Int, MEM_Real, and MEM_Null +** values are preserved. +** +** Return SQLITE_OK on success or an error code (probably SQLITE_NOMEM) +** if unable to complete the resizing. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemClearAndResize(Mem *pMem, int szNew){ + assert( szNew>0 ); + assert( (pMem->flags & MEM_Dyn)==0 || pMem->szMalloc==0 ); + if( pMem->szMalloc<szNew ){ + return sqlite3VdbeMemGrow(pMem, szNew, 0); + } + assert( (pMem->flags & MEM_Dyn)==0 ); + pMem->z = pMem->zMalloc; + pMem->flags &= (MEM_Null|MEM_Int|MEM_Real); + return SQLITE_OK; +} + +#ifndef SQLITE_OMIT_INCRBLOB + int sqlite3VdbeMemExpandBlob(Mem *); + #define ExpandBlob(P) (((P)->flags&MEM_Zero)?sqlite3VdbeMemExpandBlob(P):0) +#else + #define sqlite3VdbeMemExpandBlob(x) SQLITE_OK + #define ExpandBlob(P) SQLITE_OK +#endif + +/* +** Change pMem so that its MEM_Str or MEM_Blob value is stored in +** MEM.zMalloc, where it can be safely written. +** +** Return SQLITE_OK on success or SQLITE_NOMEM if malloc fails. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemMakeWriteable(Mem *pMem){ + int f; + assert( pMem->pBtree==0 || sqlite3_mutex_held(pMem->pBtree->mutex) ); + assert( (pMem->flags&MEM_RowSet)==0 ); + ExpandBlob(pMem); + f = pMem->flags; + if( (f&(MEM_Str|MEM_Blob)) && (pMem->szMalloc==0 || pMem->z!=pMem->zMalloc) ){ + if( sqlite3VdbeMemGrow(pMem, pMem->n + 2, 1) ){ + return SQLITE_NOMEM; + } + pMem->z[pMem->n] = 0; + pMem->z[pMem->n+1] = 0; + pMem->flags |= MEM_Term; + } + pMem->flags &= ~MEM_Ephem; +#ifdef SQLITE_DEBUG + pMem->pScopyFrom = 0; +#endif + + return SQLITE_OK; +} + +/* +** It is already known that pMem contains an unterminated string. +** Add the zero terminator. +*/ +static SQLITE_NOINLINE int vdbeMemAddTerminator(Mem *pMem){ + if( sqlite3VdbeMemGrow(pMem, pMem->n+2, 1) ){ + return SQLITE_NOMEM; + } + pMem->z[pMem->n] = 0; + pMem->z[pMem->n+1] = 0; + pMem->flags |= MEM_Term; + return SQLITE_OK; +} + +/* +** Make sure the given Mem is \u0000 terminated. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemNulTerminate(Mem *pMem){ + assert( pMem->pBtree==0 || sqlite3_mutex_held(pMem->pBtree->mutex) ); + testcase( (pMem->flags & (MEM_Term|MEM_Str))==(MEM_Term|MEM_Str) ); + testcase( (pMem->flags & (MEM_Term|MEM_Str))==0 ); + if( (pMem->flags & (MEM_Term|MEM_Str))!=MEM_Str ){ + return SQLITE_OK; /* Nothing to do */ + }else{ + return vdbeMemAddTerminator(pMem); + } +} + +/* +** Add MEM_Str to the set of representations for the given Mem. Numbers +** are converted using sqlite3_snprintf(). Converting a BLOB to a string +** is a no-op. +** +** Existing representations MEM_Int and MEM_Real are invalidated if +** bForce is true but are retained if bForce is false. +** +** A MEM_Null value will never be passed to this function. This function is +** used for converting values to text for returning to the user (i.e. via +** sqlite3_value_text()), or for ensuring that values to be used as btree +** keys are strings. In the former case a NULL pointer is returned the +** user and the latter is an internal programming error. +*/ +SQLITE_PRIVATE int sqlite3VdbeMemStringify(Mem *pMem, u8 enc, u8 bForce){ + int fg = pMem->flags; + const int nByte = 32; + + assert( pMem->pBtree==0 || sqlite3_mutex_held(pMem->pBtree->mutex) ); + assert( !(fg&MEM_Zero) ); + assert( !(fg&(MEM_Str|MEM_Blob)) ); + assert( fg&(MEM_Int|MEM_Real) ); + assert( (pMem->flags&MEM_RowSet)==0 ); + assert( EIGHT_BYTE_ALIGNMENT(pMem) ); + + + if( sqlite3VdbeMemClearAndResize(pMem, nByte) ){ + return SQLITE_NOMEM; + } + + /* For a Real or Integer, use sqlite3_snprintf() to produce the UTF-8 + ** string representation of the value. Then, if the required encoding + ** is UTF-16le or UTF-16be do a translation. + ** + ** FIX ME: It would be better if sqlite3_snprintf() could do UTF-16. + */ + if( fg & MEM_Int ){ + sqlite3_snprintf(nByte, pMem->z, "%lld", pMem->u.i); + }else{ + assert( fg & MEM_Real ); + sqlite3_snprintf(nByte, pMem->z, "%!.15g", pMem->u.r); + } + pMem->n = sqlite3Strlen30(pMem->z); + pMem->enc = SQLITE_UTF8; + pMem->flags |= MEM_Str|MEM_Term; + if( bForce ) pMem->flags &= ~(MEM_Int|MEM_Real); + sqlite3VdbeChangeEncoding(pMem, enc); + return SQLITE_OK; +} + +/* +** Release memory held by the Mem p, both external memory cleared +** by p->xDel and memory in p->zMalloc. +** +** This is a helper routine invoked by sqlite3VdbeMemRelease() in +** the unusual case where there really is memory in p that needs +** to be freed. +*/ +static SQLITE_NOINLINE void vdbeMemClear(Mem *p){ + if( VdbeMemDynamic(p) ){ + vdbeMemClearExternAndSetNull(p); + } + if( p->szMalloc ){ + sqlite3DbFree(p->pBtree, p->zMalloc); + p->szMalloc = 0; + } + p->z = 0; +} + +/* +** Release any memory resources held by the Mem. Both the memory that is +** free by Mem.xDel and the Mem.zMalloc allocation are freed. +** +** Use this routine prior to clean up prior to abandoning a Mem, or to +** reset a Mem back to its minimum memory utilization. +** +** Use sqlite3VdbeMemSetNull() to release just the Mem.xDel space +** prior to inserting new content into the Mem. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemRelease(Mem *p){ + assert( sqlite3VdbeCheckMemInvariants(p) ); + if( VdbeMemDynamic(p) || p->szMalloc ){ + vdbeMemClear(p); + } +} + +/* +** Make an shallow copy of pFrom into pTo. Prior contents of +** pTo are freed. The pFrom->z field is not duplicated. If +** pFrom->z is used, then pTo->z points to the same thing as pFrom->z +** and flags gets srcType (either MEM_Ephem or MEM_Static). +*/ +static void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int srcType); +static SQLITE_NOINLINE void vdbeClrCopy(Mem *pTo, const Mem *pFrom, int eType){ + vdbeMemClearExternAndSetNull(pTo); + assert( !VdbeMemDynamic(pTo) ); + sqlite3VdbeMemShallowCopy(pTo, pFrom, eType); +} +static void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int srcType){ + assert( (pFrom->flags & MEM_RowSet)==0 ); + assert( pTo->pBtree==pFrom->pBtree ); + if( VdbeMemDynamic(pTo) ){ vdbeClrCopy(pTo,pFrom,srcType); return; } + memcpy(pTo, pFrom, MEMCELLSIZE); + if( (pFrom->flags&MEM_Static)==0 ){ + pTo->flags &= ~(MEM_Dyn|MEM_Static|MEM_Ephem); + assert( srcType==MEM_Ephem || srcType==MEM_Static ); + pTo->flags |= srcType; + } +} + +/* +** Deserialize the data blob pointed to by buf as serial type serial_type +** and store the result in pMem. Return the number of bytes read. +** +** This function is implemented as two separate routines for performance. +** The few cases that require local variables are broken out into a separate +** routine so that in most cases the overhead of moving the stack pointer +** is avoided. +*/ +static u32 SQLITE_NOINLINE serialGet( + const unsigned char *buf, /* Buffer to deserialize from */ + u32 serial_type, /* Serial type to deserialize */ + Mem *pMem /* Memory cell to write value into */ +){ + u64 x = FOUR_BYTE_UINT(buf); + u32 y = FOUR_BYTE_UINT(buf+4); + x = (x<<32) + y; + if( serial_type==6 ){ + /* EVIDENCE-OF: R-29851-52272 Value is a big-endian 64-bit + ** twos-complement integer. */ + pMem->u.i = *(i64*)&x; + pMem->flags = MEM_Int; + testcase( pMem->u.i<0 ); + }else{ + /* EVIDENCE-OF: R-57343-49114 Value is a big-endian IEEE 754-2008 64-bit + ** floating point number. */ +#if !defined(NDEBUG) && !defined(SQLITE_OMIT_FLOATING_POINT) + /* Verify that integers and floating point values use the same + ** byte order. Or, that if SQLITE_MIXED_ENDIAN_64BIT_FLOAT is + ** defined that 64-bit floating point values really are mixed + ** endian. + */ + static const u64 t1 = ((u64)0x3ff00000)<<32; + static const double r1 = 1.0; + u64 t2 = t1; + swapMixedEndianFloat(t2); + assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 ); +#endif + assert( sizeof(x)==8 && sizeof(pMem->u.r)==8 ); + swapMixedEndianFloat(x); + memcpy(&pMem->u.r, &x, sizeof(x)); + pMem->flags = sqlite3IsNaN(pMem->u.r) ? MEM_Null : MEM_Real; + } + return 8; +} +SQLITE_API u32 SQLITE_STDCALL sqlite3BtreeSerialGet( + const unsigned char *buf, /* Buffer to deserialize from */ + u32 serial_type, /* Serial type to deserialize */ + Mem *pMem /* Memory cell to write value into */ +){ + switch( serial_type ){ + case 10: /* Reserved for future use */ + case 11: /* Reserved for future use */ + case 0: { /* Null */ + /* EVIDENCE-OF: R-24078-09375 Value is a NULL. */ + pMem->flags = MEM_Null; + break; + } + case 1: { + /* EVIDENCE-OF: R-44885-25196 Value is an 8-bit twos-complement + ** integer. */ + pMem->u.i = ONE_BYTE_INT(buf); + pMem->flags = MEM_Int; + testcase( pMem->u.i<0 ); + return 1; + } + case 2: { /* 2-byte signed integer */ + /* EVIDENCE-OF: R-49794-35026 Value is a big-endian 16-bit + ** twos-complement integer. */ + pMem->u.i = TWO_BYTE_INT(buf); + pMem->flags = MEM_Int; + testcase( pMem->u.i<0 ); + return 2; + } + case 3: { /* 3-byte signed integer */ + /* EVIDENCE-OF: R-37839-54301 Value is a big-endian 24-bit + ** twos-complement integer. */ + pMem->u.i = THREE_BYTE_INT(buf); + pMem->flags = MEM_Int; + testcase( pMem->u.i<0 ); + return 3; + } + case 4: { /* 4-byte signed integer */ + /* EVIDENCE-OF: R-01849-26079 Value is a big-endian 32-bit + ** twos-complement integer. */ + pMem->u.i = FOUR_BYTE_INT(buf); + pMem->flags = MEM_Int; + testcase( pMem->u.i<0 ); + return 4; + } + case 5: { /* 6-byte signed integer */ + /* EVIDENCE-OF: R-50385-09674 Value is a big-endian 48-bit + ** twos-complement integer. */ + pMem->u.i = FOUR_BYTE_UINT(buf+2) + (((i64)1)<<32)*TWO_BYTE_INT(buf); + pMem->flags = MEM_Int; + testcase( pMem->u.i<0 ); + return 6; + } + case 6: /* 8-byte signed integer */ + case 7: { /* IEEE floating point */ + /* These use local variables, so do them in a separate routine + ** to avoid having to move the frame pointer in the common case */ + return serialGet(buf,serial_type,pMem); + } + case 8: /* Integer 0 */ + case 9: { /* Integer 1 */ + /* EVIDENCE-OF: R-12976-22893 Value is the integer 0. */ + /* EVIDENCE-OF: R-18143-12121 Value is the integer 1. */ + pMem->u.i = serial_type-8; + pMem->flags = MEM_Int; + return 0; + } + default: { + /* EVIDENCE-OF: R-14606-31564 Value is a BLOB that is (N-12)/2 bytes in + ** length. + ** EVIDENCE-OF: R-28401-00140 Value is a string in the text encoding and + ** (N-13)/2 bytes in length. */ + static const u16 aFlag[] = { MEM_Blob|MEM_Ephem, MEM_Str|MEM_Ephem }; + pMem->z = (char *)buf; + pMem->n = (serial_type-12)/2; + pMem->flags = aFlag[serial_type&1]; + return pMem->n; + } + } + return 0; +} + +/* +** This routine is used to allocate sufficient space for an UnpackedRecord +** structure large enough to be used with sqlite3VdbeRecordUnpack() if +** the first argument is a pointer to KeyInfo structure pKeyInfo. +** +** The space is either allocated using sqlite3DbMallocRaw() or from within +** the unaligned buffer passed via the second and third arguments (presumably +** stack space). If the former, then *ppFree is set to a pointer that should +** be eventually freed by the caller using sqlite3DbFree(). Or, if the +** allocation comes from the pSpace/szSpace buffer, *ppFree is set to NULL +** before returning. +** +** If an OOM error occurs, NULL is returned. +*/ +SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord( + KeyInfo *pKeyInfo, /* Description of the record */ + char *pSpace, /* Unaligned space available */ + int szSpace, /* Size of pSpace[] in bytes */ + char **ppFree /* OUT: Caller should free this pointer */ +){ + UnpackedRecord *p; /* Unpacked record to return */ + int nOff; /* Increment pSpace by nOff to align it */ + int nByte; /* Number of bytes required for *p */ + + /* We want to shift the pointer pSpace up such that it is 8-byte aligned. + ** Thus, we need to calculate a value, nOff, between 0 and 7, to shift + ** it by. If pSpace is already 8-byte aligned, nOff should be zero. + */ + nOff = (8 - (SQLITE_PTR_TO_INT(pSpace) & 7)) & 7; + nByte = ROUND8(sizeof(UnpackedRecord)) + sizeof(Mem)*(pKeyInfo->nField+1); + if( nByte>szSpace+nOff ){ + p = (UnpackedRecord *)sqlite3DbMallocRaw(pKeyInfo->pBtree, nByte); + *ppFree = (char *)p; + if( !p ) return 0; + }else{ + p = (UnpackedRecord*)&pSpace[nOff]; + *ppFree = 0; + } + + p->aMem = (Mem*)&((char*)p)[ROUND8(sizeof(UnpackedRecord))]; + assert( pKeyInfo->aSortOrder!=0 ); + p->pKeyInfo = pKeyInfo; + p->nField = pKeyInfo->nField + 1; + return p; +} + +/* +** Given the nKey-byte encoding of a record in pKey[], populate the +** UnpackedRecord structure indicated by the fourth argument with the +** contents of the decoded record. +*/ +SQLITE_PRIVATE void sqlite3VdbeRecordUnpack( + KeyInfo *pKeyInfo, /* Information about the record format */ + int nKey, /* Size of the binary record */ + const void *pKey, /* The binary record */ + UnpackedRecord *p /* Populate this structure before returning. */ +){ + const unsigned char *aKey = (const unsigned char *)pKey; + int d; + u32 idx; /* Offset in aKey[] to read from */ + u16 u; /* Unsigned loop counter */ + u32 szHdr; + Mem *pMem = p->aMem; + + p->default_rc = 0; + assert( EIGHT_BYTE_ALIGNMENT(pMem) ); + idx = getVarint32(aKey, szHdr); + d = szHdr; + u = 0; + while( idx<szHdr && d<=nKey ){ + u32 serial_type; + + idx += getVarint32(&aKey[idx], serial_type); + pMem->enc = pKeyInfo->enc; + pMem->pBtree = pKeyInfo->pBtree; + /* pMem->flags = 0; // sqlite3BtreeSerialGet() will set this for us */ + pMem->szMalloc = 0; + d += sqlite3BtreeSerialGet(&aKey[d], serial_type, pMem); + pMem++; + if( (++u)>=p->nField ) break; + } + assert( u<=pKeyInfo->nField + 1 ); + p->nField = u; +} + +#if SQLITE_DEBUG +/* +** Count the number of fields (a.k.a. columns) in the record given by +** pKey,nKey. The verify that this count is less than or equal to the +** limit given by pKeyInfo->nField + pKeyInfo->nXField. +** +** If this constraint is not satisfied, it means that the high-speed +** vdbeRecordCompareInt() and vdbeRecordCompareString() routines will +** not work correctly. If this assert() ever fires, it probably means +** that the KeyInfo.nField or KeyInfo.nXField values were computed +** incorrectly. +*/ +static void vdbeAssertFieldCountWithinLimits( + int nKey, const void *pKey, /* The record to verify */ + const KeyInfo *pKeyInfo /* Compare size with this KeyInfo */ +){ + int nField = 0; + u32 szHdr; + u32 idx; + u32 notUsed; + const unsigned char *aKey = (const unsigned char*)pKey; + + if( CORRUPT_DB ) return; + idx = getVarint32(aKey, szHdr); + assert( nKey>=0 ); + assert( szHdr<=(u32)nKey ); + while( idx<szHdr ){ + idx += getVarint32(aKey+idx, notUsed); + nField++; + } + assert( nField <= pKeyInfo->nField+pKeyInfo->nXField ); +} +#else +# define vdbeAssertFieldCountWithinLimits(A,B,C) +#endif + +/* +** This function is an optimized version of sqlite3BtreeRecordCompare() +** that (a) the first field of pPKey2 is an integer, and (b) the +** size-of-header varint at the start of (pKey1/nKey1) fits in a single +** byte (i.e. is less than 128). +** +** To avoid concerns about buffer overreads, this routine is only used +** on schemas where the maximum valid header size is 63 bytes or less. +*/ +static int vdbeRecordCompareInt( + int nKey1, const void *pKey1, /* Left key */ + UnpackedRecord *pPKey2 /* Right key */ +){ + const u8 *aKey = &((const u8*)pKey1)[*(const u8*)pKey1 & 0x3F]; + int serial_type = ((const u8*)pKey1)[1]; + int res; + u32 y; + u64 x; + i64 v = pPKey2->aMem[0].u.i; + i64 lhs; + + vdbeAssertFieldCountWithinLimits(nKey1, pKey1, pPKey2->pKeyInfo); + assert( (*(u8*)pKey1)<=0x3F || CORRUPT_DB ); + switch( serial_type ){ + case 1: { /* 1-byte signed integer */ + lhs = ONE_BYTE_INT(aKey); + testcase( lhs<0 ); + break; + } + case 2: { /* 2-byte signed integer */ + lhs = TWO_BYTE_INT(aKey); + testcase( lhs<0 ); + break; + } + case 3: { /* 3-byte signed integer */ + lhs = THREE_BYTE_INT(aKey); + testcase( lhs<0 ); + break; + } + case 4: { /* 4-byte signed integer */ + y = FOUR_BYTE_UINT(aKey); + lhs = (i64)*(int*)&y; + testcase( lhs<0 ); + break; + } + case 5: { /* 6-byte signed integer */ + lhs = FOUR_BYTE_UINT(aKey+2) + (((i64)1)<<32)*TWO_BYTE_INT(aKey); + testcase( lhs<0 ); + break; + } + case 6: { /* 8-byte signed integer */ + x = FOUR_BYTE_UINT(aKey); + x = (x<<32) | FOUR_BYTE_UINT(aKey+4); + lhs = *(i64*)&x; + testcase( lhs<0 ); + break; + } + case 8: + lhs = 0; + break; + case 9: + lhs = 1; + break; + + /* This case could be removed without changing the results of running + ** this code. Including it causes gcc to generate a faster switch + ** statement (since the range of switch targets now starts at zero and + ** is contiguous) but does not cause any duplicate code to be generated + ** (as gcc is clever enough to combine the two like cases). Other + ** compilers might be similar. */ + case 0: case 7: + return sqlite3BtreeRecordCompare(nKey1, pKey1, pPKey2); + + default: + return sqlite3BtreeRecordCompare(nKey1, pKey1, pPKey2); + } + + if( v>lhs ){ + res = pPKey2->r1; + }else if( v<lhs ){ + res = pPKey2->r2; + }else if( pPKey2->nField>1 ){ + /* The first fields of the two keys are equal. Compare the trailing + ** fields. */ + res = sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 1); + }else{ + /* The first fields of the two keys are equal and there are no trailing + ** fields. Return pPKey2->default_rc in this case. */ + res = pPKey2->default_rc; + } + + assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res) ); + return res; +} + +/* +** This function is an optimized version of sqlite3BtreeRecordCompare() +** that (a) the first field of pPKey2 is a string, that (b) the first field +** uses the collation sequence BINARY and (c) that the size-of-header varint +** at the start of (pKey1/nKey1) fits in a single byte. +*/ +static int vdbeRecordCompareString( + int nKey1, const void *pKey1, /* Left key */ + UnpackedRecord *pPKey2 /* Right key */ +){ + const u8 *aKey1 = (const u8*)pKey1; + int serial_type; + int res; + + vdbeAssertFieldCountWithinLimits(nKey1, pKey1, pPKey2->pKeyInfo); + getVarint32(&aKey1[1], serial_type); + if( serial_type<12 ){ + res = pPKey2->r1; /* (pKey1/nKey1) is a number or a null */ + }else if( !(serial_type & 0x01) ){ + res = pPKey2->r2; /* (pKey1/nKey1) is a blob */ + }else{ + int nCmp; + int nStr; + int szHdr = aKey1[0]; + + nStr = (serial_type-12) / 2; + if( (szHdr + nStr) > nKey1 ){ + pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corruption */ + } + nCmp = MIN( pPKey2->aMem[0].n, nStr ); + res = memcmp(&aKey1[szHdr], pPKey2->aMem[0].z, nCmp); + + if( res==0 ){ + res = nStr - pPKey2->aMem[0].n; + if( res==0 ){ + if( pPKey2->nField>1 ){ + res = sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 1); + }else{ + res = pPKey2->default_rc; + } + }else if( res>0 ){ + res = pPKey2->r2; + }else{ + res = pPKey2->r1; + } + }else if( res>0 ){ + res = pPKey2->r2; + }else{ + res = pPKey2->r1; + } + } + + assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res) + || CORRUPT_DB + || pPKey2->pKeyInfo->pBtree->mallocFailed + ); + return res; +} + +/* +** If the given Mem* has a zero-filled tail, turn it into an ordinary +** blob stored in dynamically allocated space. +*/ +#ifndef SQLITE_OMIT_INCRBLOB +SQLITE_PRIVATE int sqlite3VdbeMemExpandBlob(Mem *pMem){ + if( pMem->flags & MEM_Zero ){ + int nByte; + assert( pMem->flags&MEM_Blob ); + assert( (pMem->flags&MEM_RowSet)==0 ); + assert( pMem->pBtree==0 || sqlite3_mutex_held(pMem->pBtree->mutex) ); + + /* Set nByte to the number of bytes required to store the expanded blob. */ + nByte = pMem->n + pMem->u.nZero; + if( nByte<=0 ){ + nByte = 1; + } + if( sqlite3VdbeMemGrow(pMem, nByte, 1) ){ + return SQLITE_NOMEM; + } + + memset(&pMem->z[pMem->n], 0, pMem->u.nZero); + pMem->n += pMem->u.nZero; + pMem->flags &= ~MEM_Zero; + } + return SQLITE_OK; +} +#endif + +/* +** Convert a 64-bit IEEE double into a 64-bit signed integer. +** If the double is out of range of a 64-bit signed integer then +** return the closest available 64-bit signed integer. +*/ +static i64 doubleToInt64(double r){ +#ifdef SQLITE_OMIT_FLOATING_POINT + /* When floating-point is omitted, double and int64 are the same thing */ + return r; +#else + /* + ** Many compilers we encounter do not define constants for the + ** minimum and maximum 64-bit integers, or they define them + ** inconsistently. And many do not understand the "LL" notation. + ** So we define our own static constants here using nothing + ** larger than a 32-bit integer constant. + */ + static const i64 maxInt = LARGEST_INT64; + static const i64 minInt = SMALLEST_INT64; + + if( r<=(double)minInt ){ + return minInt; + }else if( r>=(double)maxInt ){ + return maxInt; + }else{ + return (i64)r; + } +#endif +} + +/* +** Return some kind of integer value which is the best we can do +** at representing the value that *pMem describes as an integer. +** If pMem is an integer, then the value is exact. If pMem is +** a floating-point then the value returned is the integer part. +** If pMem is a string or blob, then we make an attempt to convert +** it into an integer and return that. If pMem represents an +** an SQL-NULL value, return 0. +** +** If pMem represents a string value, its encoding might be changed. +*/ +SQLITE_PRIVATE i64 sqlite3VdbeIntValue(Mem *pMem){ + int flags; + assert( pMem->pBtree==0 || sqlite3_mutex_held(pMem->pBtree->mutex) ); + assert( EIGHT_BYTE_ALIGNMENT(pMem) ); + flags = pMem->flags; + if( flags & MEM_Int ){ + return pMem->u.i; + }else if( flags & MEM_Real ){ + return doubleToInt64(pMem->u.r); + }else if( flags & (MEM_Str|MEM_Blob) ){ + i64 value = 0; + assert( pMem->z || pMem->n==0 ); + sqlite3Atoi64(pMem->z, &value, pMem->n, pMem->enc); + return value; + }else{ + return 0; + } +} + +/* +** Return the best representation of pMem that we can get into a +** double. If pMem is already a double or an integer, return its +** value. If it is a string or blob, try to convert it to a double. +** If it is a NULL, return 0.0. +*/ +SQLITE_PRIVATE double sqlite3VdbeRealValue(Mem *pMem){ + assert( pMem->pBtree==0 || sqlite3_mutex_held(pMem->pBtree->mutex) ); + assert( EIGHT_BYTE_ALIGNMENT(pMem) ); + if( pMem->flags & MEM_Real ){ + return pMem->u.r; + }else if( pMem->flags & MEM_Int ){ + return (double)pMem->u.i; + }else if( pMem->flags & (MEM_Str|MEM_Blob) ){ + /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */ + double val = (double)0; + sqlite3AtoF(pMem->z, &val, pMem->n, pMem->enc); + return val; + }else{ + /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */ + return (double)0; + } +} + +/* +** The pVal argument is known to be a value other than NULL. +** Convert it into a string with encoding enc and return a pointer +** to a zero-terminated version of that string. +*/ +static SQLITE_NOINLINE const void *valueToText(Mem* pVal, u8 enc){ + assert( pVal!=0 ); + assert( pVal->pBtree==0 || sqlite3_mutex_held(pVal->pBtree->mutex) ); + assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) ); + assert( (pVal->flags & MEM_RowSet)==0 ); + assert( (pVal->flags & (MEM_Null))==0 ); + if( pVal->flags & (MEM_Blob|MEM_Str) ){ + pVal->flags |= MEM_Str; + if( pVal->flags & MEM_Zero ){ + sqlite3VdbeMemExpandBlob(pVal); + } + if( pVal->enc != (enc & ~SQLITE_UTF16_ALIGNED) ){ + sqlite3VdbeChangeEncoding(pVal, enc & ~SQLITE_UTF16_ALIGNED); + } + if( (enc & SQLITE_UTF16_ALIGNED)!=0 && 1==(1&SQLITE_PTR_TO_INT(pVal->z)) ){ + assert( (pVal->flags & (MEM_Ephem|MEM_Static))!=0 ); + if( sqlite3VdbeMemMakeWriteable(pVal)!=SQLITE_OK ){ + return 0; + } + } + sqlite3VdbeMemNulTerminate(pVal); /* IMP: R-31275-44060 */ + }else{ + sqlite3VdbeMemStringify(pVal, enc, 0); + assert( 0==(1&SQLITE_PTR_TO_INT(pVal->z)) ); + } + assert(pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) || pVal->pBtree==0 + || pVal->pBtree->mallocFailed ); + if( pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) ){ + return pVal->z; + }else{ + return 0; + } +} + +/* This function is only available internally, it is not part of the +** external API. It works in a similar way to sqlite3_value_text(), +** except the data returned is in the encoding specified by the second +** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or +** SQLITE_UTF8. +** +** (2006-02-16:) The enc value can be or-ed with SQLITE_UTF16_ALIGNED. +** If that is the case, then the result must be aligned on an even byte +** boundary. +*/ +SQLITE_PRIVATE const void *sqlite3ValueText(Mem* pVal, u8 enc){ + if( !pVal ) return 0; + assert( pVal->pBtree==0 || sqlite3_mutex_held(pVal->pBtree->mutex) ); + assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) ); + assert( (pVal->flags & MEM_RowSet)==0 ); + if( (pVal->flags&MEM_Str)==MEM_Str && pVal->enc==enc ){ + return pVal->z; + } + if( pVal->flags&MEM_Null ){ + return 0; + } + return valueToText(pVal, enc); +} + +/* +** Both *pMem1 and *pMem2 contain string values. Compare the two values +** using the collation sequence pColl. As usual, return a negative , zero +** or positive value if *pMem1 is less than, equal to or greater than +** *pMem2, respectively. Similar in spirit to "rc = (*pMem1) - (*pMem2);". +*/ +static int vdbeCompareMemString( + const Mem *pMem1, + const Mem *pMem2, + const CollSeq *pColl, + u8 *prcErr /* If an OOM occurs, set to SQLITE_NOMEM */ +){ + if( pMem1->enc==pColl->enc ){ + /* The strings are already in the correct encoding. Call the + ** comparison function directly */ + return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z); + }else{ + int rc; + const void *v1, *v2; + int n1, n2; + Mem c1; + Mem c2; + sqlite3VdbeMemInit(&c1, pMem1->pBtree, MEM_Null); + sqlite3VdbeMemInit(&c2, pMem1->pBtree, MEM_Null); + sqlite3VdbeMemShallowCopy(&c1, pMem1, MEM_Ephem); + sqlite3VdbeMemShallowCopy(&c2, pMem2, MEM_Ephem); + v1 = sqlite3ValueText(&c1, pColl->enc); + n1 = v1==0 ? 0 : c1.n; + v2 = sqlite3ValueText(&c2, pColl->enc); + n2 = v2==0 ? 0 : c2.n; + rc = pColl->xCmp(pColl->pUser, n1, v1, n2, v2); + sqlite3VdbeMemRelease(&c1); + sqlite3VdbeMemRelease(&c2); + if( (v1==0 || v2==0) && prcErr ) *prcErr = SQLITE_NOMEM; + return rc; + } +} + +/* +** The first argument passed to this function is a serial-type that +** corresponds to an integer - all values between 1 and 9 inclusive +** except 7. The second points to a buffer containing an integer value +** serialized according to serial_type. This function deserializes +** and returns the value. +*/ +static i64 vdbeRecordDecodeInt(u32 serial_type, const u8 *aKey){ + u32 y; + assert( CORRUPT_DB || (serial_type>=1 && serial_type<=9 && serial_type!=7) ); + switch( serial_type ){ + case 0: + case 1: + testcase( aKey[0]&0x80 ); + return ONE_BYTE_INT(aKey); + case 2: + testcase( aKey[0]&0x80 ); + return TWO_BYTE_INT(aKey); + case 3: + testcase( aKey[0]&0x80 ); + return THREE_BYTE_INT(aKey); + case 4: { + testcase( aKey[0]&0x80 ); + y = FOUR_BYTE_UINT(aKey); + return (i64)*(int*)&y; + } + case 5: { + testcase( aKey[0]&0x80 ); + return FOUR_BYTE_UINT(aKey+2) + (((i64)1)<<32)*TWO_BYTE_INT(aKey); + } + case 6: { + u64 x = FOUR_BYTE_UINT(aKey); + testcase( aKey[0]&0x80 ); + x = (x<<32) | FOUR_BYTE_UINT(aKey+4); + return (i64)*(i64*)&x; + } + } + + return (serial_type - 8); +} + +/* +** This function compares the two table rows or index records +** specified by {nKey1, pKey1} and pPKey2. It returns a negative, zero +** or positive integer if key1 is less than, equal to or +** greater than key2. The {nKey1, pKey1} key must be a blob +** created by the OP_MakeRecord opcode of the VDBE. The pPKey2 +** key must be a parsed key such as obtained from +** sqlite3VdbeParseRecord. +** +** If argument bSkip is non-zero, it is assumed that the caller has already +** determined that the first fields of the keys are equal. +** +** Key1 and Key2 do not have to contain the same number of fields. If all +** fields that appear in both keys are equal, then pPKey2->default_rc is +** returned. +** +** If database corruption is discovered, set pPKey2->errCode to +** SQLITE_CORRUPT and return 0. If an OOM error is encountered, +** pPKey2->errCode is set to SQLITE_NOMEM and, if it is not NULL, the +** malloc-failed flag set on database handle (pPKey2->pKeyInfo->pBtree). +*/ +SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip( + int nKey1, const void *pKey1, /* Left key */ + UnpackedRecord *pPKey2, /* Right key */ + int bSkip /* If true, skip the first field */ +){ + u32 d1; /* Offset into aKey[] of next data element */ + int i; /* Index of next field to compare */ + u32 szHdr1; /* Size of record header in bytes */ + u32 idx1; /* Offset of first type in header */ + int rc = 0; /* Return value */ + Mem *pRhs = pPKey2->aMem; /* Next field of pPKey2 to compare */ + KeyInfo *pKeyInfo = pPKey2->pKeyInfo; + const unsigned char *aKey1 = (const unsigned char *)pKey1; + Mem mem1; + + /* If bSkip is true, then the caller has already determined that the first + ** two elements in the keys are equal. Fix the various stack variables so + ** that this routine begins comparing at the second field. */ + if( bSkip ){ + u32 s1; + idx1 = 1 + getVarint32(&aKey1[1], s1); + szHdr1 = aKey1[0]; + d1 = szHdr1 + sqlite3BtreeSerialTypeLen(s1); + i = 1; + pRhs++; + }else{ + idx1 = getVarint32(aKey1, szHdr1); + d1 = szHdr1; + if( d1>(unsigned)nKey1 ){ + pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corruption */ + } + i = 0; + } + + VVA_ONLY( mem1.szMalloc = 0; ) /* Only needed by assert() statements */ + assert( pPKey2->pKeyInfo->nField+pPKey2->pKeyInfo->nXField>=pPKey2->nField + || CORRUPT_DB ); + assert( pPKey2->pKeyInfo->aSortOrder!=0 ); + assert( pPKey2->pKeyInfo->nField>0 ); + assert( idx1<=szHdr1 || CORRUPT_DB ); + do{ + u32 serial_type; + + /* RHS is an integer */ + if( pRhs->flags & MEM_Int ){ + serial_type = aKey1[idx1]; + testcase( serial_type==12 ); + if( serial_type>=10 ){ + rc = +1; + }else if( serial_type==0 ){ + rc = -1; + }else if( serial_type==7 ){ + double rhs = (double)pRhs->u.i; + sqlite3BtreeSerialGet(&aKey1[d1], serial_type, &mem1); + if( mem1.u.r<rhs ){ + rc = -1; + }else if( mem1.u.r>rhs ){ + rc = +1; + } + }else{ + i64 lhs = vdbeRecordDecodeInt(serial_type, &aKey1[d1]); + i64 rhs = pRhs->u.i; + if( lhs<rhs ){ + rc = -1; + }else if( lhs>rhs ){ + rc = +1; + } + } + } + + /* RHS is real */ + else if( pRhs->flags & MEM_Real ){ + serial_type = aKey1[idx1]; + if( serial_type>=10 ){ + /* Serial types 12 or greater are strings and blobs (greater than + ** numbers). Types 10 and 11 are currently "reserved for future + ** use", so it doesn't really matter what the results of comparing + ** them to numberic values are. */ + rc = +1; + }else if( serial_type==0 ){ + rc = -1; + }else{ + double rhs = pRhs->u.r; + double lhs; + sqlite3BtreeSerialGet(&aKey1[d1], serial_type, &mem1); + if( serial_type==7 ){ + lhs = mem1.u.r; + }else{ + lhs = (double)mem1.u.i; + } + if( lhs<rhs ){ + rc = -1; + }else if( lhs>rhs ){ + rc = +1; + } + } + } + + /* RHS is a string */ + else if( pRhs->flags & MEM_Str ){ + getVarint32(&aKey1[idx1], serial_type); + testcase( serial_type==12 ); + if( serial_type<12 ){ + rc = -1; + }else if( !(serial_type & 0x01) ){ + rc = +1; + }else{ + mem1.n = (serial_type - 12) / 2; + testcase( (d1+mem1.n)==(unsigned)nKey1 ); + testcase( (d1+mem1.n+1)==(unsigned)nKey1 ); + if( (d1+mem1.n) > (unsigned)nKey1 ){ + pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corruption */ + }else if( pKeyInfo->aColl[i] ){ + mem1.enc = pKeyInfo->enc; + mem1.pBtree = pKeyInfo->pBtree; + mem1.flags = MEM_Str; + mem1.z = (char*)&aKey1[d1]; + rc = vdbeCompareMemString( + &mem1, pRhs, pKeyInfo->aColl[i], &pPKey2->errCode + ); + }else{ + int nCmp = MIN(mem1.n, pRhs->n); + rc = memcmp(&aKey1[d1], pRhs->z, nCmp); + if( rc==0 ) rc = mem1.n - pRhs->n; + } + } + } + + /* RHS is a blob */ + else if( pRhs->flags & MEM_Blob ){ + getVarint32(&aKey1[idx1], serial_type); + testcase( serial_type==12 ); + if( serial_type<12 || (serial_type & 0x01) ){ + rc = -1; + }else{ + int nStr = (serial_type - 12) / 2; + testcase( (d1+nStr)==(unsigned)nKey1 ); + testcase( (d1+nStr+1)==(unsigned)nKey1 ); + if( (d1+nStr) > (unsigned)nKey1 ){ + pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corruption */ + }else{ + int nCmp = MIN(nStr, pRhs->n); + rc = memcmp(&aKey1[d1], pRhs->z, nCmp); + if( rc==0 ) rc = nStr - pRhs->n; + } + } + } + + /* RHS is null */ + else{ + serial_type = aKey1[idx1]; + rc = (serial_type!=0); + } + + if( rc!=0 ){ + if( pKeyInfo->aSortOrder[i] ){ + rc = -rc; + } + assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, rc) ); + assert( mem1.szMalloc==0 ); /* See comment below */ + return rc; + } + + i++; + pRhs++; + d1 += sqlite3BtreeSerialTypeLen(serial_type); + idx1 += sqlite3BtreeVarintLen(serial_type); + }while( idx1<(unsigned)szHdr1 && i<pPKey2->nField && d1<=(unsigned)nKey1 ); + + /* No memory allocation is ever used on mem1. Prove this using + ** the following assert(). If the assert() fails, it indicates a + ** memory leak and a need to call sqlite3VdbeMemRelease(&mem1). */ + assert( mem1.szMalloc==0 ); + + /* rc==0 here means that one or both of the keys ran out of fields and + ** all the fields up to that point were equal. Return the default_rc + ** value. */ + assert( CORRUPT_DB + || vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, pPKey2->default_rc) + || pKeyInfo->pBtree->mallocFailed + ); + return pPKey2->default_rc; +} +SQLITE_API int SQLITE_STDCALL sqlite3BtreeRecordCompare( + int nKey1, const void *pKey1, /* Left key */ + UnpackedRecord *pPKey2 /* Right key */ +){ + return sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 0); +} + +/* +** Return a pointer to an sqlite3BtreeRecordCompare() compatible function +** suitable for comparing serialized records to the unpacked record passed +** as the only argument. +*/ +SQLITE_PRIVATE RecordCompare sqlite3VdbeFindCompare(UnpackedRecord *p){ + /* varintRecordCompareInt() and varintRecordCompareString() both assume + ** that the size-of-header varint that occurs at the start of each record + ** fits in a single byte (i.e. is 127 or less). varintRecordCompareInt() + ** also assumes that it is safe to overread a buffer by at least the + ** maximum possible legal header size plus 8 bytes. Because there is + ** guaranteed to be at least 74 (but not 136) bytes of padding following each + ** buffer passed to varintRecordCompareInt() this makes it convenient to + ** limit the size of the header to 64 bytes in cases where the first field + ** is an integer. + ** + ** The easiest way to enforce this limit is to consider only records with + ** 13 fields or less. If the first field is an integer, the maximum legal + ** header size is (12*5 + 1 + 1) bytes. */ + if( (p->pKeyInfo->nField + p->pKeyInfo->nXField)<=13 ){ + int flags = p->aMem[0].flags; + if( p->pKeyInfo->aSortOrder[0] ){ + p->r1 = 1; + p->r2 = -1; + }else{ + p->r1 = -1; + p->r2 = 1; + } + if( (flags & MEM_Int) ){ + return vdbeRecordCompareInt; + } + testcase( flags & MEM_Real ); + testcase( flags & MEM_Null ); + testcase( flags & MEM_Blob ); + if( (flags & (MEM_Real|MEM_Null|MEM_Blob))==0 && p->pKeyInfo->aColl[0]==0 ){ + assert( flags & MEM_Str ); + return vdbeRecordCompareString; + } + } + + return sqlite3BtreeRecordCompare; +} + +/* +** Move data out of a btree key or data field and into a Mem structure. +** The data or key is taken from the entry that pCur is currently pointing +** to. offset and amt determine what portion of the data or key to retrieve. +** key is true to get the key or false to get data. The result is written +** into the pMem element. +** +** The pMem object must have been initialized. This routine will use +** pMem->zMalloc to hold the content from the btree, if possible. New +** pMem->zMalloc space will be allocated if necessary. The calling routine +** is responsible for making sure that the pMem object is eventually +** destroyed. +** +** If this routine fails for any reason (malloc returns NULL or unable +** to read from the disk) then the pMem is left in an inconsistent state. +*/ +static SQLITE_NOINLINE int vdbeMemFromBtreeResize( + BtCursor *pCur, /* Cursor pointing at record to retrieve. */ + u32 offset, /* Offset from the start of data to return bytes from. */ + u32 amt, /* Number of bytes to return. */ + int key, /* If true, retrieve from the btree key, not data. */ + Mem *pMem /* OUT: Return data in this Mem structure. */ +){ + int rc; + pMem->flags = MEM_Null; + if( SQLITE_OK==(rc = sqlite3VdbeMemClearAndResize(pMem, amt+2)) ){ + if( key ){ + rc = sqlite3BtreeKey(pCur, offset, amt, pMem->z); + }else{ + rc = sqlite3BtreeData(pCur, offset, amt, pMem->z); + } + if( rc==SQLITE_OK ){ + pMem->z[amt] = 0; + pMem->z[amt+1] = 0; + pMem->flags = MEM_Blob|MEM_Term; + pMem->n = (int)amt; + }else{ + sqlite3VdbeMemRelease(pMem); + } + } + return rc; +} + +SQLITE_PRIVATE int sqlite3VdbeMemFromBtree( + BtCursor *pCur, /* Cursor pointing at record to retrieve. */ + u32 offset, /* Offset from the start of data to return bytes from. */ + u32 amt, /* Number of bytes to return. */ + int key, /* If true, retrieve from the btree key, not data. */ + Mem *pMem /* OUT: Return data in this Mem structure. */ +){ + char *zData; /* Data from the btree layer */ + u32 available = 0; /* Number of bytes available on the local btree page */ + int rc = SQLITE_OK; /* Return code */ + + assert( sqlite3BtreeCursorIsValid(pCur) ); + assert( !VdbeMemDynamic(pMem) ); + + /* Note: the calls to BtreeKeyFetch() and DataFetch() below assert() + ** that both the BtShared and database handle mutexes are held. */ + assert( (pMem->flags & MEM_RowSet)==0 ); + if( key ){ + zData = (char *)sqlite3BtreeKeyFetch(pCur, &available); + }else{ + zData = (char *)sqlite3BtreeDataFetch(pCur, &available); + } + assert( zData!=0 ); + + if( offset+amt<=available ){ + pMem->z = &zData[offset]; + pMem->flags = MEM_Blob|MEM_Ephem; + pMem->n = (int)amt; + }else{ + rc = vdbeMemFromBtreeResize(pCur, offset, amt, key, pMem); + } + + return rc; +} + +/* +** pCur points at an index entry created using the OP_MakeRecord opcode. +** Read the rowid (the last field in the record) and store it in *rowid. +** Return SQLITE_OK if everything works, or an error code otherwise. +** +** pCur might be pointing to text obtained from a corrupt database file. +** So the content cannot be trusted. Do appropriate checks on the content. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeIdxRowid(Btree* pBtree, BtCursor *pCur, i64 *rowid){ + i64 nCellKey = 0; + int rc; + u32 szHdr; /* Size of the header */ + u32 typeRowid; /* Serial type of the rowid */ + u32 lenRowid; /* Size of the rowid */ + Mem m, v; + + /* Get the size of the index entry. Only indices entries of less + ** than 2GiB are support - anything large must be database corruption. + ** Any corruption is detected in sqlite3BtreeParseCellPtr(), though, so + ** this code can safely assume that nCellKey is 32-bits + */ + assert( sqlite3BtreeCursorIsValid(pCur) ); + VVA_ONLY(rc =) sqlite3BtreeKeySize(pCur, &nCellKey); + assert( rc==SQLITE_OK ); /* pCur is always valid so KeySize cannot fail */ + assert( (nCellKey & SQLITE_MAX_U32)==(u64)nCellKey ); + + /* Read in the complete content of the index entry */ + sqlite3VdbeMemInit(&m, pBtree, 0); + rc = sqlite3VdbeMemFromBtree(pCur, 0, (u32)nCellKey, 1, &m); + if( rc ){ + return rc; + } + + /* The index entry must begin with a header size */ + (void)getVarint32((u8*)m.z, szHdr); + testcase( szHdr==3 ); + testcase( szHdr==m.n ); + if( unlikely(szHdr<3 || (int)szHdr>m.n) ){ + goto idx_rowid_corruption; + } + + /* The last field of the index should be an integer - the ROWID. + ** Verify that the last entry really is an integer. */ + (void)getVarint32((u8*)&m.z[szHdr-1], typeRowid); + testcase( typeRowid==1 ); + testcase( typeRowid==2 ); + testcase( typeRowid==3 ); + testcase( typeRowid==4 ); + testcase( typeRowid==5 ); + testcase( typeRowid==6 ); + testcase( typeRowid==8 ); + testcase( typeRowid==9 ); + if( unlikely(typeRowid<1 || typeRowid>9 || typeRowid==7) ){ + goto idx_rowid_corruption; + } + lenRowid = sqlite3SmallTypeSizes[typeRowid]; + testcase( (u32)m.n==szHdr+lenRowid ); + if( unlikely((u32)m.n<szHdr+lenRowid) ){ + goto idx_rowid_corruption; + } + + /* Fetch the integer off the end of the index record */ + sqlite3BtreeSerialGet((u8*)&m.z[m.n-lenRowid], typeRowid, &v); + *rowid = v.u.i; + sqlite3VdbeMemRelease(&m); + return SQLITE_OK; + + /* Jump here if database corruption is detected after m has been + ** allocated. Free the m object and return SQLITE_CORRUPT. */ +idx_rowid_corruption: + testcase( m.szMalloc!=0 ); + sqlite3VdbeMemRelease(&m); + return SQLITE_CORRUPT_BKPT; +} + +/************** End of btree.c ***********************************************/ +/************** Begin file backup.c ******************************************/ +/* +** 2009 January 28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the implementation of the sqlite3_backup_XXX() +** API functions and the related features. +*/ +/* #include "sqliteInt.h" */ +/* #include "btreeInt.h" */ + +/* +** Structure allocated for each backup operation. +*/ +struct sqlite3_backup { + Btree *pDest; /* Destination b-tree file */ + u32 iDestSchema; /* Original schema cookie in destination */ + int bDestLocked; /* True once a write-transaction is open on pDest */ + + Pgno iNext; /* Page number of the next source page to copy */ + Btree *pSrc; /* Source b-tree file */ + + int rc; /* Backup process error code */ + + /* These two variables are set by every call to backup_step(). They are + ** read by calls to backup_remaining() and backup_pagecount(). + */ + Pgno nRemaining; /* Number of pages left to copy */ + Pgno nPagecount; /* Total number of pages to copy */ + + int isAttached; /* True once backup has been registered with pager */ + sqlite3_backup *pNext; /* Next backup associated with source pager */ +}; + +/* +** Argument rc is an SQLite error code. Return true if this error is +** considered fatal if encountered during a backup operation. All errors +** are considered fatal except for SQLITE_BUSY and SQLITE_LOCKED. +*/ +static int isFatalError(int rc){ + return (rc!=SQLITE_OK && rc!=SQLITE_BUSY && ALWAYS(rc!=SQLITE_LOCKED)); +} + +/* +** Parameter zSrcData points to a buffer containing the data for +** page iSrcPg from the source database. Copy this data into the +** destination database. +*/ +static int backupOnePage( + sqlite3_backup *p, /* Backup handle */ + Pgno iSrcPg, /* Source database page to backup */ + const u8 *zSrcData, /* Source database page data */ + int bUpdate /* True for an update, false otherwise */ +){ + Pager * const pDestPager = sqlite3BtreePager(p->pDest); + const int nSrcPgsz = sqlite3BtreeGetPageSize(p->pSrc); + int nDestPgsz = sqlite3BtreeGetPageSize(p->pDest); + const int nCopy = MIN(nSrcPgsz, nDestPgsz); + const i64 iEnd = (i64)iSrcPg*(i64)nSrcPgsz; +#ifdef SQLITE_HAS_CODEC + /* Use BtreeGetReserveNoMutex() for the source b-tree, as although it is + ** guaranteed that the shared-mutex is held by this thread, handle + ** p->pSrc may not actually be the owner. */ + int nSrcReserve = sqlite3BtreeGetReserveNoMutex(p->pSrc); + int nDestReserve = sqlite3BtreeGetOptimalReserve(p->pDest); +#endif + int rc = SQLITE_OK; + i64 iOff; + + assert( sqlite3BtreeGetReserveNoMutex(p->pSrc)>=0 ); + assert( p->bDestLocked ); + assert( !isFatalError(p->rc) ); + assert( iSrcPg!=PENDING_BYTE_PAGE(p->pSrc->pBt) ); + assert( zSrcData ); + +#ifdef SQLITE_HAS_CODEC + /* Backup is not possible if the page size of the destination is changing + ** and a codec is in use. + */ + if( nSrcPgsz!=nDestPgsz && sqlite3PagerGetCodec(pDestPager)!=0 ){ + rc = SQLITE_READONLY; + } + + /* Backup is not possible if the number of bytes of reserve space differ + ** between source and destination. If there is a difference, try to + ** fix the destination to agree with the source. If that is not possible, + ** then the backup cannot proceed. + */ + if( nSrcReserve!=nDestReserve ){ + u32 newPgsz = nSrcPgsz; + rc = sqlite3PagerSetPagesize(pDestPager, &newPgsz, nSrcReserve); + if( rc==SQLITE_OK && newPgsz!=nSrcPgsz ) rc = SQLITE_READONLY; + } +#endif + + /* This loop runs once for each destination page spanned by the source + ** page. For each iteration, variable iOff is set to the byte offset + ** of the destination page. + */ + for(iOff=iEnd-(i64)nSrcPgsz; rc==SQLITE_OK && iOff<iEnd; iOff+=nDestPgsz){ + DbPage *pDestPg = 0; + Pgno iDest = (Pgno)(iOff/nDestPgsz)+1; + if( iDest==PENDING_BYTE_PAGE(p->pDest->pBt) ) continue; + if( SQLITE_OK==(rc = sqlite3PagerGet(pDestPager, iDest, &pDestPg)) + && SQLITE_OK==(rc = sqlite3PagerWrite(pDestPg)) + ){ + const u8 *zIn = &zSrcData[iOff%nSrcPgsz]; + u8 *zDestData = sqlite3PagerGetData(pDestPg); + u8 *zOut = &zDestData[iOff%nDestPgsz]; + + /* Copy the data from the source page into the destination page. + ** Then clear the Btree layer MemPage.isInit flag. Both this module + ** and the pager code use this trick (clearing the first byte + ** of the page 'extra' space to invalidate the Btree layers + ** cached parse of the page). MemPage.isInit is marked + ** "MUST BE FIRST" for this purpose. + */ + memcpy(zOut, zIn, nCopy); + ((u8 *)sqlite3PagerGetExtra(pDestPg))[0] = 0; + if( iOff==0 && bUpdate==0 ){ + sqlite3Put4byte(&zOut[28], sqlite3BtreeLastPage(p->pSrc)); + } + } + sqlite3PagerUnref(pDestPg); + } + + return rc; +} + +/* +** This function is called after the contents of page iPage of the +** source database have been modified. If page iPage has already been +** copied into the destination database, then the data written to the +** destination is now invalidated. The destination copy of iPage needs +** to be updated with the new data before the backup operation is +** complete. +** +** It is assumed that the mutex associated with the BtShared object +** corresponding to the source database is held when this function is +** called. +*/ +static SQLITE_NOINLINE void backupUpdate( + sqlite3_backup *p, + Pgno iPage, + const u8 *aData +){ + assert( p!=0 ); + do{ + assert( sqlite3_mutex_held(p->pSrc->pBt->mutex) ); + if( !isFatalError(p->rc) && iPage<p->iNext ){ + /* The backup process p has already copied page iPage. But now it + ** has been modified by a transaction on the source pager. Copy + ** the new data into the backup. + */ + int rc; + assert( p->pDest ); + sqlite3_mutex_enter(p->pDest->mutex); + rc = backupOnePage(p, iPage, aData, 1); + sqlite3_mutex_leave(p->pDest->mutex); + assert( rc!=SQLITE_BUSY && rc!=SQLITE_LOCKED ); + if( rc!=SQLITE_OK ){ + p->rc = rc; + } + } + }while( (p = p->pNext)!=0 ); +} +SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *pBackup, Pgno iPage, const u8 *aData){ + if( pBackup ) backupUpdate(pBackup, iPage, aData); +} + +/* +** Restart the backup process. This is called when the pager layer +** detects that the database has been modified by an external database +** connection. In this case there is no way of knowing which of the +** pages that have been copied into the destination database are still +** valid and which are not, so the entire process needs to be restarted. +** +** It is assumed that the mutex associated with the BtShared object +** corresponding to the source database is held when this function is +** called. +*/ +SQLITE_PRIVATE void sqlite3BackupRestart(sqlite3_backup *pBackup){ + sqlite3_backup *p; /* Iterator variable */ + for(p=pBackup; p; p=p->pNext){ + assert( sqlite3_mutex_held(p->pSrc->pBt->mutex) ); + p->iNext = 1; + } +} + +/************** End of backup.c **********************************************/ +/************** Begin file journal.c *****************************************/ +/* +** 2007 August 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file implements a special kind of sqlite3_file object used +** by SQLite to create journal files if the atomic-write optimization +** is enabled. +** +** The distinctive characteristic of this sqlite3_file is that the +** actual on disk file is created lazily. When the file is created, +** the caller specifies a buffer size for an in-memory buffer to +** be used to service read() and write() requests. The actual file +** on disk is not created or populated until either: +** +** 1) The in-memory representation grows too large for the allocated +** buffer, or +** 2) The sqlite3JournalCreate() function is called. +*/ +#ifdef SQLITE_ENABLE_ATOMIC_WRITE +/* #include "sqliteInt.h" */ + + +/* +** A JournalFile object is a subclass of sqlite3_file used by +** as an open file handle for journal files. +*/ +struct JournalFile { + sqlite3_io_methods *pMethod; /* I/O methods on journal files */ + int nBuf; /* Size of zBuf[] in bytes */ + char *zBuf; /* Space to buffer journal writes */ + int iSize; /* Amount of zBuf[] currently used */ + int flags; /* xOpen flags */ + sqlite3_vfs *pVfs; /* The "real" underlying VFS */ + sqlite3_file *pReal; /* The "real" underlying file descriptor */ + const char *zJournal; /* Name of the journal file */ +}; +typedef struct JournalFile JournalFile; + +/* +** If it does not already exists, create and populate the on-disk file +** for JournalFile p. +*/ +static int createFile(JournalFile *p){ + int rc = SQLITE_OK; + if( !p->pReal ){ + sqlite3_file *pReal = (sqlite3_file *)&p[1]; + rc = sqlite3OsOpen(p->pVfs, p->zJournal, pReal, p->flags, 0); + if( rc==SQLITE_OK ){ + p->pReal = pReal; + if( p->iSize>0 ){ + assert(p->iSize<=p->nBuf); + rc = sqlite3OsWrite(p->pReal, p->zBuf, p->iSize, 0); + } + if( rc!=SQLITE_OK ){ + /* If an error occurred while writing to the file, close it before + ** returning. This way, SQLite uses the in-memory journal data to + ** roll back changes made to the internal page-cache before this + ** function was called. */ + sqlite3OsClose(pReal); + p->pReal = 0; + } + } + } + return rc; +} + +/* +** Close the file. +*/ +static int jrnlClose(sqlite3_file *pJfd){ + JournalFile *p = (JournalFile *)pJfd; + if( p->pReal ){ + sqlite3OsClose(p->pReal); + } + sqlite3_free(p->zBuf); + return SQLITE_OK; +} + +/* +** Read data from the file. +*/ +static int jrnlRead( + sqlite3_file *pJfd, /* The journal file from which to read */ + void *zBuf, /* Put the results here */ + int iAmt, /* Number of bytes to read */ + sqlite_int64 iOfst /* Begin reading at this offset */ +){ + int rc = SQLITE_OK; + JournalFile *p = (JournalFile *)pJfd; + if( p->pReal ){ + rc = sqlite3OsRead(p->pReal, zBuf, iAmt, iOfst); + }else if( (iAmt+iOfst)>p->iSize ){ + rc = SQLITE_IOERR_SHORT_READ; + }else{ + memcpy(zBuf, &p->zBuf[iOfst], iAmt); + } + return rc; +} + +/* +** Write data to the file. +*/ +static int jrnlWrite( + sqlite3_file *pJfd, /* The journal file into which to write */ + const void *zBuf, /* Take data to be written from here */ + int iAmt, /* Number of bytes to write */ + sqlite_int64 iOfst /* Begin writing at this offset into the file */ +){ + int rc = SQLITE_OK; + JournalFile *p = (JournalFile *)pJfd; + if( !p->pReal && (iOfst+iAmt)>p->nBuf ){ + rc = createFile(p); + } + if( rc==SQLITE_OK ){ + if( p->pReal ){ + rc = sqlite3OsWrite(p->pReal, zBuf, iAmt, iOfst); + }else{ + memcpy(&p->zBuf[iOfst], zBuf, iAmt); + if( p->iSize<(iOfst+iAmt) ){ + p->iSize = (iOfst+iAmt); + } + } + } + return rc; +} + +/* +** Truncate the file. +*/ +static int jrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){ + int rc = SQLITE_OK; + JournalFile *p = (JournalFile *)pJfd; + if( p->pReal ){ + rc = sqlite3OsTruncate(p->pReal, size); + }else if( size<p->iSize ){ + p->iSize = size; + } + return rc; +} + +/* +** Sync the file. +*/ +static int jrnlSync(sqlite3_file *pJfd, int flags){ + int rc; + JournalFile *p = (JournalFile *)pJfd; + if( p->pReal ){ + rc = sqlite3OsSync(p->pReal, flags); + }else{ + rc = SQLITE_OK; + } + return rc; +} + +/* +** Query the size of the file in bytes. +*/ +static int jrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){ + int rc = SQLITE_OK; + JournalFile *p = (JournalFile *)pJfd; + if( p->pReal ){ + rc = sqlite3OsFileSize(p->pReal, pSize); + }else{ + *pSize = (sqlite_int64) p->iSize; + } + return rc; +} + +/* +** Table of methods for JournalFile sqlite3_file object. +*/ +static struct sqlite3_io_methods JournalFileMethods = { + 1, /* iVersion */ + jrnlClose, /* xClose */ + jrnlRead, /* xRead */ + jrnlWrite, /* xWrite */ + jrnlTruncate, /* xTruncate */ + jrnlSync, /* xSync */ + jrnlFileSize, /* xFileSize */ + 0, /* xLock */ + 0, /* xUnlock */ + 0, /* xCheckReservedLock */ + 0, /* xFileControl */ + 0, /* xSectorSize */ + 0, /* xDeviceCharacteristics */ + 0, /* xShmMap */ + 0, /* xShmLock */ + 0, /* xShmBarrier */ + 0 /* xShmUnmap */ +}; + +/* +** Open a journal file. +*/ +SQLITE_PRIVATE int sqlite3JournalOpen( + sqlite3_vfs *pVfs, /* The VFS to use for actual file I/O */ + const char *zName, /* Name of the journal file */ + sqlite3_file *pJfd, /* Preallocated, blank file handle */ + int flags, /* Opening flags */ + int nBuf /* Bytes buffered before opening the file */ +){ + JournalFile *p = (JournalFile *)pJfd; + memset(p, 0, sqlite3JournalSize(pVfs)); + if( nBuf>0 ){ + p->zBuf = sqlite3MallocZero(nBuf); + if( !p->zBuf ){ + return SQLITE_NOMEM; + } + }else{ + return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0); + } + p->pMethod = &JournalFileMethods; + p->nBuf = nBuf; + p->flags = flags; + p->zJournal = zName; + p->pVfs = pVfs; + return SQLITE_OK; +} + +/* +** If the argument p points to a JournalFile structure, and the underlying +** file has not yet been created, create it now. +*/ +SQLITE_PRIVATE int sqlite3JournalCreate(sqlite3_file *p){ + if( p->pMethods!=&JournalFileMethods ){ + return SQLITE_OK; + } + return createFile((JournalFile *)p); +} + +/* +** The file-handle passed as the only argument is guaranteed to be an open +** file. It may or may not be of class JournalFile. If the file is a +** JournalFile, and the underlying file on disk has not yet been opened, +** return 0. Otherwise, return 1. +*/ +SQLITE_PRIVATE int sqlite3JournalExists(sqlite3_file *p){ + return (p->pMethods!=&JournalFileMethods || ((JournalFile *)p)->pReal!=0); +} + +/* +** Return the number of bytes required to store a JournalFile that uses vfs +** pVfs to create the underlying on-disk files. +*/ +SQLITE_PRIVATE int sqlite3JournalSize(sqlite3_vfs *pVfs){ + return (pVfs->szOsFile+sizeof(JournalFile)); +} +#endif + +/************** End of journal.c *********************************************/ +/************** Begin file memjournal.c **************************************/ +/* +** 2008 October 7 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code use to implement an in-memory rollback journal. +** The in-memory rollback journal is used to journal transactions for +** ":memory:" databases and when the journal_mode=MEMORY pragma is used. +*/ +/* #include "sqliteInt.h" */ + +/* Forward references to internal structures */ +typedef struct MemJournal MemJournal; +typedef struct FilePoint FilePoint; +typedef struct FileChunk FileChunk; + +/* Space to hold the rollback journal is allocated in increments of +** this many bytes. +** +** The size chosen is a little less than a power of two. That way, +** the FileChunk object will have a size that almost exactly fills +** a power-of-two allocation. This minimizes wasted space in power-of-two +** memory allocators. +*/ +#define JOURNAL_CHUNKSIZE ((int)(1024-sizeof(FileChunk*))) + +/* +** The rollback journal is composed of a linked list of these structures. +*/ +struct FileChunk { + FileChunk *pNext; /* Next chunk in the journal */ + u8 zChunk[JOURNAL_CHUNKSIZE]; /* Content of this chunk */ +}; + +/* +** An instance of this object serves as a cursor into the rollback journal. +** The cursor can be either for reading or writing. +*/ +struct FilePoint { + sqlite3_int64 iOffset; /* Offset from the beginning of the file */ + FileChunk *pChunk; /* Specific chunk into which cursor points */ +}; + +/* +** This subclass is a subclass of sqlite3_file. Each open memory-journal +** is an instance of this class. +*/ +struct MemJournal { + sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */ + FileChunk *pFirst; /* Head of in-memory chunk-list */ + FilePoint endpoint; /* Pointer to the end of the file */ + FilePoint readpoint; /* Pointer to the end of the last xRead() */ +}; + +/* +** Read data from the in-memory journal file. This is the implementation +** of the sqlite3_vfs.xRead method. +*/ +static int memjrnlRead( + sqlite3_file *pJfd, /* The journal file from which to read */ + void *zBuf, /* Put the results here */ + int iAmt, /* Number of bytes to read */ + sqlite_int64 iOfst /* Begin reading at this offset */ +){ + MemJournal *p = (MemJournal *)pJfd; + u8 *zOut = zBuf; + int nRead = iAmt; + int iChunkOffset; + FileChunk *pChunk; + + /* SQLite never tries to read past the end of a rollback journal file */ + assert( iOfst+iAmt<=p->endpoint.iOffset ); + + if( p->readpoint.iOffset!=iOfst || iOfst==0 ){ + sqlite3_int64 iOff = 0; + for(pChunk=p->pFirst; + ALWAYS(pChunk) && (iOff+JOURNAL_CHUNKSIZE)<=iOfst; + pChunk=pChunk->pNext + ){ + iOff += JOURNAL_CHUNKSIZE; + } + }else{ + pChunk = p->readpoint.pChunk; + } + + iChunkOffset = (int)(iOfst%JOURNAL_CHUNKSIZE); + do { + int iSpace = JOURNAL_CHUNKSIZE - iChunkOffset; + int nCopy = MIN(nRead, (JOURNAL_CHUNKSIZE - iChunkOffset)); + memcpy(zOut, &pChunk->zChunk[iChunkOffset], nCopy); + zOut += nCopy; + nRead -= iSpace; + iChunkOffset = 0; + } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 ); + p->readpoint.iOffset = iOfst+iAmt; + p->readpoint.pChunk = pChunk; + + return SQLITE_OK; +} + +/* +** Write data to the file. +*/ +static int memjrnlWrite( + sqlite3_file *pJfd, /* The journal file into which to write */ + const void *zBuf, /* Take data to be written from here */ + int iAmt, /* Number of bytes to write */ + sqlite_int64 iOfst /* Begin writing at this offset into the file */ +){ + MemJournal *p = (MemJournal *)pJfd; + int nWrite = iAmt; + u8 *zWrite = (u8 *)zBuf; + + /* An in-memory journal file should only ever be appended to. Random + ** access writes are not required by sqlite. + */ + assert( iOfst==p->endpoint.iOffset ); + UNUSED_PARAMETER(iOfst); + + while( nWrite>0 ){ + FileChunk *pChunk = p->endpoint.pChunk; + int iChunkOffset = (int)(p->endpoint.iOffset%JOURNAL_CHUNKSIZE); + int iSpace = MIN(nWrite, JOURNAL_CHUNKSIZE - iChunkOffset); + + if( iChunkOffset==0 ){ + /* New chunk is required to extend the file. */ + FileChunk *pNew = sqlite3_malloc(sizeof(FileChunk)); + if( !pNew ){ + return SQLITE_IOERR_NOMEM; + } + pNew->pNext = 0; + if( pChunk ){ + assert( p->pFirst ); + pChunk->pNext = pNew; + }else{ + assert( !p->pFirst ); + p->pFirst = pNew; + } + p->endpoint.pChunk = pNew; + } + + memcpy(&p->endpoint.pChunk->zChunk[iChunkOffset], zWrite, iSpace); + zWrite += iSpace; + nWrite -= iSpace; + p->endpoint.iOffset += iSpace; + } + + return SQLITE_OK; +} + +/* +** Truncate the file. +*/ +static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){ + MemJournal *p = (MemJournal *)pJfd; + FileChunk *pChunk; + assert(size==0); + UNUSED_PARAMETER(size); + pChunk = p->pFirst; + while( pChunk ){ + FileChunk *pTmp = pChunk; + pChunk = pChunk->pNext; + sqlite3_free(pTmp); + } + sqlite3MemJournalOpen(pJfd); + return SQLITE_OK; +} + +/* +** Close the file. +*/ +static int memjrnlClose(sqlite3_file *pJfd){ + memjrnlTruncate(pJfd, 0); + return SQLITE_OK; +} + + +/* +** Sync the file. +** +** Syncing an in-memory journal is a no-op. And, in fact, this routine +** is never called in a working implementation. This implementation +** exists purely as a contingency, in case some malfunction in some other +** part of SQLite causes Sync to be called by mistake. +*/ +static int memjrnlSync(sqlite3_file *NotUsed, int NotUsed2){ + UNUSED_PARAMETER2(NotUsed, NotUsed2); + return SQLITE_OK; +} + +/* +** Query the size of the file in bytes. +*/ +static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){ + MemJournal *p = (MemJournal *)pJfd; + *pSize = (sqlite_int64) p->endpoint.iOffset; + return SQLITE_OK; +} + +/* +** Table of methods for MemJournal sqlite3_file object. +*/ +static const struct sqlite3_io_methods MemJournalMethods = { + 1, /* iVersion */ + memjrnlClose, /* xClose */ + memjrnlRead, /* xRead */ + memjrnlWrite, /* xWrite */ + memjrnlTruncate, /* xTruncate */ + memjrnlSync, /* xSync */ + memjrnlFileSize, /* xFileSize */ + 0, /* xLock */ + 0, /* xUnlock */ + 0, /* xCheckReservedLock */ + 0, /* xFileControl */ + 0, /* xSectorSize */ + 0, /* xDeviceCharacteristics */ + 0, /* xShmMap */ + 0, /* xShmLock */ + 0, /* xShmBarrier */ + 0, /* xShmUnmap */ + 0, /* xFetch */ + 0 /* xUnfetch */ +}; + +/* +** Open a journal file. +*/ +SQLITE_PRIVATE void sqlite3MemJournalOpen(sqlite3_file *pJfd){ + MemJournal *p = (MemJournal *)pJfd; + assert( EIGHT_BYTE_ALIGNMENT(p) ); + memset(p, 0, sqlite3MemJournalSize()); + p->pMethod = (sqlite3_io_methods*)&MemJournalMethods; +} + +/* +** Return true if the file-handle passed as an argument is +** an in-memory journal +*/ +SQLITE_PRIVATE int sqlite3IsMemJournal(sqlite3_file *pJfd){ + return pJfd->pMethods==&MemJournalMethods; +} + +/* +** Return the number of bytes required to store a MemJournal file descriptor. +*/ +SQLITE_PRIVATE int sqlite3MemJournalSize(void){ + return sizeof(MemJournal); +} + +/************** End of memjournal.c ******************************************/ +/************** Begin file main.c ********************************************/ +/* #include "sqliteInt.h" */ + +/* IMPLEMENTATION-OF: R-63124-39300 The sqlite3_sourceid() function returns a +** pointer to a string constant whose value is the same as the +** SQLITE_SOURCE_ID C preprocessor macro. +*/ +SQLITE_PRIVATE const char *sqlite3_sourceid(void){ return SQLITE_SOURCE_ID; } + +/* +** If the following global variable points to a string which is the +** name of a directory, then that directory will be used to store +** temporary files. +** +** See also the "PRAGMA temp_store_directory" SQL command. +*/ +SQLITE_PRIVATE char *sqlite3_temp_directory = 0; + +/* +** Initialize SQLite. +** +** This routine must be called to initialize the memory allocation, +** VFS, and mutex subsystems prior to doing any serious work with +** SQLite. But as long as you do not compile with SQLITE_OMIT_AUTOINIT +** this routine will be called automatically by key routines such as +** sqlite3_open(). +** +** This routine is a no-op except on its very first call for the process, +** or for the first call after a call to sqlite3BtreeShutdown. +** +** The first thread to call this routine runs the initialization to +** completion. If subsequent threads call this routine before the first +** thread has finished the initialization process, then the subsequent +** threads must block until the first thread finishes with the initialization. +** +** The first thread might call this routine recursively. Recursive +** calls to this routine should not block, of course. Otherwise the +** initialization process would never complete. +** +** Let X be the first thread to enter this routine. Let Y be some other +** thread. Then while the initial invocation of this routine by X is +** incomplete, it is required that: +** +** * Calls to this routine from Y must block until the outer-most +** call by X completes. +** +** * Recursive calls to this routine from thread X return immediately +** without blocking. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeInitialize(void){ + MUTEX_LOGIC( sqlite3_mutex *pMaster; ) /* The main static mutex */ + int rc; /* Result code */ +#ifdef SQLITE_EXTRA_INIT + int bRunExtraInit = 0; /* Extra initialization needed */ +#endif + +#ifdef SQLITE_OMIT_WSD + rc = sqlite3_wsd_init(4096, 24); + if( rc!=SQLITE_OK ){ + return rc; + } +#endif + + /* If the following assert() fails on some obscure processor/compiler + ** combination, the work-around is to set the correct pointer + ** size at compile-time using -DSQLITE_PTRSIZE=n compile-time option */ + assert( SQLITE_PTRSIZE==sizeof(char*) ); + + /* If SQLite is already completely initialized, then this call + ** to sqlite3BtreeInitialize() should be a no-op. But the initialization + ** must be complete. So isInit must not be set until the very end + ** of this routine. + */ + if( sqlite3GlobalConfig.isInit ) return SQLITE_OK; + + /* Make sure the mutex subsystem is initialized. If unable to + ** initialize the mutex subsystem, return early with the error. + ** If the system is so sick that we are unable to allocate a mutex, + ** there is not much SQLite is going to be able to do. + ** + ** The mutex subsystem must take care of serializing its own + ** initialization. + */ + rc = sqlite3MutexInit(); + if( rc ) return rc; + + /* Initialize the malloc() system and the recursive pInitMutex mutex. + ** This operation is protected by the STATIC_MASTER mutex. Note that + ** MutexAlloc() is called for a static mutex prior to initializing the + ** malloc subsystem - this implies that the allocation of a static + ** mutex must not require support from the malloc subsystem. + */ + MUTEX_LOGIC( pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); ) + sqlite3_mutex_enter(pMaster); + sqlite3GlobalConfig.isMutexInit = 1; + if( !sqlite3GlobalConfig.isMallocInit ){ + rc = sqlite3MallocInit(); + } + if( rc==SQLITE_OK ){ + sqlite3GlobalConfig.isMallocInit = 1; + if( !sqlite3GlobalConfig.pInitMutex ){ + sqlite3GlobalConfig.pInitMutex = + sqlite3MutexAlloc(SQLITE_MUTEX_RECURSIVE); + if( sqlite3GlobalConfig.bCoreMutex && !sqlite3GlobalConfig.pInitMutex ){ + rc = SQLITE_NOMEM; + } + } + } + if( rc==SQLITE_OK ){ + sqlite3GlobalConfig.nRefInitMutex++; + } + sqlite3_mutex_leave(pMaster); + + /* If rc is not SQLITE_OK at this point, then either the malloc + ** subsystem could not be initialized or the system failed to allocate + ** the pInitMutex mutex. Return an error in either case. */ + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Do the rest of the initialization under the recursive mutex so + ** that we will be able to handle recursive calls into + ** sqlite3BtreeInitialize(). The recursive calls normally come through + ** sqlite3_os_init() when it invokes sqlite3_vfs_register(), but other + ** recursive calls might also be possible. + ** + ** IMPLEMENTATION-OF: R-00140-37445 SQLite automatically serializes calls + ** to the xInit method, so the xInit method need not be threadsafe. + ** + ** The following mutex is what serializes access to the appdef pcache xInit + ** methods. The sqlite3_pcache_methods.xInit() all is embedded in the + ** call to sqlite3PcacheInitialize(). + */ + sqlite3_mutex_enter(sqlite3GlobalConfig.pInitMutex); + if( sqlite3GlobalConfig.isInit==0 && sqlite3GlobalConfig.inProgress==0 ){ + sqlite3GlobalConfig.inProgress = 1; + if( sqlite3GlobalConfig.isPCacheInit==0 ){ + rc = sqlite3PcacheInitialize(); + } + if( rc==SQLITE_OK ){ + sqlite3GlobalConfig.isPCacheInit = 1; + rc = sqlite3OsInit(); + } + if( rc==SQLITE_OK ){ + sqlite3PCacheBufferSetup( sqlite3GlobalConfig.pPage, + sqlite3GlobalConfig.szPage, sqlite3GlobalConfig.nPage); + sqlite3GlobalConfig.isInit = 1; +#ifdef SQLITE_EXTRA_INIT + bRunExtraInit = 1; +#endif + } + sqlite3GlobalConfig.inProgress = 0; + } + sqlite3_mutex_leave(sqlite3GlobalConfig.pInitMutex); + + /* Go back under the static mutex and clean up the recursive + ** mutex to prevent a resource leak. + */ + sqlite3_mutex_enter(pMaster); + sqlite3GlobalConfig.nRefInitMutex--; + if( sqlite3GlobalConfig.nRefInitMutex<=0 ){ + assert( sqlite3GlobalConfig.nRefInitMutex==0 ); + sqlite3_mutex_free(sqlite3GlobalConfig.pInitMutex); + sqlite3GlobalConfig.pInitMutex = 0; + } + sqlite3_mutex_leave(pMaster); + + /* The following is just a sanity check to make sure SQLite has + ** been compiled correctly. It is important to run this code, but + ** we don't want to run it too often and soak up CPU cycles for no + ** reason. So we run it once during initialization. + */ +#ifndef NDEBUG +#ifndef SQLITE_OMIT_FLOATING_POINT + /* This section of code's only "output" is via assert() statements. */ + if ( rc==SQLITE_OK ){ + u64 x = (((u64)1)<<63)-1; + double y; + assert(sizeof(x)==8); + assert(sizeof(x)==sizeof(y)); + memcpy(&y, &x, 8); + assert( sqlite3IsNaN(y) ); + } +#endif +#endif + + /* Do extra initialization steps requested by the SQLITE_EXTRA_INIT + ** compile-time option. + */ +#ifdef SQLITE_EXTRA_INIT + if( bRunExtraInit ){ + int SQLITE_EXTRA_INIT(const char*); + rc = SQLITE_EXTRA_INIT(0); + } +#endif + + return rc; +} + +/* +** Undo the effects of sqlite3BtreeInitialize(). Must not be called while +** there are outstanding database connections or memory allocations or +** while any part of SQLite is otherwise in use in any thread. This +** routine is not threadsafe. But it is safe to invoke this routine +** on when SQLite is already shut down. If SQLite is already shut down +** when this routine is invoked, then this routine is a harmless no-op. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3BtreeShutdown(void){ +#ifdef SQLITE_OMIT_WSD + int rc = sqlite3_wsd_init(4096, 24); + if( rc!=SQLITE_OK ){ + return rc; + } +#endif + + if( sqlite3GlobalConfig.isInit ){ +#ifdef SQLITE_EXTRA_SHUTDOWN + void SQLITE_EXTRA_SHUTDOWN(void); + SQLITE_EXTRA_SHUTDOWN(); +#endif + sqlite3_os_end(); + sqlite3GlobalConfig.isInit = 0; + } + if( sqlite3GlobalConfig.isPCacheInit ){ + sqlite3PcacheShutdown(); + sqlite3GlobalConfig.isPCacheInit = 0; + } + if( sqlite3GlobalConfig.isMallocInit ){ + sqlite3MallocEnd(); + sqlite3GlobalConfig.isMallocInit = 0; + } + if( sqlite3GlobalConfig.isMutexInit ){ + sqlite3MutexEnd(); + sqlite3GlobalConfig.isMutexInit = 0; + } + + return SQLITE_OK; +} + +/* +** The following routines are substitutes for constants SQLITE_CORRUPT, +** SQLITE_MISUSE, SQLITE_CANTOPEN, SQLITE_IOERR and possibly other error +** constants. They serve two purposes: +** +** 1. Serve as a convenient place to set a breakpoint in a debugger +** to detect when version error conditions occurs. +** +** 2. Invoke sqlite3_log() to provide the source code location where +** a low-level error is first detected. +*/ +SQLITE_PRIVATE int sqlite3CorruptError(int lineno){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(SQLITE_CORRUPT, + "database corruption at line %d of [%.10s]", + lineno, 20+sqlite3_sourceid()); + return SQLITE_CORRUPT; +} +SQLITE_PRIVATE int sqlite3MisuseError(int lineno){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(SQLITE_MISUSE, + "misuse at line %d of [%.10s]", + lineno, 20+sqlite3_sourceid()); + return SQLITE_MISUSE; +} +SQLITE_PRIVATE int sqlite3CantopenError(int lineno){ + testcase( sqlite3GlobalConfig.xLog!=0 ); + sqlite3_log(SQLITE_CANTOPEN, + "cannot open file at line %d of [%.10s]", + lineno, 20+sqlite3_sourceid()); + return SQLITE_CANTOPEN; +} + +/* +** Invoke the given busy handler. +** +** This routine is called when an operation failed with a lock. +** If this routine returns non-zero, the lock is retried. If it +** returns 0, the operation aborts with an SQLITE_BUSY error. +*/ +SQLITE_PRIVATE int sqlite3InvokeBusyHandler(BusyHandler *p){ + int rc; + if( NEVER(p==0) || p->xFunc==0 || p->nBusy<0 ) return 0; + rc = p->xFunc(p->pArg, p->nBusy); + if( rc==0 ){ + p->nBusy = -1; + }else{ + p->nBusy++; + } + return rc; +} + +/* +** This API allows applications to modify the global configuration of +** the SQLite library at run-time. +** +** This routine should only be called when there are no outstanding +** database connections or memory allocations. This routine is not +** threadsafe. Failure to heed these warnings can lead to unpredictable +** behavior. +*/ +SQLITE_PRIVATE int sqlite3_config(int op, ...){ + va_list ap; + int rc = SQLITE_OK; + + /* sqlite3_config() shall return SQLITE_MISUSE if it is invoked while + ** the SQLite library is in use. */ + if( sqlite3GlobalConfig.isInit ) return SQLITE_MISUSE_BKPT; + + va_start(ap, op); + switch( op ){ + + /* Mutex configuration options are only available in a threadsafe + ** compile. + */ +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-54466-46756 */ + case SQLITE_CONFIG_SINGLETHREAD: { + /* EVIDENCE-OF: R-02748-19096 This option sets the threading mode to + ** Single-thread. */ + sqlite3GlobalConfig.bCoreMutex = 0; /* Disable mutex on core */ + sqlite3GlobalConfig.bFullMutex = 0; /* Disable mutex on connections */ + break; + } +#endif +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-20520-54086 */ + case SQLITE_CONFIG_MULTITHREAD: { + /* EVIDENCE-OF: R-14374-42468 This option sets the threading mode to + ** Multi-thread. */ + sqlite3GlobalConfig.bCoreMutex = 1; /* Enable mutex on core */ + sqlite3GlobalConfig.bFullMutex = 0; /* Disable mutex on connections */ + break; + } +#endif +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-59593-21810 */ + case SQLITE_CONFIG_SERIALIZED: { + /* EVIDENCE-OF: R-41220-51800 This option sets the threading mode to + ** Serialized. */ + sqlite3GlobalConfig.bCoreMutex = 1; /* Enable mutex on core */ + sqlite3GlobalConfig.bFullMutex = 1; /* Enable mutex on connections */ + break; + } +#endif +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-63666-48755 */ + case SQLITE_CONFIG_MUTEX: { + /* Specify an alternative mutex implementation */ + sqlite3GlobalConfig.mutex = *va_arg(ap, sqlite3_mutex_methods*); + break; + } +#endif +#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-14450-37597 */ + case SQLITE_CONFIG_GETMUTEX: { + /* Retrieve the current mutex implementation */ + *va_arg(ap, sqlite3_mutex_methods*) = sqlite3GlobalConfig.mutex; + break; + } +#endif + + case SQLITE_CONFIG_MALLOC: { + /* EVIDENCE-OF: R-55594-21030 The SQLITE_CONFIG_MALLOC option takes a + ** single argument which is a pointer to an instance of the + ** sqlite3_mem_methods structure. The argument specifies alternative + ** low-level memory allocation routines to be used in place of the memory + ** allocation routines built into SQLite. */ + sqlite3GlobalConfig.m = *va_arg(ap, sqlite3_mem_methods*); + break; + } + case SQLITE_CONFIG_GETMALLOC: { + /* EVIDENCE-OF: R-51213-46414 The SQLITE_CONFIG_GETMALLOC option takes a + ** single argument which is a pointer to an instance of the + ** sqlite3_mem_methods structure. The sqlite3_mem_methods structure is + ** filled with the currently defined memory allocation routines. */ + if( sqlite3GlobalConfig.m.xMalloc==0 ) sqlite3MemSetDefault(); + *va_arg(ap, sqlite3_mem_methods*) = sqlite3GlobalConfig.m; + break; + } + case SQLITE_CONFIG_MEMSTATUS: { + /* EVIDENCE-OF: R-61275-35157 The SQLITE_CONFIG_MEMSTATUS option takes + ** single argument of type int, interpreted as a boolean, which enables + ** or disables the collection of memory allocation statistics. */ + sqlite3GlobalConfig.bMemstat = va_arg(ap, int); + break; + } + case SQLITE_CONFIG_SCRATCH: { + /* EVIDENCE-OF: R-08404-60887 There are three arguments to + ** SQLITE_CONFIG_SCRATCH: A pointer an 8-byte aligned memory buffer from + ** which the scratch allocations will be drawn, the size of each scratch + ** allocation (sz), and the maximum number of scratch allocations (N). */ + sqlite3GlobalConfig.pScratch = va_arg(ap, void*); + sqlite3GlobalConfig.szScratch = va_arg(ap, int); + sqlite3GlobalConfig.nScratch = va_arg(ap, int); + break; + } + case SQLITE_CONFIG_PAGECACHE: { + /* EVIDENCE-OF: R-31408-40510 There are three arguments to + ** SQLITE_CONFIG_PAGECACHE: A pointer to 8-byte aligned memory, the size + ** of each page buffer (sz), and the number of pages (N). */ + sqlite3GlobalConfig.pPage = va_arg(ap, void*); + sqlite3GlobalConfig.szPage = va_arg(ap, int); + sqlite3GlobalConfig.nPage = va_arg(ap, int); + break; + } + case SQLITE_CONFIG_PCACHE_HDRSZ: { + /* EVIDENCE-OF: R-39100-27317 The SQLITE_CONFIG_PCACHE_HDRSZ option takes + ** a single parameter which is a pointer to an integer and writes into + ** that integer the number of extra bytes per page required for each page + ** in SQLITE_CONFIG_PAGECACHE. */ + *va_arg(ap, int*) = + sqlite3HeaderSizeBtree() + + sqlite3HeaderSizePcache() + + sqlite3HeaderSizePcache1(); + break; + } + + case SQLITE_CONFIG_PCACHE: { + /* no-op */ + break; + } + case SQLITE_CONFIG_GETPCACHE: { + /* now an error */ + rc = SQLITE_ERROR; + break; + } + + case SQLITE_CONFIG_PCACHE2: { + /* EVIDENCE-OF: R-63325-48378 The SQLITE_CONFIG_PCACHE2 option takes a + ** single argument which is a pointer to an sqlite3_pcache_methods2 + ** object. This object specifies the interface to a custom page cache + ** implementation. */ + sqlite3GlobalConfig.pcache2 = *va_arg(ap, sqlite3_pcache_methods2*); + break; + } + case SQLITE_CONFIG_GETPCACHE2: { + /* EVIDENCE-OF: R-22035-46182 The SQLITE_CONFIG_GETPCACHE2 option takes a + ** single argument which is a pointer to an sqlite3_pcache_methods2 + ** object. SQLite copies of the current page cache implementation into + ** that object. */ + if( sqlite3GlobalConfig.pcache2.xInit==0 ){ + sqlite3PCacheSetDefault(); + } + *va_arg(ap, sqlite3_pcache_methods2*) = sqlite3GlobalConfig.pcache2; + break; + } + +/* EVIDENCE-OF: R-06626-12911 The SQLITE_CONFIG_HEAP option is only +** available if SQLite is compiled with either SQLITE_ENABLE_MEMSYS3 or +** SQLITE_ENABLE_MEMSYS5 and returns SQLITE_ERROR if invoked otherwise. */ +#if defined(SQLITE_ENABLE_MEMSYS3) || defined(SQLITE_ENABLE_MEMSYS5) + case SQLITE_CONFIG_HEAP: { + /* EVIDENCE-OF: R-19854-42126 There are three arguments to + ** SQLITE_CONFIG_HEAP: An 8-byte aligned pointer to the memory, the + ** number of bytes in the memory buffer, and the minimum allocation size. + */ + sqlite3GlobalConfig.pHeap = va_arg(ap, void*); + sqlite3GlobalConfig.nHeap = va_arg(ap, int); + sqlite3GlobalConfig.mnReq = va_arg(ap, int); + + if( sqlite3GlobalConfig.mnReq<1 ){ + sqlite3GlobalConfig.mnReq = 1; + }else if( sqlite3GlobalConfig.mnReq>(1<<12) ){ + /* cap min request size at 2^12 */ + sqlite3GlobalConfig.mnReq = (1<<12); + } + + if( sqlite3GlobalConfig.pHeap==0 ){ + /* EVIDENCE-OF: R-49920-60189 If the first pointer (the memory pointer) + ** is NULL, then SQLite reverts to using its default memory allocator + ** (the system malloc() implementation), undoing any prior invocation of + ** SQLITE_CONFIG_MALLOC. + ** + ** Setting sqlite3GlobalConfig.m to all zeros will cause malloc to + ** revert to its default implementation when sqlite3BtreeInitialize() is run + */ + memset(&sqlite3GlobalConfig.m, 0, sizeof(sqlite3GlobalConfig.m)); + }else{ + /* EVIDENCE-OF: R-61006-08918 If the memory pointer is not NULL then the + ** alternative memory allocator is engaged to handle all of SQLites + ** memory allocation needs. */ +#ifdef SQLITE_ENABLE_MEMSYS3 + sqlite3GlobalConfig.m = *sqlite3MemGetMemsys3(); +#endif +#ifdef SQLITE_ENABLE_MEMSYS5 + sqlite3GlobalConfig.m = *sqlite3MemGetMemsys5(); +#endif + } + break; + } +#endif + + case SQLITE_CONFIG_LOOKASIDE: { + sqlite3GlobalConfig.szLookaside = va_arg(ap, int); + sqlite3GlobalConfig.nLookaside = va_arg(ap, int); + break; + } + + /* Record a pointer to the logger function and its first argument. + ** The default is NULL. Logging is disabled if the function pointer is + ** NULL. + */ + case SQLITE_CONFIG_LOG: { + /* MSVC is picky about pulling func ptrs from va lists. + ** http://support.microsoft.com/kb/47961 + ** sqlite3GlobalConfig.xLog = va_arg(ap, void(*)(void*,int,const char*)); + */ + typedef void(*LOGFUNC_t)(void*,int,const char*); + sqlite3GlobalConfig.xLog = va_arg(ap, LOGFUNC_t); + sqlite3GlobalConfig.pLogArg = va_arg(ap, void*); + break; + } + + /* EVIDENCE-OF: R-55548-33817 The compile-time setting for URI filenames + ** can be changed at start-time using the + ** sqlite3_config(SQLITE_CONFIG_URI,1) or + ** sqlite3_config(SQLITE_CONFIG_URI,0) configuration calls. + */ + case SQLITE_CONFIG_URI: { + /* EVIDENCE-OF: R-25451-61125 The SQLITE_CONFIG_URI option takes a single + ** argument of type int. If non-zero, then URI handling is globally + ** enabled. If the parameter is zero, then URI handling is globally + ** disabled. */ + sqlite3GlobalConfig.bOpenUri = va_arg(ap, int); + break; + } + + case SQLITE_CONFIG_COVERING_INDEX_SCAN: { + /* EVIDENCE-OF: R-36592-02772 The SQLITE_CONFIG_COVERING_INDEX_SCAN + ** option takes a single integer argument which is interpreted as a + ** boolean in order to enable or disable the use of covering indices for + ** full table scans in the query optimizer. */ + sqlite3GlobalConfig.bUseCis = va_arg(ap, int); + break; + } + + case SQLITE_CONFIG_MMAP_SIZE: { + /* EVIDENCE-OF: R-58063-38258 SQLITE_CONFIG_MMAP_SIZE takes two 64-bit + ** integer (sqlite3_int64) values that are the default mmap size limit + ** (the default setting for PRAGMA mmap_size) and the maximum allowed + ** mmap size limit. */ + sqlite3_int64 szMmap = va_arg(ap, sqlite3_int64); + sqlite3_int64 mxMmap = va_arg(ap, sqlite3_int64); + /* EVIDENCE-OF: R-53367-43190 If either argument to this option is + ** negative, then that argument is changed to its compile-time default. + ** + ** EVIDENCE-OF: R-34993-45031 The maximum allowed mmap size will be + ** silently truncated if necessary so that it does not exceed the + ** compile-time maximum mmap size set by the SQLITE_MAX_MMAP_SIZE + ** compile-time option. + */ + if( mxMmap<0 || mxMmap>SQLITE_MAX_MMAP_SIZE ){ + mxMmap = SQLITE_MAX_MMAP_SIZE; + } + if( szMmap<0 ) szMmap = SQLITE_DEFAULT_MMAP_SIZE; + if( szMmap>mxMmap) szMmap = mxMmap; + sqlite3GlobalConfig.mxMmap = mxMmap; + sqlite3GlobalConfig.szMmap = szMmap; + break; + } + +#if SQLITE_OS_WIN && defined(SQLITE_WIN32_MALLOC) /* IMP: R-04780-55815 */ + case SQLITE_CONFIG_WIN32_HEAPSIZE: { + /* EVIDENCE-OF: R-34926-03360 SQLITE_CONFIG_WIN32_HEAPSIZE takes a 32-bit + ** unsigned integer value that specifies the maximum size of the created + ** heap. */ + sqlite3GlobalConfig.nHeap = va_arg(ap, int); + break; + } +#endif + + case SQLITE_CONFIG_PMASZ: { + sqlite3GlobalConfig.szPma = va_arg(ap, unsigned int); + break; + } + + default: { + rc = SQLITE_ERROR; + break; + } + } + va_end(ap); + return rc; +} + +/************** End of main.c ************************************************/ |
