diff options
| author | Stefano Sinigardi <stesinigardi@hotmail.com> | 2019-10-07 19:35:13 +0200 |
|---|---|---|
| committer | Curtis J Bezault <curtbezault@gmail.com> | 2019-10-07 10:35:13 -0700 |
| commit | 726c11148105a97aef39bec024fdb7c140b1b154 (patch) | |
| tree | 26bd2aee0c13a8351b259cc4ffffaf0efededb4e /ports/lmdb | |
| parent | e86ff2cc54bda9e9ee322ab69141e7113d5c40a9 (diff) | |
| download | vcpkg-726c11148105a97aef39bec024fdb7c140b1b154.tar.gz vcpkg-726c11148105a97aef39bec024fdb7c140b1b154.zip | |
[vcpkg] fatal_error when patch fails to apply (#8087)
vcpkg will now fail on failure to apply patches except when using `--head`.
Diffstat (limited to 'ports/lmdb')
| -rw-r--r-- | ports/lmdb/CONTROL | 2 | ||||
| -rw-r--r-- | ports/lmdb/lmdb_45a88275d2a410e683bae4ef44881e0f55fa3c4d.patch | 3958 | ||||
| -rw-r--r-- | ports/lmdb/portfile.cmake | 1 |
3 files changed, 1 insertions, 3960 deletions
diff --git a/ports/lmdb/CONTROL b/ports/lmdb/CONTROL index eac4b017f..eb174de87 100644 --- a/ports/lmdb/CONTROL +++ b/ports/lmdb/CONTROL @@ -1,4 +1,4 @@ Source: lmdb -Version: 0.9.23-2 +Version: 0.9.23-3 Homepage: https://github.com/LMDB/lmdb Description: LMDB is an extraordinarily fast, memory-efficient database diff --git a/ports/lmdb/lmdb_45a88275d2a410e683bae4ef44881e0f55fa3c4d.patch b/ports/lmdb/lmdb_45a88275d2a410e683bae4ef44881e0f55fa3c4d.patch deleted file mode 100644 index e6d1f108c..000000000 --- a/ports/lmdb/lmdb_45a88275d2a410e683bae4ef44881e0f55fa3c4d.patch +++ /dev/null @@ -1,3958 +0,0 @@ -diff --git a/libraries/liblmdb/Makefile b/libraries/liblmdb/Makefile -index 0940c49..72d0984 100644 ---- a/libraries/liblmdb/Makefile -+++ b/libraries/liblmdb/Makefile -@@ -8,7 +8,7 @@ - # platforms; you should not need to change any of these. - # Read their descriptions in mdb.c if you do: - # --# - MDB_USE_POSIX_SEM -+# - MDB_USE_POSIX_MUTEX, MDB_USE_POSIX_SEM, MDB_USE_SYSV_SEM - # - MDB_DSYNC - # - MDB_FDATASYNC - # - MDB_FDATASYNC_WORKS -@@ -24,8 +24,9 @@ W = -W -Wall -Wno-unused-parameter -Wbad-function-cast -Wuninitialized - THREADS = -pthread - OPT = -O2 -g - CFLAGS = $(THREADS) $(OPT) $(W) $(XCFLAGS) --LDLIBS = --SOLIBS = -+LDLIBS = # -lntdll # Windows needs ntdll -+SOLIBS = # -lntdll -+SOEXT = .so - prefix = /usr/local - exec_prefix = $(prefix) - bindir = $(exec_prefix)/bin -@@ -37,7 +38,7 @@ mandir = $(datarootdir)/man - ######################################################################## - - IHDRS = lmdb.h --ILIBS = liblmdb.a liblmdb.so -+ILIBS = liblmdb.a liblmdb$(SOEXT) - IPROGS = mdb_stat mdb_copy mdb_dump mdb_load - IDOCS = mdb_stat.1 mdb_copy.1 mdb_dump.1 mdb_load.1 - PROGS = $(IPROGS) mtest mtest2 mtest3 mtest4 mtest5 -@@ -63,7 +64,7 @@ test: all - liblmdb.a: mdb.o midl.o - $(AR) rs $@ mdb.o midl.o - --liblmdb.so: mdb.lo midl.lo -+liblmdb$(SOEXT): mdb.lo midl.lo - # $(CC) $(LDFLAGS) -pthread -shared -Wl,-Bsymbolic -o $@ mdb.o midl.o $(SOLIBS) - $(CC) $(LDFLAGS) -pthread -shared -o $@ mdb.lo midl.lo $(SOLIBS) - -diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h -index c4d05d1..30d5862 100644 ---- a/libraries/liblmdb/lmdb.h -+++ b/libraries/liblmdb/lmdb.h -@@ -53,14 +53,15 @@ - * - * Fix: Check for stale readers periodically, using the - * #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool. -- * Stale writers will be cleared automatically on some systems: -+ * Stale writers will be cleared automatically on most systems: - * - Windows - automatic -+ * - BSD, systems using SysV semaphores - automatic - * - Linux, systems using POSIX mutexes with Robust option - automatic -- * - not on BSD, systems using POSIX semaphores. - * Otherwise just make all programs using the database close it; - * the lockfile is always reset on first open of the environment. - * -- * - On BSD systems or others configured with MDB_USE_POSIX_SEM, -+ * - On BSD systems or others configured with MDB_USE_SYSV_SEM or -+ * MDB_USE_POSIX_SEM, - * startup can fail due to semaphores owned by another userid. - * - * Fix: Open and close the database as the user which owns the -@@ -77,6 +78,11 @@ - * access to locks and lock file. Exceptions: On read-only filesystems - * or with the #MDB_NOLOCK flag described under #mdb_env_open(). - * -+ * - An LMDB configuration will often reserve considerable \b unused -+ * memory address space and maybe file size for future growth. -+ * This does not use actual memory or disk space, but users may need -+ * to understand the difference so they won't be scared off. -+ * - * - By default, in versions before 0.9.10, unused portions of the data - * file might receive garbage data from memory freed by other code. - * (This does not happen when using the #MDB_WRITEMAP flag.) As of -@@ -160,6 +166,8 @@ - #define _LMDB_H_ - - #include <sys/types.h> -+#include <inttypes.h> -+#include <limits.h> - - #ifdef __cplusplus - extern "C" { -@@ -172,6 +180,32 @@ typedef int mdb_mode_t; - typedef mode_t mdb_mode_t; - #endif - -+#ifdef _WIN32 -+# define MDB_FMT_Z "I" -+#else -+# define MDB_FMT_Z "z" /**< printf/scanf format modifier for size_t */ -+#endif -+ -+#ifndef MDB_VL32 -+/** Unsigned type used for mapsize, entry counts and page/transaction IDs. -+ * -+ * It is normally size_t, hence the name. Defining MDB_VL32 makes it -+ * uint64_t, but do not try this unless you know what you are doing. -+ */ -+typedef size_t mdb_size_t; -+# define MDB_SIZE_MAX SIZE_MAX /**< max #mdb_size_t */ -+/** #mdb_size_t printf formats, \b t = one of [diouxX] without quotes */ -+# define MDB_PRIy(t) MDB_FMT_Z #t -+/** #mdb_size_t scanf formats, \b t = one of [dioux] without quotes */ -+# define MDB_SCNy(t) MDB_FMT_Z #t -+#else -+typedef uint64_t mdb_size_t; -+# define MDB_SIZE_MAX UINT64_MAX -+# define MDB_PRIy(t) PRI##t##64 -+# define MDB_SCNy(t) SCN##t##64 -+# define mdb_env_create mdb_env_create_vl32 /**< Prevent mixing with non-VL32 builds */ -+#endif -+ - /** An abstraction for a file handle. - * On POSIX systems file handles are small integers. On Windows - * they're opaque pointers. -@@ -194,7 +228,7 @@ typedef int mdb_filehandle_t; - /** Library minor version */ - #define MDB_VERSION_MINOR 9 - /** Library patch version */ --#define MDB_VERSION_PATCH 18 -+#define MDB_VERSION_PATCH 70 - - /** Combine args a,b,c into a single integer for easy version comparisons */ - #define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c)) -@@ -204,7 +238,7 @@ typedef int mdb_filehandle_t; - MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) - - /** The release date of this library version */ --#define MDB_VERSION_DATE "February 5, 2016" -+#define MDB_VERSION_DATE "December 19, 2015" - - /** A stringifier for the version info */ - #define MDB_VERSTR(a,b,c,d) "LMDB " #a "." #b "." #c ": (" d ")" -@@ -306,7 +340,8 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel - #define MDB_REVERSEKEY 0x02 - /** use sorted duplicates */ - #define MDB_DUPSORT 0x04 -- /** numeric keys in native byte order: either unsigned int or size_t. -+ /** numeric keys in native byte order, either unsigned int or #mdb_size_t. -+ * (lmdb expects 32-bit int <= size_t <= 32/64-bit mdb_size_t.) - * The keys must all be of the same size. */ - #define MDB_INTEGERKEY 0x08 - /** with #MDB_DUPSORT, sorted dup items have fixed size */ -@@ -383,7 +418,9 @@ typedef enum MDB_cursor_op { - MDB_PREV_NODUP, /**< Position at last data item of previous key */ - MDB_SET, /**< Position at specified key */ - MDB_SET_KEY, /**< Position at specified key, return key + data */ -- MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */ -+ MDB_SET_RANGE, /**< Position at first key greater than or equal to specified key. */ -+ MDB_PREV_MULTIPLE /**< Position at previous page and return key and up to -+ a page of duplicate data items. Only for #MDB_DUPFIXED */ - } MDB_cursor_op; - - /** @defgroup errors Return Codes -@@ -440,8 +477,10 @@ typedef enum MDB_cursor_op { - #define MDB_BAD_VALSIZE (-30781) - /** The specified DBI was changed unexpectedly */ - #define MDB_BAD_DBI (-30780) -+ /** Unexpected problem - txn should abort */ -+#define MDB_PROBLEM (-30779) - /** The last defined error code */ --#define MDB_LAST_ERRCODE MDB_BAD_DBI -+#define MDB_LAST_ERRCODE MDB_PROBLEM - /** @} */ - - /** @brief Statistics for a database in the environment */ -@@ -449,18 +488,18 @@ typedef struct MDB_stat { - unsigned int ms_psize; /**< Size of a database page. - This is currently the same for all databases. */ - unsigned int ms_depth; /**< Depth (height) of the B-tree */ -- size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ -- size_t ms_leaf_pages; /**< Number of leaf pages */ -- size_t ms_overflow_pages; /**< Number of overflow pages */ -- size_t ms_entries; /**< Number of data items */ -+ mdb_size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ -+ mdb_size_t ms_leaf_pages; /**< Number of leaf pages */ -+ mdb_size_t ms_overflow_pages; /**< Number of overflow pages */ -+ mdb_size_t ms_entries; /**< Number of data items */ - } MDB_stat; - - /** @brief Information about the environment */ - typedef struct MDB_envinfo { - void *me_mapaddr; /**< Address of map, if fixed */ -- size_t me_mapsize; /**< Size of the data memory map */ -- size_t me_last_pgno; /**< ID of the last used page */ -- size_t me_last_txnid; /**< ID of the last committed transaction */ -+ mdb_size_t me_mapsize; /**< Size of the data memory map */ -+ mdb_size_t me_last_pgno; /**< ID of the last used page */ -+ mdb_size_t me_last_txnid; /**< ID of the last committed transaction */ - unsigned int me_maxreaders; /**< max reader slots in the environment */ - unsigned int me_numreaders; /**< max reader slots used in the environment */ - } MDB_envinfo; -@@ -672,6 +711,7 @@ int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); - * <li>#MDB_CP_COMPACT - Perform compaction while copying: omit free - * pages and sequentially renumber all pages in output. This option - * consumes more CPU and runs more slowly than the default. -+ * Currently it fails if the environment has suffered a page leak. - * </ul> - * @return A non-zero error value on failure and 0 on success. - */ -@@ -829,7 +869,7 @@ int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd); - * an active write transaction. - * </ul> - */ --int mdb_env_set_mapsize(MDB_env *env, size_t size); -+int mdb_env_set_mapsize(MDB_env *env, mdb_size_t size); - - /** @brief Set the maximum number of threads/reader slots for the environment. - * -@@ -942,6 +982,10 @@ int mdb_env_set_assert(MDB_env *env, MDB_assert_func *func); - * <ul> - * <li>#MDB_RDONLY - * This transaction will not perform any write operations. -+ * <li>#MDB_NOSYNC -+ * Don't flush system buffers to disk when committing this transaction. -+ * <li>#MDB_NOMETASYNC -+ * Flush system buffers but omit metadata flush when committing this transaction. - * </ul> - * @param[out] txn Address where the new #MDB_txn handle will be stored - * @return A non-zero error value on failure and 0 on success. Some possible -@@ -974,7 +1018,7 @@ MDB_env *mdb_txn_env(MDB_txn *txn); - * @param[in] txn A transaction handle returned by #mdb_txn_begin() - * @return A transaction ID, valid if input is an active transaction. - */ --size_t mdb_txn_id(MDB_txn *txn); -+mdb_size_t mdb_txn_id(MDB_txn *txn); - - /** @brief Commit all the operations of a transaction into the database. - * -@@ -1084,7 +1128,8 @@ int mdb_txn_renew(MDB_txn *txn); - * keys must be unique and may have only a single data item. - * <li>#MDB_INTEGERKEY - * Keys are binary integers in native byte order, either unsigned int -- * or size_t, and will be sorted as such. -+ * or #mdb_size_t, and will be sorted as such. -+ * (lmdb expects 32-bit int <= size_t <= 32/64-bit mdb_size_t.) - * The keys must all be of the same size. - * <li>#MDB_DUPFIXED - * This flag may only be used in combination with #MDB_DUPSORT. This option -@@ -1524,7 +1569,7 @@ int mdb_cursor_del(MDB_cursor *cursor, unsigned int flags); - * <li>EINVAL - cursor is not initialized, or an invalid parameter was specified. - * </ul> - */ --int mdb_cursor_count(MDB_cursor *cursor, size_t *countp); -+int mdb_cursor_count(MDB_cursor *cursor, mdb_size_t *countp); - - /** @brief Compare two data items according to a particular database. - * -diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c -index d2e81e4..1d9e9fa 100644 ---- a/libraries/liblmdb/mdb.c -+++ b/libraries/liblmdb/mdb.c -@@ -35,9 +35,42 @@ - #ifndef _GNU_SOURCE - #define _GNU_SOURCE 1 - #endif -+#if defined(MDB_VL32) || defined(__WIN64__) -+#define _FILE_OFFSET_BITS 64 -+#endif - #ifdef _WIN32 - #include <malloc.h> - #include <windows.h> -+ -+/* We use native NT APIs to setup the memory map, so that we can -+ * let the DB file grow incrementally instead of always preallocating -+ * the full size. These APIs are defined in <wdm.h> and <ntifs.h> -+ * but those headers are meant for driver-level development and -+ * conflict with the regular user-level headers, so we explicitly -+ * declare them here. Using these APIs also means we must link to -+ * ntdll.dll, which is not linked by default in user code. -+ */ -+NTSTATUS WINAPI -+NtCreateSection(OUT PHANDLE sh, IN ACCESS_MASK acc, -+ IN void * oa OPTIONAL, -+ IN PLARGE_INTEGER ms OPTIONAL, -+ IN ULONG pp, IN ULONG aa, IN HANDLE fh OPTIONAL); -+ -+typedef enum _SECTION_INHERIT { -+ ViewShare = 1, -+ ViewUnmap = 2 -+} SECTION_INHERIT; -+ -+NTSTATUS WINAPI -+NtMapViewOfSection(IN PHANDLE sh, IN HANDLE ph, -+ IN OUT PVOID *addr, IN ULONG_PTR zbits, -+ IN SIZE_T cs, IN OUT PLARGE_INTEGER off OPTIONAL, -+ IN OUT PSIZE_T vs, IN SECTION_INHERIT ih, -+ IN ULONG at, IN ULONG pp); -+ -+NTSTATUS WINAPI -+NtClose(HANDLE h); -+ - /** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it - * as int64 which is wrong. MSVC doesn't define it at all, so just - * don't use it. -@@ -56,6 +89,10 @@ - # define SSIZE_MAX INT_MAX - # endif - #endif -+/* Windows uses 32-bit off_t to define our own -+ 64 bit offset type -+*/ -+#define offset_t __int64 - #else - #include <sys/types.h> - #include <sys/stat.h> -@@ -68,6 +105,7 @@ - #include <sys/file.h> - #endif - #include <fcntl.h> -+#define offset_t off_t - #endif - - #if defined(__mips) && defined(__linux) -@@ -116,7 +154,9 @@ typedef SSIZE_T ssize_t; - #endif - - #if defined(__APPLE__) || defined (BSD) --# define MDB_USE_POSIX_SEM 1 -+# if !(defined(MDB_USE_POSIX_MUTEX) || defined(MDB_USE_POSIX_SEM)) -+# define MDB_USE_SYSV_SEM 1 -+# endif - # define MDB_FDATASYNC fsync - #elif defined(ANDROID) - # define MDB_FDATASYNC fsync -@@ -127,12 +167,22 @@ typedef SSIZE_T ssize_t; - #ifdef MDB_USE_POSIX_SEM - # define MDB_USE_HASH 1 - #include <semaphore.h> -+#elif defined(MDB_USE_SYSV_SEM) -+#include <sys/ipc.h> -+#include <sys/sem.h> -+#ifdef _SEM_SEMUN_UNDEFINED -+union semun { -+ int val; -+ struct semid_ds *buf; -+ unsigned short *array; -+}; -+#endif /* _SEM_SEMUN_UNDEFINED */ - #else - #define MDB_USE_POSIX_MUTEX 1 --#endif --#endif -+#endif /* MDB_USE_POSIX_SEM */ -+#endif /* !_WIN32 */ - --#if defined(_WIN32) + defined(MDB_USE_POSIX_SEM) \ -+#if defined(_WIN32) + defined(MDB_USE_POSIX_SEM) + defined(MDB_USE_SYSV_SEM) \ - + defined(MDB_USE_POSIX_MUTEX) != 1 - # error "Ambiguous shared-lock implementation" - #endif -@@ -183,7 +233,7 @@ typedef SSIZE_T ssize_t; - - #if (BYTE_ORDER == LITTLE_ENDIAN) == (BYTE_ORDER == BIG_ENDIAN) - # error "Unknown or unsupported endianness (BYTE_ORDER)" --#elif (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF -+#elif (-6 & 5) || CHAR_BIT!=8 || UINT_MAX!=0xffffffff || MDB_SIZE_MAX%UINT_MAX - # error "Two's complement, reasonably sized integer types, please" - #endif - -@@ -234,6 +284,8 @@ typedef SSIZE_T ssize_t; - #define MDB_NO_ROOT (MDB_LAST_ERRCODE + 10) - #ifdef _WIN32 - #define MDB_OWNERDEAD ((int) WAIT_ABANDONED) -+#elif defined MDB_USE_SYSV_SEM -+#define MDB_OWNERDEAD (MDB_LAST_ERRCODE + 11) - #elif defined(MDB_USE_POSIX_MUTEX) && defined(EOWNERDEAD) - #define MDB_OWNERDEAD EOWNERDEAD /**< #LOCK_MUTEX0() result if dead owner */ - #endif -@@ -256,16 +308,20 @@ typedef SSIZE_T ssize_t; - # define MDB_USE_ROBUST 0 - # else - # define MDB_USE_ROBUST 1 -+# endif -+#endif /* !MDB_USE_ROBUST */ -+ -+#if defined(MDB_USE_POSIX_MUTEX) && (MDB_USE_ROBUST) - /* glibc < 2.12 only provided _np API */ --# if defined(__GLIBC__) && GLIBC_VER < 0x02000c -+# if (defined(__GLIBC__) && GLIBC_VER < 0x02000c) || \ -+ (defined(PTHREAD_MUTEX_ROBUST_NP) && !defined(PTHREAD_MUTEX_ROBUST)) - # define PTHREAD_MUTEX_ROBUST PTHREAD_MUTEX_ROBUST_NP - # define pthread_mutexattr_setrobust(attr, flag) pthread_mutexattr_setrobust_np(attr, flag) - # define pthread_mutex_consistent(mutex) pthread_mutex_consistent_np(mutex) - # endif --# endif --#endif /* MDB_USE_ROBUST */ -+#endif /* MDB_USE_POSIX_MUTEX && MDB_USE_ROBUST */ - --#if defined(MDB_OWNERDEAD) && MDB_USE_ROBUST -+#if defined(MDB_OWNERDEAD) && (MDB_USE_ROBUST) - #define MDB_ROBUST_SUPPORTED 1 - #endif - -@@ -288,8 +344,10 @@ typedef HANDLE mdb_mutex_t, mdb_mutexref_t; - #define pthread_mutex_lock(x) WaitForSingleObject(*x, INFINITE) - #define pthread_cond_signal(x) SetEvent(*x) - #define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0) --#define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL) --#define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE) -+#define THREAD_CREATE(thr,start,arg) \ -+ (((thr) = CreateThread(NULL, 0, start, arg, 0, NULL)) ? 0 : ErrCode()) -+#define THREAD_FINISH(thr) \ -+ (WaitForSingleObject(thr, INFINITE) ? ErrCode() : 0) - #define LOCK_MUTEX0(mutex) WaitForSingleObject(mutex, INFINITE) - #define UNLOCK_MUTEX(mutex) ReleaseMutex(mutex) - #define mdb_mutex_consistent(mutex) 0 -@@ -305,12 +363,10 @@ typedef HANDLE mdb_mutex_t, mdb_mutexref_t; - #else - #define MDB_PROCESS_QUERY_LIMITED_INFORMATION 0x1000 - #endif --#define Z "I" - #else - #define THREAD_RET void * - #define THREAD_CREATE(thr,start,arg) pthread_create(&thr,NULL,start,arg) - #define THREAD_FINISH(thr) pthread_join(thr,NULL) --#define Z "z" /**< printf format modifier for size_t */ - - /** For MDB_LOCK_FORMAT: True if readers take a pid lock in the lockfile */ - #define MDB_PIDLOCK 1 -@@ -329,12 +385,46 @@ mdb_sem_wait(sem_t *sem) - return rc; - } - -+#elif defined MDB_USE_SYSV_SEM -+ -+typedef struct mdb_mutex { -+ int semid; -+ int semnum; -+ int *locked; -+} mdb_mutex_t[1], *mdb_mutexref_t; -+ -+#define LOCK_MUTEX0(mutex) mdb_sem_wait(mutex) -+#define UNLOCK_MUTEX(mutex) do { \ -+ struct sembuf sb = { 0, 1, SEM_UNDO }; \ -+ sb.sem_num = (mutex)->semnum; \ -+ *(mutex)->locked = 0; \ -+ semop((mutex)->semid, &sb, 1); \ -+} while(0) -+ -+static int -+mdb_sem_wait(mdb_mutexref_t sem) -+{ -+ int rc, *locked = sem->locked; -+ struct sembuf sb = { 0, -1, SEM_UNDO }; -+ sb.sem_num = sem->semnum; -+ do { -+ if (!semop(sem->semid, &sb, 1)) { -+ rc = *locked ? MDB_OWNERDEAD : MDB_SUCCESS; -+ *locked = 1; -+ break; -+ } -+ } while ((rc = errno) == EINTR); -+ return rc; -+} -+ -+#define mdb_mutex_consistent(mutex) 0 -+ - #else /* MDB_USE_POSIX_MUTEX: */ - /** Shared mutex/semaphore as it is stored (mdb_mutex_t), and as - * local variables keep it (mdb_mutexref_t). - * -- * When #mdb_mutexref_t is a pointer declaration and #mdb_mutex_t is -- * not, then it is array[size 1] so it can be assigned to a pointer. -+ * An mdb_mutex_t can be assigned to an mdb_mutexref_t. They can -+ * be the same, or an array[size 1] and a pointer. - * @{ - */ - typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t; -@@ -349,7 +439,7 @@ typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t; - /** Mark mutex-protected data as repaired, after death of previous owner. - */ - #define mdb_mutex_consistent(mutex) pthread_mutex_consistent(mutex) --#endif /* MDB_USE_POSIX_SEM */ -+#endif /* MDB_USE_POSIX_SEM || MDB_USE_SYSV_SEM */ - - /** Get the error code for the last failed system function. - */ -@@ -374,12 +464,24 @@ typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t; - #define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE)) - #endif - -+#define Z MDB_FMT_Z /**< printf/scanf format modifier for size_t */ -+#define Yu MDB_PRIy(u) /**< printf format for #mdb_size_t */ -+#define Yd MDB_PRIy(d) /**< printf format for "signed #mdb_size_t" */ -+ - #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) - #define MNAME_LEN 32 -+#elif defined(MDB_USE_SYSV_SEM) -+#define MNAME_LEN (sizeof(int)) - #else - #define MNAME_LEN (sizeof(pthread_mutex_t)) - #endif - -+#ifdef MDB_USE_SYSV_SEM -+#define SYSV_SEM_FLAG 1 /**< SysV sems in lockfile format */ -+#else -+#define SYSV_SEM_FLAG 0 -+#endif -+ - /** @} */ - - #ifdef MDB_ROBUST_SUPPORTED -@@ -521,7 +623,7 @@ static txnid_t mdb_debug_start; - /** The version number for a database's datafile format. */ - #define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1) - /** The version number for a database's lockfile format. */ --#define MDB_LOCK_VERSION 1 -+#define MDB_LOCK_VERSION ((MDB_DEVEL) ? 999 : 1) - - /** @brief The max size of a key we can write, or 0 for computed max. - * -@@ -712,6 +814,9 @@ typedef struct MDB_txbody { - uint32_t mtb_format; - #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) - char mtb_rmname[MNAME_LEN]; -+#elif defined(MDB_USE_SYSV_SEM) -+ int mtb_semid; -+ int mtb_rlocked; - #else - /** Mutex protecting access to this table. - * This is the reader table lock used with LOCK_MUTEX(). -@@ -740,12 +845,19 @@ typedef struct MDB_txninfo { - #define mti_rmname mt1.mtb.mtb_rmname - #define mti_txnid mt1.mtb.mtb_txnid - #define mti_numreaders mt1.mtb.mtb_numreaders -+#ifdef MDB_USE_SYSV_SEM -+#define mti_semid mt1.mtb.mtb_semid -+#define mti_rlocked mt1.mtb.mtb_rlocked -+#endif - char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)]; - } mt1; - union { - #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) - char mt2_wmname[MNAME_LEN]; - #define mti_wmname mt2.mt2_wmname -+#elif defined MDB_USE_SYSV_SEM -+ int mt2_wlocked; -+#define mti_wlocked mt2.mt2_wlocked - #else - mdb_mutex_t mt2_wmutex; - #define mti_wmutex mt2.mt2_wmutex -@@ -760,12 +872,27 @@ typedef struct MDB_txninfo { - ((uint32_t) \ - ((MDB_LOCK_VERSION) \ - /* Flags which describe functionality */ \ -+ + (SYSV_SEM_FLAG << 18) \ - + (((MDB_PIDLOCK) != 0) << 16))) - /** @} */ - --/** Common header for all page types. -- * Overflow records occupy a number of contiguous pages with no -- * headers on any page after the first. -+/** Common header for all page types. The page type depends on #mp_flags. -+ * -+ * #P_BRANCH and #P_LEAF pages have unsorted '#MDB_node's at the end, with -+ * sorted #mp_ptrs[] entries referring to them. Exception: #P_LEAF2 pages -+ * omit mp_ptrs and pack sorted #MDB_DUPFIXED values after the page header. -+ * -+ * #P_OVERFLOW records occupy one or more contiguous pages where only the -+ * first has a page header. They hold the real data of #F_BIGDATA nodes. -+ * -+ * #P_SUBP sub-pages are small leaf "pages" with duplicate data. -+ * A node with flag #F_DUPDATA but not #F_SUBDATA contains a sub-page. -+ * (Duplicate data can also go in sub-databases, which use normal pages.) -+ * -+ * #P_META pages contain #MDB_meta, the start point of an LMDB snapshot. -+ * -+ * Each non-metapage up to #MDB_meta.%mm_last_pg is reachable exactly once -+ * in the snapshot: Either used by a database or listed in a freeDB record. - */ - typedef struct MDB_page { - #define mp_pgno mp_p.p_pgno -@@ -774,7 +901,7 @@ typedef struct MDB_page { - pgno_t p_pgno; /**< page number */ - struct MDB_page *p_next; /**< for in-memory list of freed pages */ - } mp_p; -- uint16_t mp_pad; -+ uint16_t mp_pad; /**< key size if this is a LEAF2 page */ - /** @defgroup mdb_page Page Flags - * @ingroup internal - * Flags for the page headers. -@@ -841,7 +968,9 @@ typedef struct MDB_page { - /** The number of overflow pages needed to store the given size. */ - #define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1) - -- /** Link in #MDB_txn.%mt_loose_pgs list */ -+ /** Link in #MDB_txn.%mt_loose_pgs list. -+ * Kept outside the page header, which is needed when reusing the page. -+ */ - #define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2)) - - /** Header for a single key/data pair within a page. -@@ -924,7 +1053,7 @@ typedef struct MDB_node { - #ifdef MISALIGNED_OK - #define COPY_PGNO(dst,src) dst = src - #else --#if SIZE_MAX > 4294967295UL -+#if MDB_SIZE_MAX > 0xffffffffU - #define COPY_PGNO(dst,src) do { \ - unsigned short *s, *d; \ - s = (unsigned short *)&(src); \ -@@ -965,13 +1094,13 @@ typedef struct MDB_db { - pgno_t md_branch_pages; /**< number of internal pages */ - pgno_t md_leaf_pages; /**< number of leaf pages */ - pgno_t md_overflow_pages; /**< number of overflow pages */ -- size_t md_entries; /**< number of data items */ -+ mdb_size_t md_entries; /**< number of data items */ - pgno_t md_root; /**< the root page of this tree */ - } MDB_db; - -- /** mdb_dbi_open flags */ - #define MDB_VALID 0x8000 /**< DB handle is valid, for me_dbflags */ - #define PERSISTENT_FLAGS (0xffff & ~(MDB_VALID)) -+ /** #mdb_dbi_open() flags */ - #define VALID_FLAGS (MDB_REVERSEKEY|MDB_DUPSORT|MDB_INTEGERKEY|MDB_DUPFIXED|\ - MDB_INTEGERDUP|MDB_REVERSEDUP|MDB_CREATE) - -@@ -995,14 +1124,25 @@ typedef struct MDB_meta { - uint32_t mm_magic; - /** Version number of this file. Must be set to #MDB_DATA_VERSION. */ - uint32_t mm_version; -+#ifdef MDB_VL32 -+ union { /* always zero since we don't support fixed mapping in MDB_VL32 */ -+ MDB_ID mmun_ull; -+ void *mmun_address; -+ } mm_un; -+#define mm_address mm_un.mmun_address -+#else - void *mm_address; /**< address for fixed mapping */ -- size_t mm_mapsize; /**< size of mmap region */ -+#endif -+ pgno_t mm_mapsize; /**< size of mmap region */ - MDB_db mm_dbs[CORE_DBS]; /**< first is free space, 2nd is main db */ - /** The size of pages used in this DB */ - #define mm_psize mm_dbs[FREE_DBI].md_pad - /** Any persistent environment flags. @ref mdb_env */ - #define mm_flags mm_dbs[FREE_DBI].md_flags -- pgno_t mm_last_pg; /**< last used page in file */ -+ /** Last used page in the datafile. -+ * Actually the file may be shorter if the freeDB lists the final pages. -+ */ -+ pgno_t mm_last_pg; - volatile txnid_t mm_txnid; /**< txnid that committed this page */ - } MDB_meta; - -@@ -1039,6 +1179,9 @@ struct MDB_txn { - /** Nested txn under this txn, set together with flag #MDB_TXN_HAS_CHILD */ - MDB_txn *mt_child; - pgno_t mt_next_pgno; /**< next unallocated page */ -+#ifdef MDB_VL32 -+ pgno_t mt_last_pgno; /**< last written page */ -+#endif - /** The ID of this transaction. IDs are integers incrementing from 1. - * Only committed write transactions increment the ID. If a transaction - * aborts, the ID may be re-used by the next writer. -@@ -1052,7 +1195,7 @@ struct MDB_txn { - * in this transaction, linked through #NEXT_LOOSE_PAGE(page). - */ - MDB_page *mt_loose_pgs; -- /* #Number of loose pages (#mt_loose_pgs) */ -+ /** Number of loose pages (#mt_loose_pgs) */ - int mt_loose_count; - /** The sorted list of dirty pages we temporarily wrote to disk - * because the dirty list was full. page numbers in here are -@@ -1085,6 +1228,19 @@ struct MDB_txn { - MDB_cursor **mt_cursors; - /** Array of flags for each DB */ - unsigned char *mt_dbflags; -+#ifdef MDB_VL32 -+ /** List of read-only pages (actually chunks) */ -+ MDB_ID3L mt_rpages; -+ /** We map chunks of 16 pages. Even though Windows uses 4KB pages, all -+ * mappings must begin on 64KB boundaries. So we round off all pgnos to -+ * a chunk boundary. We do the same on Linux for symmetry, and also to -+ * reduce the frequency of mmap/munmap calls. -+ */ -+#define MDB_RPAGE_CHUNK 16 -+#define MDB_TRPAGE_SIZE 4096 /**< size of #mt_rpages array of chunks */ -+#define MDB_TRPAGE_MAX (MDB_TRPAGE_SIZE-1) /**< maximum chunk index */ -+ unsigned int mt_rpcheck; /**< threshold for reclaiming unref'd chunks */ -+#endif - /** Number of DB records in use, or 0 when the txn is finished. - * This number only ever increments until the txn finishes; we - * don't decrement it when individual DB handles are closed. -@@ -1096,7 +1252,9 @@ struct MDB_txn { - * @{ - */ - /** #mdb_txn_begin() flags */ --#define MDB_TXN_BEGIN_FLAGS MDB_RDONLY -+#define MDB_TXN_BEGIN_FLAGS (MDB_NOMETASYNC|MDB_NOSYNC|MDB_RDONLY) -+#define MDB_TXN_NOMETASYNC MDB_NOMETASYNC /**< don't sync meta for this txn on commit */ -+#define MDB_TXN_NOSYNC MDB_NOSYNC /**< don't sync this txn on commit */ - #define MDB_TXN_RDONLY MDB_RDONLY /**< read-only transaction */ - /* internal txn flags */ - #define MDB_TXN_WRITEMAP MDB_WRITEMAP /**< copy of #MDB_env flag in writers */ -@@ -1162,10 +1320,24 @@ struct MDB_cursor { - #define C_SUB 0x04 /**< Cursor is a sub-cursor */ - #define C_DEL 0x08 /**< last op was a cursor_del */ - #define C_UNTRACK 0x40 /**< Un-track cursor when closing */ -+#define C_WRITEMAP MDB_TXN_WRITEMAP /**< Copy of txn flag */ -+/** Read-only cursor into the txn's original snapshot in the map. -+ * Set for read-only txns, and in #mdb_page_alloc() for #FREE_DBI when -+ * #MDB_DEVEL & 2. Only implements code which is necessary for this. -+ */ -+#define C_ORIG_RDONLY MDB_TXN_RDONLY - /** @} */ - unsigned int mc_flags; /**< @ref mdb_cursor */ - MDB_page *mc_pg[CURSOR_STACK]; /**< stack of pushed pages */ - indx_t mc_ki[CURSOR_STACK]; /**< stack of page indices */ -+#ifdef MDB_VL32 -+ MDB_page *mc_ovpg; /**< a referenced overflow page */ -+# define MC_OVPG(mc) ((mc)->mc_ovpg) -+# define MC_SET_OVPG(mc, pg) ((mc)->mc_ovpg = (pg)) -+#else -+# define MC_OVPG(mc) ((MDB_page *)0) -+# define MC_SET_OVPG(mc, pg) ((void)0) -+#endif - }; - - /** Context for sorted-dup records. -@@ -1195,6 +1367,9 @@ struct MDB_env { - HANDLE me_fd; /**< The main data file */ - HANDLE me_lfd; /**< The lock file */ - HANDLE me_mfd; /**< just for writing the meta pages */ -+#if defined(MDB_VL32) && defined(_WIN32) -+ HANDLE me_fmh; /**< File Mapping handle */ -+#endif - /** Failed to update the meta page. Probably an I/O error. */ - #define MDB_FATAL_ERROR 0x80000000U - /** Some fields are initialized. */ -@@ -1219,8 +1394,8 @@ struct MDB_env { - void *me_pbuf; /**< scratch area for DUPSORT put() */ - MDB_txn *me_txn; /**< current write transaction */ - MDB_txn *me_txn0; /**< prealloc'd write transaction */ -- size_t me_mapsize; /**< size of the data memory map */ -- off_t me_size; /**< current file size */ -+ mdb_size_t me_mapsize; /**< size of the data memory map */ -+ offset_t me_size; /**< current file size */ - pgno_t me_maxpg; /**< me_mapsize / me_psize */ - MDB_dbx *me_dbxs; /**< array of static DB info */ - uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */ -@@ -1253,6 +1428,13 @@ struct MDB_env { - mdb_mutex_t me_rmutex; - mdb_mutex_t me_wmutex; - #endif -+#ifdef MDB_VL32 -+ MDB_ID3L me_rpages; /**< like #mt_rpages, but global to env */ -+ pthread_mutex_t me_rpmutex; /**< control access to #me_rpages */ -+#define MDB_ERPAGE_SIZE 16384 -+#define MDB_ERPAGE_MAX (MDB_ERPAGE_SIZE-1) -+ unsigned int me_rpcheck; -+#endif - void *me_userctx; /**< User-settable context */ - MDB_assert_func *me_assert_func; /**< Callback for assertion failures */ - }; -@@ -1298,7 +1480,7 @@ enum { - #define MDB_END_SLOT MDB_NOTLS /**< release any reader slot if #MDB_NOTLS */ - static void mdb_txn_end(MDB_txn *txn, unsigned mode); - --static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl); -+static int mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **mp, int *lvl); - static int mdb_page_search_root(MDB_cursor *mc, - MDB_val *key, int modify); - #define MDB_PS_MODIFY 1 -@@ -1327,7 +1509,7 @@ static int mdb_node_add(MDB_cursor *mc, indx_t indx, - static void mdb_node_del(MDB_cursor *mc, int ksize); - static void mdb_node_shrink(MDB_page *mp, indx_t indx); - static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft); --static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data); -+static int mdb_node_read(MDB_cursor *mc, MDB_node *leaf, MDB_val *data); - static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data); - static size_t mdb_branch_size(MDB_env *env, MDB_val *key); - -@@ -1360,13 +1542,18 @@ static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead); - static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int, mdb_cmp_cint, mdb_cmp_long; - /** @endcond */ - --/** Compare two items pointing at size_t's of unknown alignment. */ -+/** Compare two items pointing at '#mdb_size_t's of unknown alignment. */ - #ifdef MISALIGNED_OK - # define mdb_cmp_clong mdb_cmp_long - #else - # define mdb_cmp_clong mdb_cmp_cint - #endif - -+/** True if we need #mdb_cmp_clong() instead of \b cmp for #MDB_INTEGERDUP */ -+#define NEED_CMP_CLONG(cmp, ksize) \ -+ (UINT_MAX < MDB_SIZE_MAX && \ -+ (cmp) == mdb_cmp_int && (ksize) == sizeof(mdb_size_t)) -+ - #ifdef _WIN32 - static SECURITY_DESCRIPTOR mdb_null_sd; - static SECURITY_ATTRIBUTES mdb_all_sa; -@@ -1407,6 +1594,7 @@ static char *const mdb_errstr[] = { - "MDB_BAD_TXN: Transaction must abort, has a child, or is invalid", - "MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size", - "MDB_BAD_DBI: The specified DBI handle was closed/changed unexpectedly", -+ "MDB_PROBLEM: Unexpected problem - txn should abort", - }; - - char * -@@ -1417,8 +1605,9 @@ mdb_strerror(int err) - * This works as long as no function between the call to mdb_strerror - * and the actual use of the message uses more than 4K of stack. - */ -- char pad[4096]; -- char buf[1024], *ptr = buf; -+#define MSGSIZE 1024 -+#define PADSIZE 4096 -+ char buf[MSGSIZE+PADSIZE], *ptr = buf; - #endif - int i; - if (!err) -@@ -1450,7 +1639,7 @@ mdb_strerror(int err) - buf[0] = 0; - FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, -- NULL, err, 0, ptr, sizeof(buf), (va_list *)pad); -+ NULL, err, 0, ptr, MSGSIZE, (va_list *)buf+MSGSIZE); - return ptr; - #else - return strerror(err); -@@ -1550,20 +1739,20 @@ mdb_page_list(MDB_page *mp) - case P_LEAF|P_LEAF2: type = "LEAF2 page"; break; - case P_LEAF|P_LEAF2|P_SUBP: type = "LEAF2 sub-page"; break; - case P_OVERFLOW: -- fprintf(stderr, "Overflow page %"Z"u pages %u%s\n", -+ fprintf(stderr, "Overflow page %"Yu" pages %u%s\n", - pgno, mp->mp_pages, state); - return; - case P_META: -- fprintf(stderr, "Meta-page %"Z"u txnid %"Z"u\n", -+ fprintf(stderr, "Meta-page %"Yu" txnid %"Yu"\n", - pgno, ((MDB_meta *)METADATA(mp))->mm_txnid); - return; - default: -- fprintf(stderr, "Bad page %"Z"u flags 0x%u\n", pgno, mp->mp_flags); -+ fprintf(stderr, "Bad page %"Yu" flags 0x%X\n", pgno, mp->mp_flags); - return; - } - - nkeys = NUMKEYS(mp); -- fprintf(stderr, "%s %"Z"u numkeys %d%s\n", type, pgno, nkeys, state); -+ fprintf(stderr, "%s %"Yu" numkeys %d%s\n", type, pgno, nkeys, state); - - for (i=0; i<nkeys; i++) { - if (IS_LEAF2(mp)) { /* LEAF2 pages have no mp_ptrs[] or node headers */ -@@ -1578,7 +1767,7 @@ mdb_page_list(MDB_page *mp) - key.mv_data = node->mn_data; - nsize = NODESIZE + key.mv_size; - if (IS_BRANCH(mp)) { -- fprintf(stderr, "key %d: page %"Z"u, %s\n", i, NODEPGNO(node), -+ fprintf(stderr, "key %d: page %"Yu", %s\n", i, NODEPGNO(node), - DKEY(&key)); - total += nsize; - } else { -@@ -1674,7 +1863,7 @@ static void mdb_audit(MDB_txn *txn) - } - } - if (freecount + count + NUM_METAS != txn->mt_next_pgno) { -- fprintf(stderr, "audit: %lu freecount: %lu count: %lu total: %lu next_pgno: %lu\n", -+ fprintf(stderr, "audit: %"Yu" freecount: %"Yu" count: %"Yu" total: %"Yu" next_pgno: %"Yu"\n", - txn->mt_txnid, freecount, count+NUM_METAS, - freecount+count+NUM_METAS, txn->mt_next_pgno); - } -@@ -1691,10 +1880,8 @@ int - mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) - { - MDB_cmp_func *dcmp = txn->mt_dbxs[dbi].md_dcmp; --#if UINT_MAX < SIZE_MAX -- if (dcmp == mdb_cmp_int && a->mv_size == sizeof(size_t)) -+ if (NEED_CMP_CLONG(dcmp, a->mv_size)) - dcmp = mdb_cmp_clong; --#endif - return dcmp(a, b); - } - -@@ -1774,6 +1961,51 @@ mdb_dlist_free(MDB_txn *txn) - dl[0].mid = 0; - } - -+#ifdef MDB_VL32 -+static void -+mdb_page_unref(MDB_txn *txn, MDB_page *mp) -+{ -+ pgno_t pgno; -+ MDB_ID3L tl = txn->mt_rpages; -+ unsigned x, rem; -+ if (mp->mp_flags & (P_SUBP|P_DIRTY)) -+ return; -+ rem = mp->mp_pgno & (MDB_RPAGE_CHUNK-1); -+ pgno = mp->mp_pgno ^ rem; -+ x = mdb_mid3l_search(tl, pgno); -+ if (x != tl[0].mid && tl[x+1].mid == mp->mp_pgno) -+ x++; -+ if (tl[x].mref) -+ tl[x].mref--; -+} -+#define MDB_PAGE_UNREF(txn, mp) mdb_page_unref(txn, mp) -+ -+static void -+mdb_cursor_unref(MDB_cursor *mc) -+{ -+ int i; -+ if (!mc->mc_snum || !mc->mc_pg[0] || IS_SUBP(mc->mc_pg[0])) -+ return; -+ for (i=0; i<mc->mc_snum; i++) -+ mdb_page_unref(mc->mc_txn, mc->mc_pg[i]); -+ if (mc->mc_ovpg) { -+ mdb_page_unref(mc->mc_txn, mc->mc_ovpg); -+ mc->mc_ovpg = 0; -+ } -+ mc->mc_snum = mc->mc_top = 0; -+ mc->mc_pg[0] = NULL; -+ mc->mc_flags &= ~C_INITIALIZED; -+} -+#define MDB_CURSOR_UNREF(mc, force) \ -+ (((force) || ((mc)->mc_flags & C_INITIALIZED)) \ -+ ? mdb_cursor_unref(mc) \ -+ : (void)0) -+ -+#else -+#define MDB_PAGE_UNREF(txn, mp) -+#define MDB_CURSOR_UNREF(mc, force) ((void)0) -+#endif /* MDB_VL32 */ -+ - /** Loosen or free a single page. - * Saves single pages to a list for future reuse - * in this same txn. It has been pulled from the freeDB -@@ -1803,7 +2035,7 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp) - if (mp != dl[x].mptr) { /* bad cursor? */ - mc->mc_flags &= ~(C_INITIALIZED|C_EOF); - txn->mt_flags |= MDB_TXN_ERROR; -- return MDB_CORRUPTED; -+ return MDB_PROBLEM; - } - /* ok, it's ours */ - loose = 1; -@@ -1815,8 +2047,7 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp) - } - } - if (loose) { -- DPRINTF(("loosen db %d page %"Z"u", DDBI(mc), -- mp->mp_pgno)); -+ DPRINTF(("loosen db %d page %"Yu, DDBI(mc), mp->mp_pgno)); - NEXT_LOOSE_PAGE(mp) = txn->mt_loose_pgs; - txn->mt_loose_pgs = mp; - txn->mt_loose_count++; -@@ -1842,7 +2073,7 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) - { - enum { Mask = P_SUBP|P_DIRTY|P_LOOSE|P_KEEP }; - MDB_txn *txn = mc->mc_txn; -- MDB_cursor *m3; -+ MDB_cursor *m3, *m0 = mc; - MDB_xcursor *mx; - MDB_page *dp, *mp; - MDB_node *leaf; -@@ -1885,7 +2116,7 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) - pgno_t pgno = txn->mt_dbs[i].md_root; - if (pgno == P_INVALID) - continue; -- if ((rc = mdb_page_get(txn, pgno, &dp, &level)) != MDB_SUCCESS) -+ if ((rc = mdb_page_get(m0, pgno, &dp, &level)) != MDB_SUCCESS) - break; - if ((dp->mp_flags & Mask) == pflags && level <= 1) - dp->mp_flags ^= P_KEEP; -@@ -2074,6 +2305,8 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp) - * Do not modify the freedB, just merge freeDB records into me_pghead[] - * and move me_pglast to say which records were consumed. Only this - * function can create me_pghead and move me_pglast/mt_next_pgno. -+ * When #MDB_DEVEL & 2, it is not affected by #mdb_freelist_save(): it -+ * then uses the transaction's original snapshot of the freeDB. - * @param[in] mc cursor A cursor handle identifying the transaction and - * database for which we are allocating. - * @param[in] num the number of pages to allocate. -@@ -2111,8 +2344,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) - np = txn->mt_loose_pgs; - txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np); - txn->mt_loose_count--; -- DPRINTF(("db %d use loose page %"Z"u", DDBI(mc), -- np->mp_pgno)); -+ DPRINTF(("db %d use loose page %"Yu, DDBI(mc), np->mp_pgno)); - *mp = np; - return MDB_SUCCESS; - } -@@ -2149,6 +2381,14 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) - last = env->me_pglast; - oldest = env->me_pgoldest; - mdb_cursor_init(&m2, txn, FREE_DBI, NULL); -+#if (MDB_DEVEL) & 2 /* "& 2" so MDB_DEVEL=1 won't hide bugs breaking freeDB */ -+ /* Use original snapshot. TODO: Should need less care in code -+ * which modifies the database. Maybe we can delete some code? -+ */ -+ m2.mc_flags |= C_ORIG_RDONLY; -+ m2.mc_db = &env->me_metas[(txn->mt_txnid-1) & 1]->mm_dbs[FREE_DBI]; -+ m2.mc_dbflag = (unsigned char *)""; /* probably unnecessary */ -+#endif - if (last) { - op = MDB_SET_RANGE; - key.mv_data = &last; /* will look up last+1 */ -@@ -2189,7 +2429,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) - } - np = m2.mc_pg[m2.mc_top]; - leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]); -- if ((rc = mdb_node_read(txn, leaf, &data)) != MDB_SUCCESS) -+ if ((rc = mdb_node_read(&m2, leaf, &data)) != MDB_SUCCESS) - return rc; - - idl = (MDB_ID *) data.mv_data; -@@ -2206,10 +2446,10 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) - } - env->me_pglast = last; - #if (MDB_DEBUG) > 1 -- DPRINTF(("IDL read txn %"Z"u root %"Z"u num %u", -+ DPRINTF(("IDL read txn %"Yu" root %"Yu" num %u", - last, txn->mt_dbs[FREE_DBI].md_root, i)); - for (j = i; j; j--) -- DPRINTF(("IDL %"Z"u", idl[j])); -+ DPRINTF(("IDL %"Yu, idl[j])); - #endif - /* Merge in descending sorted order */ - mdb_midl_xmerge(mop, idl); -@@ -2224,6 +2464,20 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) - rc = MDB_MAP_FULL; - goto fail; - } -+#if defined(_WIN32) && !defined(MDB_VL32) -+ if (!(env->me_flags & MDB_RDONLY)) { -+ void *p; -+ p = (MDB_page *)(env->me_map + env->me_psize * pgno); -+ p = VirtualAlloc(p, env->me_psize * num, MEM_COMMIT, -+ (env->me_flags & MDB_WRITEMAP) ? PAGE_READWRITE: -+ PAGE_READONLY); -+ if (!p) { -+ DPUTS("VirtualAlloc failed"); -+ rc = ErrCode(); -+ goto fail; -+ } -+ } -+#endif - - search_done: - if (env->me_flags & MDB_WRITEMAP) { -@@ -2365,7 +2619,7 @@ mdb_page_touch(MDB_cursor *mc) - (rc = mdb_page_alloc(mc, 1, &np))) - goto fail; - pgno = np->mp_pgno; -- DPRINTF(("touched db %d page %"Z"u -> %"Z"u", DDBI(mc), -+ DPRINTF(("touched db %d page %"Yu" -> %"Yu, DDBI(mc), - mp->mp_pgno, pgno)); - mdb_cassert(mc, mp->mp_pgno != pgno); - mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno); -@@ -2389,7 +2643,7 @@ mdb_page_touch(MDB_cursor *mc) - if (mp != dl[x].mptr) { /* bad cursor? */ - mc->mc_flags &= ~(C_INITIALIZED|C_EOF); - txn->mt_flags |= MDB_TXN_ERROR; -- return MDB_CORRUPTED; -+ return MDB_PROBLEM; - } - return 0; - } -@@ -2439,6 +2693,7 @@ done: - } - } - } -+ MDB_PAGE_UNREF(mc->mc_txn, mp); - return 0; - - fail: -@@ -2447,7 +2702,7 @@ fail: - } - - int --mdb_env_sync(MDB_env *env, int force) -+mdb_env_sync0(MDB_env *env, int force, pgno_t numpgs) - { - int rc = 0; - if (env->me_flags & MDB_RDONLY) -@@ -2456,7 +2711,7 @@ mdb_env_sync(MDB_env *env, int force) - if (env->me_flags & MDB_WRITEMAP) { - int flags = ((env->me_flags & MDB_MAPASYNC) && !force) - ? MS_ASYNC : MS_SYNC; -- if (MDB_MSYNC(env->me_map, env->me_mapsize, flags)) -+ if (MDB_MSYNC(env->me_map, env->me_psize * numpgs, flags)) - rc = ErrCode(); - #ifdef _WIN32 - else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd)) -@@ -2476,6 +2731,13 @@ mdb_env_sync(MDB_env *env, int force) - return rc; - } - -+int -+mdb_env_sync(MDB_env *env, int force) -+{ -+ MDB_meta *m = mdb_env_pick_meta(env); -+ return mdb_env_sync0(env, force, m->mm_last_pg+1); -+} -+ - /** Back up parent txn's cursors, then grab the originals for tracking */ - static int - mdb_cursor_shadow(MDB_txn *src, MDB_txn *dst) -@@ -2720,6 +2982,9 @@ mdb_txn_renew0(MDB_txn *txn) - - /* Moved to here to avoid a data race in read TXNs */ - txn->mt_next_pgno = meta->mm_last_pg+1; -+#ifdef MDB_VL32 -+ txn->mt_last_pgno = txn->mt_next_pgno - 1; -+#endif - - txn->mt_flags = flags; - -@@ -2755,7 +3020,7 @@ mdb_txn_renew(MDB_txn *txn) - - rc = mdb_txn_renew0(txn); - if (rc == MDB_SUCCESS) { -- DPRINTF(("renew txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", -+ DPRINTF(("renew txn %"Yu"%c %p on mdbenv %p, root page %"Yu, - txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', - (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root)); - } -@@ -2798,6 +3063,17 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) - DPRINTF(("calloc: %s", strerror(errno))); - return ENOMEM; - } -+#ifdef MDB_VL32 -+ if (!parent) { -+ txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3)); -+ if (!txn->mt_rpages) { -+ free(txn); -+ return ENOMEM; -+ } -+ txn->mt_rpages[0].mid = 0; -+ txn->mt_rpcheck = MDB_TRPAGE_SIZE/2; -+ } -+#endif - txn->mt_dbxs = env->me_dbxs; /* static */ - txn->mt_dbs = (MDB_db *) ((char *)txn + tsize); - txn->mt_dbflags = (unsigned char *)txn + size - env->me_maxdbs; -@@ -2825,6 +3101,9 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) - parent->mt_child = txn; - txn->mt_parent = parent; - txn->mt_numdbs = parent->mt_numdbs; -+#ifdef MDB_VL32 -+ txn->mt_rpages = parent->mt_rpages; -+#endif - memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); - /* Copy parent's mt_dbflags, but clear DB_NEW */ - for (i=0; i<txn->mt_numdbs; i++) -@@ -2850,12 +3129,16 @@ renew: - rc = mdb_txn_renew0(txn); - } - if (rc) { -- if (txn != env->me_txn0) -+ if (txn != env->me_txn0) { -+#ifdef MDB_VL32 -+ free(txn->mt_rpages); -+#endif - free(txn); -+ } - } else { - txn->mt_flags |= flags; /* could not change txn=me_txn0 earlier */ - *ret = txn; -- DPRINTF(("begin txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", -+ DPRINTF(("begin txn %"Yu"%c %p on mdbenv %p, root page %"Yu, - txn->mt_txnid, (flags & MDB_RDONLY) ? 'r' : 'w', - (void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root)); - } -@@ -2870,7 +3153,7 @@ mdb_txn_env(MDB_txn *txn) - return txn->mt_env; - } - --size_t -+mdb_size_t - mdb_txn_id(MDB_txn *txn) - { - if(!txn) return 0; -@@ -2922,7 +3205,7 @@ mdb_txn_end(MDB_txn *txn, unsigned mode) - /* Export or close DBI handles opened in this txn */ - mdb_dbis_update(txn, mode & MDB_END_UPDATE); - -- DPRINTF(("%s txn %"Z"u%c %p on mdbenv %p, root page %"Z"u", -+ DPRINTF(("%s txn %"Yu"%c %p on mdbenv %p, root page %"Yu, - names[mode & MDB_END_OPMASK], - txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', - (void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root)); -@@ -2976,7 +3259,31 @@ mdb_txn_end(MDB_txn *txn, unsigned mode) - - mdb_midl_free(pghead); - } -- -+#ifdef MDB_VL32 -+ if (!txn->mt_parent) { -+ MDB_ID3L el = env->me_rpages, tl = txn->mt_rpages; -+ unsigned i, x, n = tl[0].mid; -+ pthread_mutex_lock(&env->me_rpmutex); -+ for (i = 1; i <= n; i++) { -+ if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) { -+ /* tmp overflow pages that we didn't share in env */ -+ munmap(tl[i].mptr, tl[i].mcnt * env->me_psize); -+ } else { -+ x = mdb_mid3l_search(el, tl[i].mid); -+ if (tl[i].mptr == el[x].mptr) { -+ el[x].mref--; -+ } else { -+ /* another tmp overflow page */ -+ munmap(tl[i].mptr, tl[i].mcnt * env->me_psize); -+ } -+ } -+ } -+ pthread_mutex_unlock(&env->me_rpmutex); -+ tl[0].mid = 0; -+ if (mode & MDB_END_FREE) -+ free(tl); -+ } -+#endif - if (mode & MDB_END_FREE) - free(txn); - } -@@ -3008,6 +3315,9 @@ mdb_txn_abort(MDB_txn *txn) - - /** Save the freelist as of this transaction to the freeDB. - * This changes the freelist. Keep trying until it stabilizes. -+ * -+ * When (MDB_DEVEL) & 2, the changes do not affect #mdb_page_alloc(), -+ * it then uses the transaction's original snapshot of the freeDB. - */ - static int - mdb_freelist_save(MDB_txn *txn) -@@ -3096,10 +3406,10 @@ mdb_freelist_save(MDB_txn *txn) - #if (MDB_DEBUG) > 1 - { - unsigned int i = free_pgs[0]; -- DPRINTF(("IDL write txn %"Z"u root %"Z"u num %u", -+ DPRINTF(("IDL write txn %"Yu" root %"Yu" num %u", - txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i)); - for (; i; i--) -- DPRINTF(("IDL %"Z"u", free_pgs[i])); -+ DPRINTF(("IDL %"Yu, free_pgs[i])); - } - #endif - continue; -@@ -3210,15 +3520,16 @@ mdb_page_flush(MDB_txn *txn, int keep) - MDB_ID2L dl = txn->mt_u.dirty_list; - unsigned psize = env->me_psize, j; - int i, pagecount = dl[0].mid, rc; -- size_t size = 0, pos = 0; -+ size_t size = 0; -+ offset_t pos = 0; - pgno_t pgno = 0; - MDB_page *dp = NULL; - #ifdef _WIN32 - OVERLAPPED ov; - #else - struct iovec iov[MDB_COMMIT_PAGES]; -- ssize_t wpos = 0, wsize = 0, wres; -- size_t next_pos = 1; /* impossible pos, so pos != next_pos */ -+ ssize_t wsize = 0, wres; -+ offset_t wpos = 0, next_pos = 1; /* impossible pos, so pos != next_pos */ - int n = 0; - #endif - -@@ -3266,7 +3577,7 @@ mdb_page_flush(MDB_txn *txn, int keep) - * the write offset, to at least save the overhead of a Seek - * system call. - */ -- DPRINTF(("committing page %"Z"u", pgno)); -+ DPRINTF(("committing page %"Yu, pgno)); - memset(&ov, 0, sizeof(ov)); - ov.Offset = pos & 0xffffffff; - ov.OffsetHigh = pos >> 16 >> 16; -@@ -3317,7 +3628,7 @@ retry_seek: - wpos = pos; - wsize = 0; - } -- DPRINTF(("committing page %"Z"u", pgno)); -+ DPRINTF(("committing page %"Yu, pgno)); - next_pos = pos + size; - iov[n].iov_len = size; - iov[n].iov_base = (char *)dp; -@@ -3325,6 +3636,10 @@ retry_seek: - n++; - #endif /* _WIN32 */ - } -+#ifdef MDB_VL32 -+ if (pgno > txn->mt_last_pgno) -+ txn->mt_last_pgno = pgno; -+#endif - - /* MIPS has cache coherency issues, this is a no-op everywhere else - * Note: for any size >= on-chip cache size, entire on-chip cache is -@@ -3526,7 +3841,7 @@ mdb_txn_commit(MDB_txn *txn) - !(txn->mt_flags & (MDB_TXN_DIRTY|MDB_TXN_SPILLS))) - goto done; - -- DPRINTF(("committing txn %"Z"u %p on mdbenv %p, root page %"Z"u", -+ DPRINTF(("committing txn %"Yu" %p on mdbenv %p, root page %"Yu, - txn->mt_txnid, (void*)txn, (void*)env, txn->mt_dbs[MAIN_DBI].md_root)); - - /* Update DB root pointers */ -@@ -3564,9 +3879,12 @@ mdb_txn_commit(MDB_txn *txn) - mdb_audit(txn); - #endif - -- if ((rc = mdb_page_flush(txn, 0)) || -- (rc = mdb_env_sync(env, 0)) || -- (rc = mdb_env_write_meta(txn))) -+ if ((rc = mdb_page_flush(txn, 0))) -+ goto fail; -+ if (!F_ISSET(txn->mt_flags, MDB_TXN_NOSYNC) && -+ (rc = mdb_env_sync0(env, 0, txn->mt_next_pgno))) -+ goto fail; -+ if ((rc = mdb_env_write_meta(txn))) - goto fail; - end_mode = MDB_END_COMMITTED|MDB_END_UPDATE; - -@@ -3621,7 +3939,7 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta) - p = (MDB_page *)&pbuf; - - if (!F_ISSET(p->mp_flags, P_META)) { -- DPRINTF(("page %"Z"u not a meta page", p->mp_pgno)); -+ DPRINTF(("page %"Yu" not a meta page", p->mp_pgno)); - return MDB_INVALID; - } - -@@ -3691,7 +4009,6 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta) - p = calloc(NUM_METAS, psize); - if (!p) - return ENOMEM; -- - p->mp_pgno = 0; - p->mp_flags = P_META; - *(MDB_meta *)METADATA(p) = *meta; -@@ -3722,8 +4039,8 @@ mdb_env_write_meta(MDB_txn *txn) - MDB_env *env; - MDB_meta meta, metab, *mp; - unsigned flags; -- size_t mapsize; -- off_t off; -+ mdb_size_t mapsize; -+ offset_t off; - int rc, len, toggle; - char *ptr; - HANDLE mfd; -@@ -3734,11 +4051,11 @@ mdb_env_write_meta(MDB_txn *txn) - #endif - - toggle = txn->mt_txnid & 1; -- DPRINTF(("writing meta page %d for root page %"Z"u", -+ DPRINTF(("writing meta page %d for root page %"Yu, - toggle, txn->mt_dbs[MAIN_DBI].md_root)); - - env = txn->mt_env; -- flags = env->me_flags; -+ flags = txn->mt_flags | env->me_flags; - mp = env->me_metas[toggle]; - mapsize = env->me_metas[toggle ^ 1]->mm_mapsize; - /* Persist any increases of mapsize config */ -@@ -3867,6 +4184,9 @@ mdb_env_create(MDB_env **env) - #ifdef MDB_USE_POSIX_SEM - e->me_rmutex = SEM_FAILED; - e->me_wmutex = SEM_FAILED; -+#elif defined MDB_USE_SYSV_SEM -+ e->me_rmutex->semid = -1; -+ e->me_wmutex->semid = -1; - #endif - e->me_pid = getpid(); - GET_PAGESIZE(e->me_os_psize); -@@ -3875,6 +4195,19 @@ mdb_env_create(MDB_env **env) - return MDB_SUCCESS; - } - -+#ifdef _WIN32 -+/** @brief Map a result from an NTAPI call to WIN32. */ -+static DWORD -+mdb_nt2win32(NTSTATUS st) -+{ -+ OVERLAPPED o = {0}; -+ DWORD br; -+ o.Internal = st; -+ GetOverlappedResult(NULL, &o, &br, FALSE); -+ return GetLastError(); -+} -+#endif -+ - static int ESECT - mdb_env_map(MDB_env *env, void *addr) - { -@@ -3882,42 +4215,51 @@ mdb_env_map(MDB_env *env, void *addr) - unsigned int flags = env->me_flags; - #ifdef _WIN32 - int rc; -+ int access = SECTION_MAP_READ; - HANDLE mh; -- LONG sizelo, sizehi; -- size_t msize; -+ void *map; -+ SIZE_T msize; -+ ULONG pageprot = PAGE_READONLY, secprot, alloctype; - -+ if (flags & MDB_WRITEMAP) { -+ access |= SECTION_MAP_WRITE; -+ pageprot = PAGE_READWRITE; -+ } - if (flags & MDB_RDONLY) { -- /* Don't set explicit map size, use whatever exists */ -+ secprot = PAGE_READONLY; - msize = 0; -- sizelo = 0; -- sizehi = 0; -+ alloctype = 0; - } else { -+ secprot = PAGE_READWRITE; - msize = env->me_mapsize; -- sizelo = msize & 0xffffffff; -- sizehi = msize >> 16 >> 16; /* only needed on Win64 */ -- -- /* Windows won't create mappings for zero length files. -- * and won't map more than the file size. -- * Just set the maxsize right now. -- */ -- if (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo -- || !SetEndOfFile(env->me_fd) -- || SetFilePointer(env->me_fd, 0, NULL, 0) != 0) -- return ErrCode(); -+ alloctype = MEM_RESERVE; - } - -- mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ? -- PAGE_READWRITE : PAGE_READONLY, -- sizehi, sizelo, NULL); -- if (!mh) -- return ErrCode(); -- env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ? -- FILE_MAP_WRITE : FILE_MAP_READ, -- 0, 0, msize, addr); -- rc = env->me_map ? 0 : ErrCode(); -- CloseHandle(mh); -+ rc = NtCreateSection(&mh, access, NULL, NULL, secprot, SEC_RESERVE, env->me_fd); - if (rc) -- return rc; -+ return mdb_nt2win32(rc); -+ map = addr; -+#ifdef MDB_VL32 -+ msize = NUM_METAS * env->me_psize; -+#endif -+ rc = NtMapViewOfSection(mh, GetCurrentProcess(), &map, 0, 0, NULL, &msize, ViewUnmap, alloctype, pageprot); -+#ifdef MDB_VL32 -+ env->me_fmh = mh; -+#else -+ NtClose(mh); -+#endif -+ if (rc) -+ return mdb_nt2win32(rc); -+ env->me_map = map; -+#else -+#ifdef MDB_VL32 -+ (void) flags; -+ env->me_map = mmap(addr, NUM_METAS * env->me_psize, PROT_READ, MAP_SHARED, -+ env->me_fd, 0); -+ if (env->me_map == MAP_FAILED) { -+ env->me_map = NULL; -+ return ErrCode(); -+ } - #else - int prot = PROT_READ; - if (flags & MDB_WRITEMAP) { -@@ -3951,6 +4293,7 @@ mdb_env_map(MDB_env *env, void *addr) - */ - if (addr && env->me_map != addr) - return EBUSY; /* TODO: Make a new MDB_* error code? */ -+#endif - - p = (MDB_page *)env->me_map; - env->me_metas[0] = METADATA(p); -@@ -3960,15 +4303,17 @@ mdb_env_map(MDB_env *env, void *addr) - } - - int ESECT --mdb_env_set_mapsize(MDB_env *env, size_t size) -+mdb_env_set_mapsize(MDB_env *env, mdb_size_t size) - { - /* If env is already open, caller is responsible for making - * sure there are no active txns. - */ - if (env->me_map) { -- int rc; - MDB_meta *meta; -+#ifndef MDB_VL32 - void *old; -+ int rc; -+#endif - if (env->me_txn) - return EINVAL; - meta = mdb_env_pick_meta(env); -@@ -3976,16 +4321,21 @@ mdb_env_set_mapsize(MDB_env *env, size_t size) - size = meta->mm_mapsize; - { - /* Silently round up to minimum if the size is too small */ -- size_t minsize = (meta->mm_last_pg + 1) * env->me_psize; -+ mdb_size_t minsize = (meta->mm_last_pg + 1) * env->me_psize; - if (size < minsize) - size = minsize; - } -+#ifndef MDB_VL32 -+ /* For MDB_VL32 this bit is a noop since we dynamically remap -+ * chunks of the DB anyway. -+ */ - munmap(env->me_map, env->me_mapsize); - env->me_mapsize = size; - old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL; - rc = mdb_env_map(env, old); - if (rc) - return rc; -+#endif /* !MDB_VL32 */ - } - env->me_mapsize = size; - if (env->me_psize) -@@ -4021,7 +4371,7 @@ mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers) - } - - static int ESECT --mdb_fsize(HANDLE fd, size_t *size) -+mdb_fsize(HANDLE fd, mdb_size_t *size) - { - #ifdef _WIN32 - LARGE_INTEGER fsize; -@@ -4135,7 +4485,7 @@ mdb_env_open2(MDB_env *env) - /* Make sure mapsize >= committed data size. Even when using - * mm_mapsize, which could be broken in old files (ITS#7789). - */ -- size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize; -+ mdb_size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize; - if (env->me_mapsize < minsize) - env->me_mapsize = minsize; - } -@@ -4154,6 +4504,18 @@ mdb_env_open2(MDB_env *env) - return rc; - newenv = 0; - } -+#ifdef _WIN32 -+ /* For FIXEDMAP, make sure the file is non-empty before we attempt to map it */ -+ if (newenv) { -+ char dummy = 0; -+ DWORD len; -+ rc = WriteFile(env->me_fd, &dummy, 1, &len, NULL); -+ if (!rc) { -+ rc = ErrCode(); -+ return rc; -+ } -+ } -+#endif - - rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL); - if (rc) -@@ -4183,13 +4545,13 @@ mdb_env_open2(MDB_env *env) - - DPRINTF(("opened database version %u, pagesize %u", - meta->mm_version, env->me_psize)); -- DPRINTF(("using meta page %d", (int) (meta->mm_txnid & 1))); -- DPRINTF(("depth: %u", db->md_depth)); -- DPRINTF(("entries: %"Z"u", db->md_entries)); -- DPRINTF(("branch pages: %"Z"u", db->md_branch_pages)); -- DPRINTF(("leaf pages: %"Z"u", db->md_leaf_pages)); -- DPRINTF(("overflow pages: %"Z"u", db->md_overflow_pages)); -- DPRINTF(("root: %"Z"u", db->md_root)); -+ DPRINTF(("using meta page %d", (int) (meta->mm_txnid & 1))); -+ DPRINTF(("depth: %u", db->md_depth)); -+ DPRINTF(("entries: %"Yu, db->md_entries)); -+ DPRINTF(("branch pages: %"Yu, db->md_branch_pages)); -+ DPRINTF(("leaf pages: %"Yu, db->md_leaf_pages)); -+ DPRINTF(("overflow pages: %"Yu, db->md_overflow_pages)); -+ DPRINTF(("root: %"Yu, db->md_root)); - } - #endif - -@@ -4465,8 +4827,12 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) - # define MDB_CLOEXEC 0 - #endif - #endif -+#ifdef MDB_USE_SYSV_SEM -+ int semid; -+ union semun semu; -+#endif - int rc; -- off_t size, rsize; -+ offset_t size, rsize; - - #ifdef _WIN32 - wchar_t *wlpath; -@@ -4616,19 +4982,42 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) - env->me_wmutex = sem_open(env->me_txns->mti_wmname, - O_CREAT|O_EXCL, mode, 1); - if (env->me_wmutex == SEM_FAILED) goto fail_errno; -+#elif defined(MDB_USE_SYSV_SEM) -+ unsigned short vals[2] = {1, 1}; -+ key_t key = ftok(lpath, 'M'); -+ if (key == -1) -+ goto fail_errno; -+ semid = semget(key, 2, (mode & 0777) | IPC_CREAT); -+ if (semid < 0) -+ goto fail_errno; -+ semu.array = vals; -+ if (semctl(semid, 0, SETALL, semu) < 0) -+ goto fail_errno; -+ env->me_txns->mti_semid = semid; -+ env->me_txns->mti_rlocked = 0; -+ env->me_txns->mti_wlocked = 0; - #else /* MDB_USE_POSIX_MUTEX: */ - pthread_mutexattr_t mattr; - -- if ((rc = pthread_mutexattr_init(&mattr)) -- || (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED)) -+ /* Solaris needs this before initing a robust mutex. Otherwise -+ * it may skip the init and return EBUSY "seems someone already -+ * inited" or EINVAL "it was inited differently". -+ */ -+ memset(env->me_txns->mti_rmutex, 0, sizeof(*env->me_txns->mti_rmutex)); -+ memset(env->me_txns->mti_wmutex, 0, sizeof(*env->me_txns->mti_wmutex)); -+ -+ if ((rc = pthread_mutexattr_init(&mattr)) != 0) -+ goto fail; -+ rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED); - #ifdef MDB_ROBUST_SUPPORTED -- || (rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST)) -+ if (!rc) rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST); - #endif -- || (rc = pthread_mutex_init(env->me_txns->mti_rmutex, &mattr)) -- || (rc = pthread_mutex_init(env->me_txns->mti_wmutex, &mattr))) -- goto fail; -+ if (!rc) rc = pthread_mutex_init(env->me_txns->mti_rmutex, &mattr); -+ if (!rc) rc = pthread_mutex_init(env->me_txns->mti_wmutex, &mattr); - pthread_mutexattr_destroy(&mattr); --#endif /* _WIN32 || MDB_USE_POSIX_SEM */ -+ if (rc) -+ goto fail; -+#endif /* _WIN32 || ... */ - - env->me_txns->mti_magic = MDB_MAGIC; - env->me_txns->mti_format = MDB_LOCK_FORMAT; -@@ -4636,6 +5025,9 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) - env->me_txns->mti_numreaders = 0; - - } else { -+#ifdef MDB_USE_SYSV_SEM -+ struct semid_ds buf; -+#endif - if (env->me_txns->mti_magic != MDB_MAGIC) { - DPUTS("lock region has invalid magic"); - rc = MDB_INVALID; -@@ -4661,8 +5053,33 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) - if (env->me_rmutex == SEM_FAILED) goto fail_errno; - env->me_wmutex = sem_open(env->me_txns->mti_wmname, 0); - if (env->me_wmutex == SEM_FAILED) goto fail_errno; -+#elif defined(MDB_USE_SYSV_SEM) -+ semid = env->me_txns->mti_semid; -+ semu.buf = &buf; -+ /* check for read access */ -+ if (semctl(semid, 0, IPC_STAT, semu) < 0) -+ goto fail_errno; -+ /* check for write access */ -+ if (semctl(semid, 0, IPC_SET, semu) < 0) -+ goto fail_errno; - #endif - } -+#ifdef MDB_USE_SYSV_SEM -+ env->me_rmutex->semid = semid; -+ env->me_wmutex->semid = semid; -+ env->me_rmutex->semnum = 0; -+ env->me_wmutex->semnum = 1; -+ env->me_rmutex->locked = &env->me_txns->mti_rlocked; -+ env->me_wmutex->locked = &env->me_txns->mti_wlocked; -+#endif -+#ifdef MDB_VL32 -+#ifdef _WIN32 -+ env->me_rpmutex = CreateMutex(NULL, FALSE, NULL); -+#else -+ pthread_mutex_init(&env->me_rpmutex, NULL); -+#endif -+#endif -+ - return MDB_SUCCESS; - - fail_errno: -@@ -4701,6 +5118,17 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode - if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) - return EINVAL; - -+#ifdef MDB_VL32 -+ if (flags & MDB_WRITEMAP) { -+ /* silently ignore WRITEMAP in 32 bit mode */ -+ flags ^= MDB_WRITEMAP; -+ } -+ if (flags & MDB_FIXEDMAP) { -+ /* cannot support FIXEDMAP */ -+ return EINVAL; -+ } -+#endif -+ - len = strlen(path); - if (flags & MDB_NOSUBDIR) { - rc = len + sizeof(LOCKSUFF) + len + 1; -@@ -4730,6 +5158,17 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode - (env->me_dirty_list = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2))))) - rc = ENOMEM; - } -+#ifdef MDB_VL32 -+ if (!rc) { -+ env->me_rpages = malloc(MDB_ERPAGE_SIZE * sizeof(MDB_ID3)); -+ if (!env->me_rpages) { -+ rc = ENOMEM; -+ goto leave; -+ } -+ env->me_rpages[0].mid = 0; -+ env->me_rpcheck = MDB_ERPAGE_SIZE/2; -+ } -+#endif - env->me_flags = flags |= MDB_ENV_ACTIVE; - if (rc) - goto leave; -@@ -4828,6 +5267,16 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode - txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs); - txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); - txn->mt_env = env; -+#ifdef MDB_VL32 -+ txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3)); -+ if (!txn->mt_rpages) { -+ free(txn); -+ rc = ENOMEM; -+ goto leave; -+ } -+ txn->mt_rpages[0].mid = 0; -+ txn->mt_rpcheck = MDB_TRPAGE_SIZE/2; -+#endif - txn->mt_dbxs = env->me_dbxs; - txn->mt_flags = MDB_TXN_FINISHED; - env->me_txn0 = txn; -@@ -4866,6 +5315,15 @@ mdb_env_close0(MDB_env *env, int excl) - free(env->me_dbflags); - free(env->me_path); - free(env->me_dirty_list); -+#ifdef MDB_VL32 -+ if (env->me_txn0 && env->me_txn0->mt_rpages) -+ free(env->me_txn0->mt_rpages); -+ { unsigned int x; -+ for (x=1; x<=env->me_rpages[0].mid; x++) -+ munmap(env->me_rpages[x].mptr, env->me_rpages[x].mcnt * env->me_psize); -+ } -+ free(env->me_rpages); -+#endif - free(env->me_txn0); - mdb_midl_free(env->me_free_pgs); - -@@ -4883,7 +5341,11 @@ mdb_env_close0(MDB_env *env, int excl) - } - - if (env->me_map) { -+#ifdef MDB_VL32 -+ munmap(env->me_map, NUM_METAS*env->me_psize); -+#else - munmap(env->me_map, env->me_mapsize); -+#endif - } - if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE) - (void) close(env->me_mfd); -@@ -4924,6 +5386,16 @@ mdb_env_close0(MDB_env *env, int excl) - sem_unlink(env->me_txns->mti_wmname); - } - } -+#elif defined(MDB_USE_SYSV_SEM) -+ if (env->me_rmutex->semid != -1) { -+ /* If we have the filelock: If we are the -+ * only remaining user, clean up semaphores. -+ */ -+ if (excl == 0) -+ mdb_env_excl_lock(env, &excl); -+ if (excl > 0) -+ semctl(env->me_rmutex->semid, 0, IPC_RMID); -+ } - #endif - munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); - } -@@ -4938,6 +5410,14 @@ mdb_env_close0(MDB_env *env, int excl) - #endif - (void) close(env->me_lfd); - } -+#ifdef MDB_VL32 -+#ifdef _WIN32 -+ if (env->me_fmh) CloseHandle(env->me_fmh); -+ if (env->me_rpmutex) CloseHandle(env->me_rpmutex); -+#else -+ pthread_mutex_destroy(&env->me_rpmutex); -+#endif -+#endif - - env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY); - } -@@ -4961,18 +5441,18 @@ mdb_env_close(MDB_env *env) - free(env); - } - --/** Compare two items pointing at aligned size_t's */ -+/** Compare two items pointing at aligned #mdb_size_t's */ - static int - mdb_cmp_long(const MDB_val *a, const MDB_val *b) - { -- return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 : -- *(size_t *)a->mv_data > *(size_t *)b->mv_data; -+ return (*(mdb_size_t *)a->mv_data < *(mdb_size_t *)b->mv_data) ? -1 : -+ *(mdb_size_t *)a->mv_data > *(mdb_size_t *)b->mv_data; - } - - /** Compare two items pointing at aligned unsigned int's. - * - * This is also set as #MDB_INTEGERDUP|#MDB_DUPFIXED's #MDB_dbx.%md_dcmp, -- * but #mdb_cmp_clong() is called instead if the data type is size_t. -+ * but #mdb_cmp_clong() is called instead if the data type is #mdb_size_t. - */ - static int - mdb_cmp_int(const MDB_val *a, const MDB_val *b) -@@ -5077,7 +5557,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp) - - nkeys = NUMKEYS(mp); - -- DPRINTF(("searching %u keys in %s %spage %"Z"u", -+ DPRINTF(("searching %u keys in %s %spage %"Yu, - nkeys, IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "", - mdb_dbg_pgno(mp))); - -@@ -5089,7 +5569,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp) - * alignment is guaranteed. Use faster mdb_cmp_int. - */ - if (cmp == mdb_cmp_cint && IS_BRANCH(mp)) { -- if (NODEPTR(mp, 1)->mn_ksize == sizeof(size_t)) -+ if (NODEPTR(mp, 1)->mn_ksize == sizeof(mdb_size_t)) - cmp = mdb_cmp_long; - else - cmp = mdb_cmp_int; -@@ -5125,7 +5605,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp) - DPRINTF(("found leaf index %u [%s], rc = %i", - i, DKEY(&nodekey), rc)); - else -- DPRINTF(("found branch index %u [%s -> %"Z"u], rc = %i", -+ DPRINTF(("found branch index %u [%s -> %"Yu"], rc = %i", - i, DKEY(&nodekey), NODEPGNO(node), rc)); - #endif - if (rc == 0) -@@ -5173,7 +5653,7 @@ static void - mdb_cursor_pop(MDB_cursor *mc) - { - if (mc->mc_snum) { -- DPRINTF(("popping page %"Z"u off db %d cursor %p", -+ DPRINTF(("popping page %"Yu" off db %d cursor %p", - mc->mc_pg[mc->mc_top]->mp_pgno, DDBI(mc), (void *) mc)); - - mc->mc_snum--; -@@ -5189,7 +5669,7 @@ mdb_cursor_pop(MDB_cursor *mc) - static int - mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) - { -- DPRINTF(("pushing page %"Z"u on db %d cursor %p", mp->mp_pgno, -+ DPRINTF(("pushing page %"Yu" on db %d cursor %p", mp->mp_pgno, - DDBI(mc), (void *) mc)); - - if (mc->mc_snum >= CURSOR_STACK) { -@@ -5204,21 +5684,309 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) - return MDB_SUCCESS; - } - --/** Find the address of the page corresponding to a given page number. -+#ifdef MDB_VL32 -+/** Map a read-only page. -+ * There are two levels of tracking in use, a per-txn list and a per-env list. -+ * ref'ing and unref'ing the per-txn list is faster since it requires no -+ * locking. Pages are cached in the per-env list for global reuse, and a lock -+ * is required. Pages are not immediately unmapped when their refcnt goes to -+ * zero; they hang around in case they will be reused again soon. -+ * -+ * When the per-txn list gets full, all pages with refcnt=0 are purged from the -+ * list and their refcnts in the per-env list are decremented. -+ * -+ * When the per-env list gets full, all pages with refcnt=0 are purged from the -+ * list and their pages are unmapped. -+ * -+ * @note "full" means the list has reached its respective rpcheck threshold. -+ * This threshold slowly raises if no pages could be purged on a given check, -+ * and returns to its original value when enough pages were purged. -+ * -+ * If purging doesn't free any slots, filling the per-txn list will return -+ * MDB_TXN_FULL, and filling the per-env list returns MDB_MAP_FULL. -+ * -+ * Reference tracking in a txn is imperfect, pages can linger with non-zero -+ * refcnt even without active references. It was deemed to be too invasive -+ * to add unrefs in every required location. However, all pages are unref'd -+ * at the end of the transaction. This guarantees that no stale references -+ * linger in the per-env list. -+ * -+ * Usually we map chunks of 16 pages at a time, but if an overflow page begins -+ * at the tail of the chunk we extend the chunk to include the entire overflow -+ * page. Unfortunately, pages can be turned into overflow pages after their -+ * chunk was already mapped. In that case we must remap the chunk if the -+ * overflow page is referenced. If the chunk's refcnt is 0 we can just remap -+ * it, otherwise we temporarily map a new chunk just for the overflow page. -+ * -+ * @note this chunk handling means we cannot guarantee that a data item -+ * returned from the DB will stay alive for the duration of the transaction: -+ * We unref pages as soon as a cursor moves away from the page -+ * A subsequent op may cause a purge, which may unmap any unref'd chunks -+ * The caller must copy the data if it must be used later in the same txn. -+ * -+ * Also - our reference counting revolves around cursors, but overflow pages -+ * aren't pointed to by a cursor's page stack. We have to remember them -+ * explicitly, in the added mc_ovpg field. A single cursor can only hold a -+ * reference to one overflow page at a time. -+ * - * @param[in] txn the transaction for this access. - * @param[in] pgno the page number for the page to retrieve. - * @param[out] ret address of a pointer where the page's address will be stored. -- * @param[out] lvl dirty_list inheritance level of found page. 1=current txn, 0=mapped page. - * @return 0 on success, non-zero on failure. - */ - static int --mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) -+mdb_rpage_get(MDB_txn *txn, pgno_t pg0, MDB_page **ret) - { - MDB_env *env = txn->mt_env; -+ MDB_page *p; -+ MDB_ID3L tl = txn->mt_rpages; -+ MDB_ID3L el = env->me_rpages; -+ MDB_ID3 id3; -+ unsigned x, rem; -+ pgno_t pgno; -+ int rc, retries = 1; -+#ifdef _WIN32 -+ LARGE_INTEGER off; -+ SIZE_T len; -+#define SET_OFF(off,val) off.QuadPart = val -+#define MAP(rc,env,addr,len,off) \ -+ addr = NULL; \ -+ rc = NtMapViewOfSection(env->me_fmh, GetCurrentProcess(), &addr, 0, \ -+ len, &off, &len, ViewUnmap, (env->me_flags & MDB_RDONLY) ? 0 : MEM_RESERVE, PAGE_READONLY); \ -+ if (rc) rc = mdb_nt2win32(rc) -+#else -+ offset_t off; -+ size_t len; -+#define SET_OFF(off,val) off = val -+#define MAP(rc,env,addr,len,off) \ -+ addr = mmap(NULL, len, PROT_READ, MAP_SHARED, env->me_fd, off); \ -+ rc = (addr == MAP_FAILED) ? errno : 0 -+#endif -+ -+ /* remember the offset of the actual page number, so we can -+ * return the correct pointer at the end. -+ */ -+ rem = pg0 & (MDB_RPAGE_CHUNK-1); -+ pgno = pg0 ^ rem; -+ -+ id3.mid = 0; -+ x = mdb_mid3l_search(tl, pgno); -+ if (x <= tl[0].mid && tl[x].mid == pgno) { -+ if (x != tl[0].mid && tl[x+1].mid == pg0) -+ x++; -+ /* check for overflow size */ -+ p = (MDB_page *)((char *)tl[x].mptr + rem * env->me_psize); -+ if (IS_OVERFLOW(p) && p->mp_pages + rem > tl[x].mcnt) { -+ id3.mcnt = p->mp_pages + rem; -+ len = id3.mcnt * env->me_psize; -+ SET_OFF(off, pgno * env->me_psize); -+ MAP(rc, env, id3.mptr, len, off); -+ if (rc) -+ return rc; -+ /* check for local-only page */ -+ if (rem) { -+ mdb_tassert(txn, tl[x].mid != pg0); -+ /* hope there's room to insert this locally. -+ * setting mid here tells later code to just insert -+ * this id3 instead of searching for a match. -+ */ -+ id3.mid = pg0; -+ goto notlocal; -+ } else { -+ /* ignore the mapping we got from env, use new one */ -+ tl[x].mptr = id3.mptr; -+ tl[x].mcnt = id3.mcnt; -+ /* if no active ref, see if we can replace in env */ -+ if (!tl[x].mref) { -+ unsigned i; -+ pthread_mutex_lock(&env->me_rpmutex); -+ i = mdb_mid3l_search(el, tl[x].mid); -+ if (el[i].mref == 1) { -+ /* just us, replace it */ -+ munmap(el[i].mptr, el[i].mcnt * env->me_psize); -+ el[i].mptr = tl[x].mptr; -+ el[i].mcnt = tl[x].mcnt; -+ } else { -+ /* there are others, remove ourself */ -+ el[i].mref--; -+ } -+ pthread_mutex_unlock(&env->me_rpmutex); -+ } -+ } -+ } -+ id3.mptr = tl[x].mptr; -+ id3.mcnt = tl[x].mcnt; -+ tl[x].mref++; -+ goto ok; -+ } -+ -+notlocal: -+ if (tl[0].mid >= MDB_TRPAGE_MAX - txn->mt_rpcheck) { -+ unsigned i, y; -+ /* purge unref'd pages from our list and unref in env */ -+ pthread_mutex_lock(&env->me_rpmutex); -+retry: -+ y = 0; -+ for (i=1; i<=tl[0].mid; i++) { -+ if (!tl[i].mref) { -+ if (!y) y = i; -+ /* tmp overflow pages don't go to env */ -+ if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) { -+ munmap(tl[i].mptr, tl[i].mcnt * env->me_psize); -+ continue; -+ } -+ x = mdb_mid3l_search(el, tl[i].mid); -+ el[x].mref--; -+ } -+ } -+ pthread_mutex_unlock(&env->me_rpmutex); -+ if (!y) { -+ /* we didn't find any unref'd chunks. -+ * if we're out of room, fail. -+ */ -+ if (tl[0].mid >= MDB_TRPAGE_MAX) -+ return MDB_TXN_FULL; -+ /* otherwise, raise threshold for next time around -+ * and let this go. -+ */ -+ txn->mt_rpcheck /= 2; -+ } else { -+ /* we found some unused; consolidate the list */ -+ for (i=y+1; i<= tl[0].mid; i++) -+ if (tl[i].mref) -+ tl[y++] = tl[i]; -+ tl[0].mid = y-1; -+ /* decrease the check threshold toward its original value */ -+ if (!txn->mt_rpcheck) -+ txn->mt_rpcheck = 1; -+ while (txn->mt_rpcheck < tl[0].mid && txn->mt_rpcheck < MDB_TRPAGE_SIZE/2) -+ txn->mt_rpcheck *= 2; -+ } -+ } -+ if (tl[0].mid < MDB_TRPAGE_SIZE) { -+ id3.mref = 1; -+ if (id3.mid) -+ goto found; -+ /* don't map past last written page in read-only envs */ -+ if ((env->me_flags & MDB_RDONLY) && pgno + MDB_RPAGE_CHUNK-1 > txn->mt_last_pgno) -+ id3.mcnt = txn->mt_last_pgno + 1 - pgno; -+ else -+ id3.mcnt = MDB_RPAGE_CHUNK; -+ len = id3.mcnt * env->me_psize; -+ id3.mid = pgno; -+ -+ /* search for page in env */ -+ pthread_mutex_lock(&env->me_rpmutex); -+ x = mdb_mid3l_search(el, pgno); -+ if (x <= el[0].mid && el[x].mid == pgno) { -+ id3.mptr = el[x].mptr; -+ id3.mcnt = el[x].mcnt; -+ /* check for overflow size */ -+ p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize); -+ if (IS_OVERFLOW(p) && p->mp_pages + rem > id3.mcnt) { -+ id3.mcnt = p->mp_pages + rem; -+ len = id3.mcnt * env->me_psize; -+ SET_OFF(off, pgno * env->me_psize); -+ MAP(rc, env, id3.mptr, len, off); -+ if (rc) -+ goto fail; -+ if (!el[x].mref) { -+ munmap(el[x].mptr, env->me_psize * el[x].mcnt); -+ el[x].mptr = id3.mptr; -+ el[x].mcnt = id3.mcnt; -+ } else { -+ id3.mid = pg0; -+ pthread_mutex_unlock(&env->me_rpmutex); -+ goto found; -+ } -+ } -+ el[x].mref++; -+ pthread_mutex_unlock(&env->me_rpmutex); -+ goto found; -+ } -+ if (el[0].mid >= MDB_ERPAGE_MAX - env->me_rpcheck) { -+ /* purge unref'd pages */ -+ unsigned i, y = 0; -+ for (i=1; i<=el[0].mid; i++) { -+ if (!el[i].mref) { -+ if (!y) y = i; -+ munmap(el[i].mptr, env->me_psize * el[i].mcnt); -+ } -+ } -+ if (!y) { -+ if (retries) { -+ /* see if we can unref some local pages */ -+ retries--; -+ id3.mid = 0; -+ goto retry; -+ } -+ if (el[0].mid >= MDB_ERPAGE_MAX) { -+ pthread_mutex_unlock(&env->me_rpmutex); -+ return MDB_MAP_FULL; -+ } -+ env->me_rpcheck /= 2; -+ } else { -+ for (i=y+1; i<= el[0].mid; i++) -+ if (el[i].mref) -+ el[y++] = el[i]; -+ el[0].mid = y-1; -+ if (!env->me_rpcheck) -+ env->me_rpcheck = 1; -+ while (env->me_rpcheck < el[0].mid && env->me_rpcheck < MDB_ERPAGE_SIZE/2) -+ env->me_rpcheck *= 2; -+ } -+ } -+ SET_OFF(off, pgno * env->me_psize); -+ MAP(rc, env, id3.mptr, len, off); -+ if (rc) { -+fail: -+ pthread_mutex_unlock(&env->me_rpmutex); -+ return rc; -+ } -+ /* check for overflow size */ -+ p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize); -+ if (IS_OVERFLOW(p) && p->mp_pages + rem > id3.mcnt) { -+ id3.mcnt = p->mp_pages + rem; -+ munmap(id3.mptr, len); -+ len = id3.mcnt * env->me_psize; -+ MAP(rc, env, id3.mptr, len, off); -+ if (rc) -+ goto fail; -+ } -+ mdb_mid3l_insert(el, &id3); -+ pthread_mutex_unlock(&env->me_rpmutex); -+found: -+ mdb_mid3l_insert(tl, &id3); -+ } else { -+ return MDB_TXN_FULL; -+ } -+ok: -+ p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize); -+#if MDB_DEBUG /* we don't need this check any more */ -+ if (IS_OVERFLOW(p)) { -+ mdb_tassert(txn, p->mp_pages + rem <= id3.mcnt); -+ } -+#endif -+ *ret = p; -+ return MDB_SUCCESS; -+} -+#endif -+ -+/** Find the address of the page corresponding to a given page number. -+ * @param[in] mc the cursor accessing the page. -+ * @param[in] pgno the page number for the page to retrieve. -+ * @param[out] ret address of a pointer where the page's address will be stored. -+ * @param[out] lvl dirty_list inheritance level of found page. 1=current txn, 0=mapped page. -+ * @return 0 on success, non-zero on failure. -+ */ -+static int -+mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **ret, int *lvl) -+{ -+ MDB_txn *txn = mc->mc_txn; - MDB_page *p = NULL; - int level; - -- if (! (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_WRITEMAP))) { -+ if (! (mc->mc_flags & (C_ORIG_RDONLY|C_WRITEMAP))) { - MDB_txn *tx2 = txn; - level = 1; - do { -@@ -5233,8 +6001,7 @@ mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) - MDB_ID pn = pgno << 1; - x = mdb_midl_search(tx2->mt_spill_pgs, pn); - if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) { -- p = (MDB_page *)(env->me_map + env->me_psize * pgno); -- goto done; -+ goto mapped; - } - } - if (dl[0].mid) { -@@ -5248,15 +6015,26 @@ mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) - } while ((tx2 = tx2->mt_parent) != NULL); - } - -- if (pgno < txn->mt_next_pgno) { -- level = 0; -- p = (MDB_page *)(env->me_map + env->me_psize * pgno); -- } else { -- DPRINTF(("page %"Z"u not found", pgno)); -+ if (pgno >= txn->mt_next_pgno) { -+ DPRINTF(("page %"Yu" not found", pgno)); - txn->mt_flags |= MDB_TXN_ERROR; - return MDB_PAGE_NOTFOUND; - } - -+ level = 0; -+ -+mapped: -+ { -+#ifdef MDB_VL32 -+ int rc = mdb_rpage_get(txn, pgno, &p); -+ if (rc) -+ return rc; -+#else -+ MDB_env *env = txn->mt_env; -+ p = (MDB_page *)(env->me_map + env->me_psize * pgno); -+#endif -+ } -+ - done: - *ret = p; - if (lvl) -@@ -5278,13 +6056,13 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) - MDB_node *node; - indx_t i; - -- DPRINTF(("branch page %"Z"u has %u keys", mp->mp_pgno, NUMKEYS(mp))); -+ DPRINTF(("branch page %"Yu" has %u keys", mp->mp_pgno, NUMKEYS(mp))); - /* Don't assert on branch pages in the FreeDB. We can get here - * while in the process of rebalancing a FreeDB branch page; we must - * let that proceed. ITS#8336 - */ - mdb_cassert(mc, !mc->mc_dbi || NUMKEYS(mp) > 1); -- DPRINTF(("found index 0 to page %"Z"u", NODEPGNO(NODEPTR(mp, 0)))); -+ DPRINTF(("found index 0 to page %"Yu, NODEPGNO(NODEPTR(mp, 0)))); - - if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) { - i = 0; -@@ -5308,7 +6086,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) - mdb_cassert(mc, i < NUMKEYS(mp)); - node = NODEPTR(mp, i); - -- if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) -+ if ((rc = mdb_page_get(mc, NODEPGNO(node), &mp, NULL)) != 0) - return rc; - - mc->mc_ki[mc->mc_top] = i; -@@ -5329,7 +6107,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) - return MDB_CORRUPTED; - } - -- DPRINTF(("found leaf page %"Z"u for key [%s]", mp->mp_pgno, -+ DPRINTF(("found leaf page %"Yu" for key [%s]", mp->mp_pgno, - key ? DKEY(key) : "null")); - mc->mc_flags |= C_INITIALIZED; - mc->mc_flags &= ~C_EOF; -@@ -5350,7 +6128,7 @@ mdb_page_search_lowest(MDB_cursor *mc) - MDB_node *node = NODEPTR(mp, 0); - int rc; - -- if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) -+ if ((rc = mdb_page_get(mc, NODEPGNO(node), &mp, NULL)) != 0) - return rc; - - mc->mc_ki[mc->mc_top] = 0; -@@ -5402,7 +6180,7 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) - return MDB_NOTFOUND; - if ((leaf->mn_flags & (F_DUPDATA|F_SUBDATA)) != F_SUBDATA) - return MDB_INCOMPATIBLE; /* not a named DB */ -- rc = mdb_node_read(mc->mc_txn, leaf, &data); -+ rc = mdb_node_read(&mc2, leaf, &data); - if (rc) - return rc; - memcpy(&flags, ((char *) data.mv_data + offsetof(MDB_db, md_flags)), -@@ -5425,14 +6203,26 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) - } - - mdb_cassert(mc, root > 1); -- if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) -- if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0) -+ if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) { -+#ifdef MDB_VL32 -+ if (mc->mc_pg[0]) -+ MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[0]); -+#endif -+ if ((rc = mdb_page_get(mc, root, &mc->mc_pg[0], NULL)) != 0) - return rc; -+ } - -+#ifdef MDB_VL32 -+ { -+ int i; -+ for (i=1; i<mc->mc_snum; i++) -+ MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[i]); -+ } -+#endif - mc->mc_snum = 1; - mc->mc_top = 0; - -- DPRINTF(("db %d root page %"Z"u has flags 0x%X", -+ DPRINTF(("db %d root page %"Yu" has flags 0x%X", - DDBI(mc), root, mc->mc_pg[0]->mp_flags)); - - if (flags & MDB_PS_MODIFY) { -@@ -5457,7 +6247,7 @@ mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp) - MDB_ID pn = pg << 1; - int rc; - -- DPRINTF(("free ov page %"Z"u (%d)", pg, ovpages)); -+ DPRINTF(("free ov page %"Yu" (%d)", pg, ovpages)); - /* If the page is dirty or on the spill list we just acquired it, - * so we should give it back to our current free list, if any. - * Otherwise put it onto the list of pages we freed in this txn. -@@ -5498,7 +6288,7 @@ mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp) - j = ++(dl[0].mid); - dl[j] = ix; /* Unsorted. OK when MDB_TXN_ERROR. */ - txn->mt_flags |= MDB_TXN_ERROR; -- return MDB_CORRUPTED; -+ return MDB_PROBLEM; - } - } - txn->mt_dirty_room++; -@@ -5523,18 +6313,22 @@ release: - } - - /** Return the data associated with a given node. -- * @param[in] txn The transaction for this operation. -+ * @param[in] mc The cursor for this operation. - * @param[in] leaf The node being read. - * @param[out] data Updated to point to the node's data. - * @return 0 on success, non-zero on failure. - */ - static int --mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data) -+mdb_node_read(MDB_cursor *mc, MDB_node *leaf, MDB_val *data) - { - MDB_page *omp; /* overflow page */ - pgno_t pgno; - int rc; - -+ if (MC_OVPG(mc)) { -+ MDB_PAGE_UNREF(mc->mc_txn, MC_OVPG(mc)); -+ MC_SET_OVPG(mc, NULL); -+ } - if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) { - data->mv_size = NODEDSZ(leaf); - data->mv_data = NODEDATA(leaf); -@@ -5545,11 +6339,12 @@ mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data) - */ - data->mv_size = NODEDSZ(leaf); - memcpy(&pgno, NODEDATA(leaf), sizeof(pgno)); -- if ((rc = mdb_page_get(txn, pgno, &omp, NULL)) != 0) { -- DPRINTF(("read overflow page %"Z"u failed", pgno)); -+ if ((rc = mdb_page_get(mc, pgno, &omp, NULL)) != 0) { -+ DPRINTF(("read overflow page %"Yu" failed", pgno)); - return rc; - } - data->mv_data = METADATA(omp); -+ MC_SET_OVPG(mc, omp); - - return MDB_SUCCESS; - } -@@ -5560,7 +6355,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, - { - MDB_cursor mc; - MDB_xcursor mx; -- int exact = 0; -+ int exact = 0, rc; - DKBUF; - - DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key))); -@@ -5572,7 +6367,12 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, - return MDB_BAD_TXN; - - mdb_cursor_init(&mc, txn, dbi, &mx); -- return mdb_cursor_set(&mc, key, data, MDB_SET, &exact); -+ rc = mdb_cursor_set(&mc, key, data, MDB_SET, &exact); -+ /* unref all the pages when MDB_VL32 - caller must copy the data -+ * before doing anything else -+ */ -+ MDB_CURSOR_UNREF(&mc, 1); -+ return rc; - } - - /** Find a sibling for a page. -@@ -5589,13 +6389,19 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right) - int rc; - MDB_node *indx; - MDB_page *mp; -+#ifdef MDB_VL32 -+ MDB_page *op; -+#endif - - if (mc->mc_snum < 2) { - return MDB_NOTFOUND; /* root has no siblings */ - } - -+#ifdef MDB_VL32 -+ op = mc->mc_pg[mc->mc_top]; -+#endif - mdb_cursor_pop(mc); -- DPRINTF(("parent page is page %"Z"u, index %u", -+ DPRINTF(("parent page is page %"Yu", index %u", - mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top])); - - if (move_right ? (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mc->mc_pg[mc->mc_top])) -@@ -5618,8 +6424,10 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right) - } - mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top])); - -+ MDB_PAGE_UNREF(mc->mc_txn, op); -+ - indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); -- if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0) { -+ if ((rc = mdb_page_get(mc, NODEPGNO(indx), &mp, NULL)) != 0) { - /* mc will be inconsistent if caller does mc_snum++ as above */ - mc->mc_flags &= ~(C_INITIALIZED|C_EOF); - return rc; -@@ -5640,11 +6448,12 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) - MDB_node *leaf; - int rc; - -- if (mc->mc_flags & C_EOF) { -+ if ((mc->mc_flags & C_EOF) || -+ ((mc->mc_flags & C_DEL) && op == MDB_NEXT_DUP)) { - return MDB_NOTFOUND; - } -- -- mdb_cassert(mc, mc->mc_flags & C_INITIALIZED); -+ if (!(mc->mc_flags & C_INITIALIZED)) -+ return mdb_cursor_first(mc, key, data); - - mp = mc->mc_pg[mc->mc_top]; - -@@ -5659,6 +6468,9 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) - return rc; - } - } -+ else { -+ MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0); -+ } - } else { - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); - if (op == MDB_NEXT_DUP) -@@ -5666,7 +6478,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) - } - } - -- DPRINTF(("cursor_next: top page is %"Z"u in cursor %p", -+ DPRINTF(("cursor_next: top page is %"Yu" in cursor %p", - mdb_dbg_pgno(mp), (void *) mc)); - if (mc->mc_flags & C_DEL) { - mc->mc_flags ^= C_DEL; -@@ -5680,12 +6492,12 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) - return rc; - } - mp = mc->mc_pg[mc->mc_top]; -- DPRINTF(("next page is %"Z"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top])); -+ DPRINTF(("next page is %"Yu", key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top])); - } else - mc->mc_ki[mc->mc_top]++; - - skip: -- DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u", -+ DPRINTF(("==> cursor points to page %"Yu" with %u keys, key index %u", - mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top])); - - if (IS_LEAF2(mp)) { -@@ -5701,7 +6513,7 @@ skip: - mdb_xcursor_init1(mc, leaf); - } - if (data) { -- if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) -+ if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) - return rc; - - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { -@@ -5723,7 +6535,12 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) - MDB_node *leaf; - int rc; - -- mdb_cassert(mc, mc->mc_flags & C_INITIALIZED); -+ if (!(mc->mc_flags & C_INITIALIZED)) { -+ rc = mdb_cursor_last(mc, key, data); -+ if (rc) -+ return rc; -+ mc->mc_ki[mc->mc_top]++; -+ } - - mp = mc->mc_pg[mc->mc_top]; - -@@ -5740,6 +6557,9 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) - return rc; - } - } -+ else { -+ MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0); -+ } - } else { - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); - if (op == MDB_PREV_DUP) -@@ -5747,7 +6567,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) - } - } - -- DPRINTF(("cursor_prev: top page is %"Z"u in cursor %p", -+ DPRINTF(("cursor_prev: top page is %"Yu" in cursor %p", - mdb_dbg_pgno(mp), (void *) mc)); - - mc->mc_flags &= ~(C_EOF|C_DEL); -@@ -5759,11 +6579,13 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) - } - mp = mc->mc_pg[mc->mc_top]; - mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1; -- DPRINTF(("prev page is %"Z"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top])); -+ DPRINTF(("prev page is %"Yu", key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top])); - } else - mc->mc_ki[mc->mc_top]--; - -- DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u", -+ mc->mc_flags &= ~C_EOF; -+ -+ DPRINTF(("==> cursor points to page %"Yu" with %u keys, key index %u", - mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top])); - - if (IS_LEAF2(mp)) { -@@ -5779,7 +6601,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) - mdb_xcursor_init1(mc, leaf); - } - if (data) { -- if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) -+ if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) - return rc; - - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { -@@ -5806,8 +6628,10 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, - if (key->mv_size == 0) - return MDB_BAD_VALSIZE; - -- if (mc->mc_xcursor) -+ if (mc->mc_xcursor) { -+ MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0); - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); -+ } - - /* See if we're already on the right page */ - if (mc->mc_flags & C_INITIALIZED) { -@@ -5873,6 +6697,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, - } - } - rc = 0; -+ mc->mc_flags &= ~C_EOF; - goto set2; - } - } -@@ -5961,25 +6786,23 @@ set1: - } else if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) { - MDB_val olddata; - MDB_cmp_func *dcmp; -- if ((rc = mdb_node_read(mc->mc_txn, leaf, &olddata)) != MDB_SUCCESS) -+ if ((rc = mdb_node_read(mc, leaf, &olddata)) != MDB_SUCCESS) - return rc; - dcmp = mc->mc_dbx->md_dcmp; --#if UINT_MAX < SIZE_MAX -- if (dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t)) -+ if (NEED_CMP_CLONG(dcmp, olddata.mv_size)) - dcmp = mdb_cmp_clong; --#endif - rc = dcmp(data, &olddata); - if (rc) { - if (op == MDB_GET_BOTH || rc > 0) - return MDB_NOTFOUND; - rc = 0; -- *data = olddata; - } -+ *data = olddata; - - } else { - if (mc->mc_xcursor) - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); -- if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) -+ if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) - return rc; - } - } -@@ -5999,8 +6822,10 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) - int rc; - MDB_node *leaf; - -- if (mc->mc_xcursor) -+ if (mc->mc_xcursor) { -+ MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0); - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); -+ } - - if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { - rc = mdb_page_search(mc, NULL, MDB_PS_FIRST); -@@ -6028,7 +6853,7 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) - if (rc) - return rc; - } else { -- if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) -+ if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) - return rc; - } - } -@@ -6043,8 +6868,10 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) - int rc; - MDB_node *leaf; - -- if (mc->mc_xcursor) -+ if (mc->mc_xcursor) { -+ MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0); - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); -+ } - - if (!(mc->mc_flags & C_EOF)) { - -@@ -6073,7 +6900,7 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) - if (rc) - return rc; - } else { -- if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) -+ if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) - return rc; - } - } -@@ -6119,7 +6946,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - rc = mdb_cursor_get(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_GET_CURRENT); - } else { -- rc = mdb_node_read(mc->mc_txn, leaf, data); -+ rc = mdb_node_read(mc, leaf, data); - } - } - } -@@ -6169,10 +6996,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, - rc = MDB_INCOMPATIBLE; - break; - } -- if (!(mc->mc_flags & C_INITIALIZED)) -- rc = mdb_cursor_first(mc, key, data); -- else -- rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP); -+ rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP); - if (rc == MDB_SUCCESS) { - if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - MDB_cursor *mx; -@@ -6187,24 +7011,38 @@ fetchm: - } - } - break; -+ case MDB_PREV_MULTIPLE: -+ if (data == NULL) { -+ rc = EINVAL; -+ break; -+ } -+ if (!(mc->mc_db->md_flags & MDB_DUPFIXED)) { -+ rc = MDB_INCOMPATIBLE; -+ break; -+ } -+ if (!(mc->mc_flags & C_INITIALIZED)) -+ rc = mdb_cursor_last(mc, key, data); -+ else -+ rc = MDB_SUCCESS; -+ if (rc == MDB_SUCCESS) { -+ MDB_cursor *mx = &mc->mc_xcursor->mx_cursor; -+ if (mx->mc_flags & C_INITIALIZED) { -+ rc = mdb_cursor_sibling(mx, 0); -+ if (rc == MDB_SUCCESS) -+ goto fetchm; -+ } else { -+ rc = MDB_NOTFOUND; -+ } -+ } -+ break; - case MDB_NEXT: - case MDB_NEXT_DUP: - case MDB_NEXT_NODUP: -- if (!(mc->mc_flags & C_INITIALIZED)) -- rc = mdb_cursor_first(mc, key, data); -- else -- rc = mdb_cursor_next(mc, key, data, op); -+ rc = mdb_cursor_next(mc, key, data, op); - break; - case MDB_PREV: - case MDB_PREV_DUP: - case MDB_PREV_NODUP: -- if (!(mc->mc_flags & C_INITIALIZED)) { -- rc = mdb_cursor_last(mc, key, data); -- if (rc) -- break; -- mc->mc_flags |= C_INITIALIZED; -- mc->mc_ki[mc->mc_top]++; -- } - rc = mdb_cursor_prev(mc, key, data, op); - break; - case MDB_FIRST: -@@ -6225,7 +7063,7 @@ fetchm: - MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { - MDB_GET_KEY(leaf, key); -- rc = mdb_node_read(mc->mc_txn, leaf, data); -+ rc = mdb_node_read(mc, leaf, data); - break; - } - } -@@ -6487,10 +7325,8 @@ more: - if (flags == MDB_CURRENT) - goto current; - dcmp = mc->mc_dbx->md_dcmp; --#if UINT_MAX < SIZE_MAX -- if (dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t)) -+ if (NEED_CMP_CLONG(dcmp, olddata.mv_size)) - dcmp = mdb_cmp_clong; --#endif - /* does data match? */ - if (!dcmp(data, &olddata)) { - if (flags & (MDB_NODUPDATA|MDB_APPENDDUP)) -@@ -6609,7 +7445,7 @@ current: - int level, ovpages, dpages = OVPAGES(data->mv_size, env->me_psize); - - memcpy(&pg, olddata.mv_data, sizeof(pg)); -- if ((rc2 = mdb_page_get(mc->mc_txn, pg, &omp, &level)) != 0) -+ if ((rc2 = mdb_page_get(mc, pg, &omp, &level)) != 0) - return rc2; - ovpages = omp->mp_pages; - -@@ -6641,8 +7477,13 @@ current: - /* Note - this page is already counted in parent's dirty_room */ - rc2 = mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2); - mdb_cassert(mc, rc2 == 0); -+ /* Currently we make the page look as with put() in the -+ * parent txn, in case the user peeks at MDB_RESERVEd -+ * or unused parts. Some users treat ovpages specially. -+ */ - if (!(flags & MDB_RESERVE)) { -- /* Copy end of page, adjusting alignment so -+ /* Skip the part where LMDB will put *data. -+ * Copy end of page, adjusting alignment so - * compiler may copy words instead of bytes. - */ - off = (PAGEHDRSZ + data->mv_size) & -sizeof(size_t); -@@ -6728,7 +7569,7 @@ new_sub: - */ - if (do_sub) { - int xflags, new_dupdata; -- size_t ecount; -+ mdb_size_t ecount; - put_sub: - xdata.mv_size = 0; - xdata.mv_data = ""; -@@ -6810,7 +7651,7 @@ put_sub: - return rc; - bad_sub: - if (rc == MDB_KEYEXIST) /* should not happen, we deleted that item */ -- rc = MDB_CORRUPTED; -+ rc = MDB_PROBLEM; - } - mc->mc_txn->mt_flags |= MDB_TXN_ERROR; - return rc; -@@ -6910,7 +7751,7 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) - pgno_t pg; - - memcpy(&pg, NODEDATA(leaf), sizeof(pg)); -- if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) || -+ if ((rc = mdb_page_get(mc, pg, &omp, NULL)) || - (rc = mdb_ovpage_free(mc, omp))) - goto fail; - } -@@ -6939,7 +7780,7 @@ mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp) - - if ((rc = mdb_page_alloc(mc, num, &np))) - return rc; -- DPRINTF(("allocated new mpage %"Z"u, page size %u", -+ DPRINTF(("allocated new mpage %"Yu", page size %u", - np->mp_pgno, mc->mc_txn->mt_env->me_psize)); - np->mp_flags = flags | P_DIRTY; - np->mp_lower = (PAGEHDRSZ-PAGEBASE); -@@ -7039,7 +7880,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx, - - mdb_cassert(mc, mp->mp_upper >= mp->mp_lower); - -- DPRINTF(("add to %s %spage %"Z"u index %i, data size %"Z"u key size %"Z"u [%s]", -+ DPRINTF(("add to %s %spage %"Yu" index %i, data size %"Z"u key size %"Z"u [%s]", - IS_LEAF(mp) ? "leaf" : "branch", - IS_SUBP(mp) ? "sub-" : "", - mdb_dbg_pgno(mp), indx, data ? data->mv_size : 0, -@@ -7080,7 +7921,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx, - goto full; - if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp))) - return rc; -- DPRINTF(("allocated overflow page %"Z"u", ofp->mp_pgno)); -+ DPRINTF(("allocated overflow page %"Yu, ofp->mp_pgno)); - flags |= F_BIGDATA; - goto update; - } else { -@@ -7137,7 +7978,7 @@ update: - return MDB_SUCCESS; - - full: -- DPRINTF(("not enough room in page %"Z"u, got %u ptrs", -+ DPRINTF(("not enough room in page %"Yu", got %u ptrs", - mdb_dbg_pgno(mp), NUMKEYS(mp))); - DPRINTF(("upper-lower = %u - %u = %"Z"d", mp->mp_upper,mp->mp_lower,room)); - DPRINTF(("node size = %"Z"u", node_size)); -@@ -7160,7 +8001,7 @@ mdb_node_del(MDB_cursor *mc, int ksize) - MDB_node *node; - char *base; - -- DPRINTF(("delete node %u on %s page %"Z"u", indx, -+ DPRINTF(("delete node %u on %s page %"Yu, indx, - IS_LEAF(mp) ? "leaf" : "branch", mdb_dbg_pgno(mp))); - numkeys = NUMKEYS(mp); - mdb_cassert(mc, indx < numkeys); -@@ -7269,7 +8110,8 @@ mdb_xcursor_init0(MDB_cursor *mc) - mx->mx_cursor.mc_dbflag = &mx->mx_dbflag; - mx->mx_cursor.mc_snum = 0; - mx->mx_cursor.mc_top = 0; -- mx->mx_cursor.mc_flags = C_SUB; -+ MC_SET_OVPG(&mx->mx_cursor, NULL); -+ mx->mx_cursor.mc_flags = C_SUB | (mc->mc_flags & (C_ORIG_RDONLY|C_WRITEMAP)); - mx->mx_dbx.md_name.mv_size = 0; - mx->mx_dbx.md_name.mv_data = NULL; - mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp; -@@ -7288,12 +8130,12 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) - { - MDB_xcursor *mx = mc->mc_xcursor; - -+ mx->mx_cursor.mc_flags &= C_SUB|C_ORIG_RDONLY|C_WRITEMAP; - if (node->mn_flags & F_SUBDATA) { - memcpy(&mx->mx_db, NODEDATA(node), sizeof(MDB_db)); - mx->mx_cursor.mc_pg[0] = 0; - mx->mx_cursor.mc_snum = 0; - mx->mx_cursor.mc_top = 0; -- mx->mx_cursor.mc_flags = C_SUB; - } else { - MDB_page *fp = NODEDATA(node); - mx->mx_db.md_pad = 0; -@@ -7306,7 +8148,7 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) - COPY_PGNO(mx->mx_db.md_root, fp->mp_pgno); - mx->mx_cursor.mc_snum = 1; - mx->mx_cursor.mc_top = 0; -- mx->mx_cursor.mc_flags = C_INITIALIZED|C_SUB; -+ mx->mx_cursor.mc_flags |= C_INITIALIZED; - mx->mx_cursor.mc_pg[0] = fp; - mx->mx_cursor.mc_ki[0] = 0; - if (mc->mc_db->md_flags & MDB_DUPFIXED) { -@@ -7316,13 +8158,11 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) - mx->mx_db.md_flags |= MDB_INTEGERKEY; - } - } -- DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi, -+ DPRINTF(("Sub-db -%u root page %"Yu, mx->mx_cursor.mc_dbi, - mx->mx_db.md_root)); - mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */ --#if UINT_MAX < SIZE_MAX -- if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t)) -+ if (NEED_CMP_CLONG(mx->mx_dbx.md_cmp, mx->mx_db.md_pad)) - mx->mx_dbx.md_cmp = mdb_cmp_clong; --#endif - } - - -@@ -7345,7 +8185,7 @@ mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int new_dupdata) - mx->mx_cursor.mc_flags |= C_INITIALIZED; - mx->mx_cursor.mc_ki[0] = 0; - mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */ --#if UINT_MAX < SIZE_MAX -+#if UINT_MAX < MDB_SIZE_MAX /* matches mdb_xcursor_init1:NEED_CMP_CLONG() */ - mx->mx_dbx.md_cmp = src_mx->mx_dbx.md_cmp; - #endif - } else if (!(mx->mx_cursor.mc_flags & C_INITIALIZED)) { -@@ -7353,7 +8193,7 @@ mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int new_dupdata) - } - mx->mx_db = src_mx->mx_db; - mx->mx_cursor.mc_pg[0] = src_mx->mx_cursor.mc_pg[0]; -- DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi, -+ DPRINTF(("Sub-db -%u root page %"Yu, mx->mx_cursor.mc_dbi, - mx->mx_db.md_root)); - } - -@@ -7372,7 +8212,8 @@ mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) - mc->mc_top = 0; - mc->mc_pg[0] = 0; - mc->mc_ki[0] = 0; -- mc->mc_flags = 0; -+ MC_SET_OVPG(mc, NULL); -+ mc->mc_flags = txn->mt_flags & (C_ORIG_RDONLY|C_WRITEMAP); - if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { - mdb_tassert(txn, mx != NULL); - mc->mc_xcursor = mx; -@@ -7437,7 +8278,7 @@ mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc) - - /* Return the count of duplicate data items for the current key */ - int --mdb_cursor_count(MDB_cursor *mc, size_t *countp) -+mdb_cursor_count(MDB_cursor *mc, mdb_size_t *countp) - { - MDB_node *leaf; - -@@ -7471,6 +8312,9 @@ mdb_cursor_count(MDB_cursor *mc, size_t *countp) - void - mdb_cursor_close(MDB_cursor *mc) - { -+ if (mc) { -+ MDB_CURSOR_UNREF(mc, 0); -+ } - if (mc && !mc->mc_backup) { - /* remove from txn, if tracked */ - if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) { -@@ -7522,7 +8366,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key) - char kbuf2[DKBUF_MAXKEYSIZE*2+1]; - k2.mv_data = NODEKEY(node); - k2.mv_size = node->mn_ksize; -- DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"Z"u", -+ DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"Yu, - indx, ptr, - mdb_dkey(&k2, kbuf2), - DKEY(key), -@@ -7670,7 +8514,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) - return rc; - } - -- DPRINTF(("moving %s node %u [%s] on page %"Z"u to node %u on page %"Z"u", -+ DPRINTF(("moving %s node %u [%s] on page %"Yu" to node %u on page %"Yu, - IS_LEAF(csrc->mc_pg[csrc->mc_top]) ? "leaf" : "branch", - csrc->mc_ki[csrc->mc_top], - DKEY(&key), -@@ -7764,7 +8608,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) - key.mv_size = NODEKSZ(srcnode); - key.mv_data = NODEKEY(srcnode); - } -- DPRINTF(("update separator for source page %"Z"u to [%s]", -+ DPRINTF(("update separator for source page %"Yu" to [%s]", - csrc->mc_pg[csrc->mc_top]->mp_pgno, DKEY(&key))); - mdb_cursor_copy(csrc, &mn); - mn.mc_snum--; -@@ -7795,7 +8639,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) - key.mv_size = NODEKSZ(srcnode); - key.mv_data = NODEKEY(srcnode); - } -- DPRINTF(("update separator for destination page %"Z"u to [%s]", -+ DPRINTF(("update separator for destination page %"Yu" to [%s]", - cdst->mc_pg[cdst->mc_top]->mp_pgno, DKEY(&key))); - mdb_cursor_copy(cdst, &mn); - mn.mc_snum--; -@@ -7841,7 +8685,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) - psrc = csrc->mc_pg[csrc->mc_top]; - pdst = cdst->mc_pg[cdst->mc_top]; - -- DPRINTF(("merging page %"Z"u into %"Z"u", psrc->mp_pgno, pdst->mp_pgno)); -+ DPRINTF(("merging page %"Yu" into %"Yu, psrc->mp_pgno, pdst->mp_pgno)); - - mdb_cassert(csrc, csrc->mc_snum > 1); /* can't merge root page */ - mdb_cassert(csrc, cdst->mc_snum > 1); -@@ -7898,7 +8742,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) - } - } - -- DPRINTF(("dst page %"Z"u now has %u keys (%.1f%% filled)", -+ DPRINTF(("dst page %"Yu" now has %u keys (%.1f%% filled)", - pdst->mp_pgno, NUMKEYS(pdst), - (float)PAGEFILL(cdst->mc_txn->mt_env, pdst) / 10)); - -@@ -7986,6 +8830,7 @@ mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst) - cdst->mc_snum = csrc->mc_snum; - cdst->mc_top = csrc->mc_top; - cdst->mc_flags = csrc->mc_flags; -+ MC_SET_OVPG(cdst, MC_OVPG(csrc)); - - for (i=0; i<csrc->mc_snum; i++) { - cdst->mc_pg[i] = csrc->mc_pg[i]; -@@ -8014,14 +8859,14 @@ mdb_rebalance(MDB_cursor *mc) - minkeys = 1; - thresh = FILL_THRESHOLD; - } -- DPRINTF(("rebalancing %s page %"Z"u (has %u keys, %.1f%% full)", -+ DPRINTF(("rebalancing %s page %"Yu" (has %u keys, %.1f%% full)", - IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch", - mdb_dbg_pgno(mc->mc_pg[mc->mc_top]), NUMKEYS(mc->mc_pg[mc->mc_top]), - (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10)); - - if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= thresh && - NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) { -- DPRINTF(("no need to rebalance page %"Z"u, above fill threshold", -+ DPRINTF(("no need to rebalance page %"Yu", above fill threshold", - mdb_dbg_pgno(mc->mc_pg[mc->mc_top]))); - return MDB_SUCCESS; - } -@@ -8069,7 +8914,7 @@ mdb_rebalance(MDB_cursor *mc) - if (rc) - return rc; - mc->mc_db->md_root = NODEPGNO(NODEPTR(mp, 0)); -- rc = mdb_page_get(mc->mc_txn,mc->mc_db->md_root,&mc->mc_pg[0],NULL); -+ rc = mdb_page_get(mc, mc->mc_db->md_root, &mc->mc_pg[0], NULL); - if (rc) - return rc; - mc->mc_db->md_depth--; -@@ -8130,7 +8975,7 @@ mdb_rebalance(MDB_cursor *mc) - DPUTS("reading right neighbor"); - mn.mc_ki[ptop]++; - node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); -- rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); -+ rc = mdb_page_get(mc, NODEPGNO(node), &mn.mc_pg[mn.mc_top], NULL); - if (rc) - return rc; - mn.mc_ki[mn.mc_top] = 0; -@@ -8142,7 +8987,7 @@ mdb_rebalance(MDB_cursor *mc) - DPUTS("reading left neighbor"); - mn.mc_ki[ptop]--; - node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); -- rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); -+ rc = mdb_page_get(mc, NODEPGNO(node), &mn.mc_pg[mn.mc_top], NULL); - if (rc) - return rc; - mn.mc_ki[mn.mc_top] = NUMKEYS(mn.mc_pg[mn.mc_top]) - 1; -@@ -8150,7 +8995,7 @@ mdb_rebalance(MDB_cursor *mc) - fromleft = 1; - } - -- DPRINTF(("found neighbor page %"Z"u (%u keys, %.1f%% full)", -+ DPRINTF(("found neighbor page %"Yu" (%u keys, %.1f%% full)", - mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]), - (float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10)); - -@@ -8207,8 +9052,6 @@ mdb_cursor_del0(MDB_cursor *mc) - if (m3->mc_pg[mc->mc_top] == mp) { - if (m3->mc_ki[mc->mc_top] == ki) { - m3->mc_flags |= C_DEL; -- if (mc->mc_db->md_flags & MDB_DUPSORT) -- m3->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED; - } else if (m3->mc_ki[mc->mc_top] > ki) { - m3->mc_ki[mc->mc_top]--; - } -@@ -8242,11 +9085,21 @@ mdb_cursor_del0(MDB_cursor *mc) - continue; - if (m3->mc_pg[mc->mc_top] == mp) { - /* if m3 points past last node in page, find next sibling */ -- if (m3->mc_ki[mc->mc_top] >= nkeys) { -- rc = mdb_cursor_sibling(m3, 1); -- if (rc == MDB_NOTFOUND) { -- m3->mc_flags |= C_EOF; -- rc = MDB_SUCCESS; -+ if (m3->mc_ki[mc->mc_top] >= mc->mc_ki[mc->mc_top]) { -+ if (m3->mc_ki[mc->mc_top] >= nkeys) { -+ rc = mdb_cursor_sibling(m3, 1); -+ if (rc == MDB_NOTFOUND) { -+ m3->mc_flags |= C_EOF; -+ rc = MDB_SUCCESS; -+ continue; -+ } -+ } -+ if (mc->mc_db->md_flags & MDB_DUPSORT) { -+ MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]); -+ if (node->mn_flags & F_DUPDATA) { -+ mdb_xcursor_init1(m3, node); -+ m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL; -+ } - } - } - } -@@ -8352,7 +9205,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno - newindx = mc->mc_ki[mc->mc_top]; - nkeys = NUMKEYS(mp); - -- DPRINTF(("-----> splitting %s page %"Z"u and adding [%s] at index %i/%i", -+ DPRINTF(("-----> splitting %s page %"Yu" and adding [%s] at index %i/%i", - IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno, - DKEY(newkey), mc->mc_ki[mc->mc_top], nkeys)); - -@@ -8360,7 +9213,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno - if ((rc = mdb_page_new(mc, mp->mp_flags, 1, &rp))) - return rc; - rp->mp_pad = mp->mp_pad; -- DPRINTF(("new right sibling: page %"Z"u", rp->mp_pgno)); -+ DPRINTF(("new right sibling: page %"Yu, rp->mp_pgno)); - - /* Usually when splitting the root page, the cursor - * height is 1. But when called from mdb_update_key, -@@ -8378,7 +9231,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno - mc->mc_pg[0] = pp; - mc->mc_ki[0] = 0; - mc->mc_db->md_root = pp->mp_pgno; -- DPRINTF(("root split! new root = %"Z"u", pp->mp_pgno)); -+ DPRINTF(("root split! new root = %"Yu, pp->mp_pgno)); - new_root = mc->mc_db->md_depth++; - - /* Add left (implicit) pointer. */ -@@ -8395,7 +9248,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno - ptop = 0; - } else { - ptop = mc->mc_top-1; -- DPRINTF(("parent branch page is %"Z"u", mc->mc_pg[ptop]->mp_pgno)); -+ DPRINTF(("parent branch page is %"Yu, mc->mc_pg[ptop]->mp_pgno)); - } - - mdb_cursor_copy(mc, &mn); -@@ -8776,11 +9629,12 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, - #ifndef MDB_WBUF - #define MDB_WBUF (1024*1024) - #endif -+#define MDB_EOF 0x10 /**< #mdb_env_copyfd1() is done reading */ - -- /** State needed for a compacting copy. */ -+ /** State needed for a double-buffering compacting copy. */ - typedef struct mdb_copy { - pthread_mutex_t mc_mutex; -- pthread_cond_t mc_cond; -+ pthread_cond_t mc_cond; /**< Condition variable for #mc_new */ - char *mc_wbuf[2]; - char *mc_over[2]; - MDB_env *mc_env; -@@ -8789,10 +9643,9 @@ typedef struct mdb_copy { - int mc_olen[2]; - pgno_t mc_next_pgno; - HANDLE mc_fd; -- int mc_status; -- volatile int mc_new; -- int mc_toggle; -- -+ int mc_toggle; /**< Buffer number in provider */ -+ int mc_new; /**< (0-2 buffers to write) | (#MDB_EOF at end) */ -+ volatile int mc_error; /**< Error code, never cleared if set */ - } mdb_copy; - - /** Dedicated writer thread for compacting copy. */ -@@ -8811,20 +9664,16 @@ mdb_env_copythr(void *arg) - #endif - - pthread_mutex_lock(&my->mc_mutex); -- my->mc_new = 0; -- pthread_cond_signal(&my->mc_cond); - for(;;) { - while (!my->mc_new) - pthread_cond_wait(&my->mc_cond, &my->mc_mutex); -- if (my->mc_new < 0) { -- my->mc_new = 0; -+ if (my->mc_new == 0 + MDB_EOF) /* 0 buffers, just EOF */ - break; -- } -- my->mc_new = 0; - wsize = my->mc_wlen[toggle]; - ptr = my->mc_wbuf[toggle]; - again: -- while (wsize > 0) { -+ rc = MDB_SUCCESS; -+ while (wsize > 0 && !my->mc_error) { - DO_WRITE(rc, my->mc_fd, ptr, wsize, len); - if (!rc) { - rc = ErrCode(); -@@ -8840,8 +9689,7 @@ again: - } - } - if (rc) { -- my->mc_status = rc; -- break; -+ my->mc_error = rc; - } - /* If there's an overflow page tail, write it too */ - if (my->mc_olen[toggle]) { -@@ -8852,39 +9700,41 @@ again: - } - my->mc_wlen[toggle] = 0; - toggle ^= 1; -+ /* Return the empty buffer to provider */ -+ my->mc_new--; - pthread_cond_signal(&my->mc_cond); - } -- pthread_cond_signal(&my->mc_cond); - pthread_mutex_unlock(&my->mc_mutex); - return (THREAD_RET)0; - #undef DO_WRITE - } - -- /** Tell the writer thread there's a buffer ready to write */ -+ /** Give buffer and/or #MDB_EOF to writer thread, await unused buffer. -+ * -+ * @param[in] my control structure. -+ * @param[in] adjust (1 to hand off 1 buffer) | (MDB_EOF when ending). -+ */ - static int ESECT --mdb_env_cthr_toggle(mdb_copy *my, int st) -+mdb_env_cthr_toggle(mdb_copy *my, int adjust) - { -- int toggle = my->mc_toggle ^ 1; - pthread_mutex_lock(&my->mc_mutex); -- if (my->mc_status) { -- pthread_mutex_unlock(&my->mc_mutex); -- return my->mc_status; -- } -- while (my->mc_new == 1) -- pthread_cond_wait(&my->mc_cond, &my->mc_mutex); -- my->mc_new = st; -- my->mc_toggle = toggle; -+ my->mc_new += adjust; - pthread_cond_signal(&my->mc_cond); -+ while (my->mc_new & 2) /* both buffers in use */ -+ pthread_cond_wait(&my->mc_cond, &my->mc_mutex); - pthread_mutex_unlock(&my->mc_mutex); -- return 0; -+ -+ my->mc_toggle ^= (adjust & 1); -+ /* Both threads reset mc_wlen, to be safe from threading errors */ -+ my->mc_wlen[my->mc_toggle] = 0; -+ return my->mc_error; - } - - /** Depth-first tree traversal for compacting copy. */ - static int ESECT - mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) - { -- MDB_cursor mc; -- MDB_txn *txn = my->mc_txn; -+ MDB_cursor mc = {0}; - MDB_node *ni; - MDB_page *mo, *mp, *leaf; - char *buf, *ptr; -@@ -8896,10 +9746,10 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) - return MDB_SUCCESS; - - mc.mc_snum = 1; -- mc.mc_top = 0; -- mc.mc_txn = txn; -+ mc.mc_txn = my->mc_txn; -+ mc.mc_flags = my->mc_txn->mt_flags & (C_ORIG_RDONLY|C_WRITEMAP); - -- rc = mdb_page_get(my->mc_txn, *pg, &mc.mc_pg[0], NULL); -+ rc = mdb_page_get(&mc, *pg, &mc.mc_pg[0], NULL); - if (rc) - return rc; - rc = mdb_page_search_root(&mc, NULL, MDB_PS_FIRST); -@@ -8943,7 +9793,8 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) - } - - memcpy(&pg, NODEDATA(ni), sizeof(pg)); -- rc = mdb_page_get(txn, pg, &omp, NULL); -+ memcpy(NODEDATA(ni), &my->mc_next_pgno, sizeof(pgno_t)); -+ rc = mdb_page_get(&mc, pg, &omp, NULL); - if (rc) - goto done; - if (my->mc_wlen[toggle] >= MDB_WBUF) { -@@ -8965,7 +9816,6 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) - goto done; - toggle = my->mc_toggle; - } -- memcpy(NODEDATA(ni), &mo->mp_pgno, sizeof(pgno_t)); - } else if (ni->mn_flags & F_SUBDATA) { - MDB_db db; - -@@ -8994,7 +9844,7 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags) - again: - ni = NODEPTR(mp, mc.mc_ki[mc.mc_top]); - pg = NODEPGNO(ni); -- rc = mdb_page_get(txn, pg, &mp, NULL); -+ rc = mdb_page_get(&mc, pg, &mp, NULL); - if (rc) - goto done; - mc.mc_top++; -@@ -9043,47 +9893,56 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) - { - MDB_meta *mm; - MDB_page *mp; -- mdb_copy my; -+ mdb_copy my = {0}; - MDB_txn *txn = NULL; - pthread_t thr; -- int rc; -+ pgno_t root, new_root; -+ int rc = MDB_SUCCESS; - - #ifdef _WIN32 -- my.mc_mutex = CreateMutex(NULL, FALSE, NULL); -- my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL); -+ if (!(my.mc_mutex = CreateMutex(NULL, FALSE, NULL)) || -+ !(my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL))) { -+ rc = ErrCode(); -+ goto done; -+ } - my.mc_wbuf[0] = _aligned_malloc(MDB_WBUF*2, env->me_os_psize); -- if (my.mc_wbuf[0] == NULL) -- return errno; -+ if (my.mc_wbuf[0] == NULL) { -+ /* _aligned_malloc() sets errno, but we use Windows error codes */ -+ rc = ERROR_NOT_ENOUGH_MEMORY; -+ goto done; -+ } - #else -- pthread_mutex_init(&my.mc_mutex, NULL); -- pthread_cond_init(&my.mc_cond, NULL); -+ if ((rc = pthread_mutex_init(&my.mc_mutex, NULL)) != 0) -+ return rc; -+ if ((rc = pthread_cond_init(&my.mc_cond, NULL)) != 0) -+ goto done2; - #ifdef HAVE_MEMALIGN - my.mc_wbuf[0] = memalign(env->me_os_psize, MDB_WBUF*2); -- if (my.mc_wbuf[0] == NULL) -- return errno; -+ if (my.mc_wbuf[0] == NULL) { -+ rc = errno; -+ goto done; -+ } - #else -- rc = posix_memalign((void **)&my.mc_wbuf[0], env->me_os_psize, MDB_WBUF*2); -- if (rc) -- return rc; -+ { -+ void *p; -+ if ((rc = posix_memalign(&p, env->me_os_psize, MDB_WBUF*2)) != 0) -+ goto done; -+ my.mc_wbuf[0] = p; -+ } - #endif - #endif - memset(my.mc_wbuf[0], 0, MDB_WBUF*2); - my.mc_wbuf[1] = my.mc_wbuf[0] + MDB_WBUF; -- my.mc_wlen[0] = 0; -- my.mc_wlen[1] = 0; -- my.mc_olen[0] = 0; -- my.mc_olen[1] = 0; - my.mc_next_pgno = NUM_METAS; -- my.mc_status = 0; -- my.mc_new = 1; -- my.mc_toggle = 0; - my.mc_env = env; - my.mc_fd = fd; -- THREAD_CREATE(thr, mdb_env_copythr, &my); -+ rc = THREAD_CREATE(thr, mdb_env_copythr, &my); -+ if (rc) -+ goto done; - - rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); - if (rc) -- return rc; -+ goto finish; - - mp = (MDB_page *)my.mc_wbuf[0]; - memset(mp, 0, NUM_METAS * env->me_psize); -@@ -9099,57 +9958,64 @@ mdb_env_copyfd1(MDB_env *env, HANDLE fd) - *(MDB_meta *)METADATA(mp) = *mm; - mm = (MDB_meta *)METADATA(mp); - -- /* Count the number of free pages, subtract from lastpg to find -- * number of active pages -- */ -- { -+ /* Set metapage 1 with current main DB */ -+ root = new_root = txn->mt_dbs[MAIN_DBI].md_root; -+ if (root != P_INVALID) { -+ /* Count free pages + freeDB pages. Subtract from last_pg -+ * to find the new last_pg, which also becomes the new root. -+ */ - MDB_ID freecount = 0; - MDB_cursor mc; - MDB_val key, data; - mdb_cursor_init(&mc, txn, FREE_DBI, NULL); - while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0) - freecount += *(MDB_ID *)data.mv_data; -+ if (rc != MDB_NOTFOUND) -+ goto finish; - freecount += txn->mt_dbs[FREE_DBI].md_branch_pages + - txn->mt_dbs[FREE_DBI].md_leaf_pages + - txn->mt_dbs[FREE_DBI].md_overflow_pages; - -- /* Set metapage 1 */ -- mm->mm_last_pg = txn->mt_next_pgno - freecount - 1; -+ new_root = txn->mt_next_pgno - 1 - freecount; -+ mm->mm_last_pg = new_root; - mm->mm_dbs[MAIN_DBI] = txn->mt_dbs[MAIN_DBI]; -- if (mm->mm_last_pg > NUM_METAS-1) { -- mm->mm_dbs[MAIN_DBI].md_root = mm->mm_last_pg; -- mm->mm_txnid = 1; -- } else { -- mm->mm_dbs[MAIN_DBI].md_root = P_INVALID; -- } -+ mm->mm_dbs[MAIN_DBI].md_root = new_root; -+ } else { -+ /* When the DB is empty, handle it specially to -+ * fix any breakage like page leaks from ITS#8174. -+ */ -+ mm->mm_dbs[MAIN_DBI].md_flags = txn->mt_dbs[MAIN_DBI].md_flags; -+ } -+ if (root != P_INVALID || mm->mm_dbs[MAIN_DBI].md_flags) { -+ mm->mm_txnid = 1; /* use metapage 1 */ - } -+ - my.mc_wlen[0] = env->me_psize * NUM_METAS; - my.mc_txn = txn; -- pthread_mutex_lock(&my.mc_mutex); -- while(my.mc_new) -- pthread_cond_wait(&my.mc_cond, &my.mc_mutex); -- pthread_mutex_unlock(&my.mc_mutex); -- rc = mdb_env_cwalk(&my, &txn->mt_dbs[MAIN_DBI].md_root, 0); -- if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle]) -- rc = mdb_env_cthr_toggle(&my, 1); -- mdb_env_cthr_toggle(&my, -1); -- pthread_mutex_lock(&my.mc_mutex); -- while(my.mc_new) -- pthread_cond_wait(&my.mc_cond, &my.mc_mutex); -- pthread_mutex_unlock(&my.mc_mutex); -- THREAD_FINISH(thr); -+ rc = mdb_env_cwalk(&my, &root, 0); -+ if (rc == MDB_SUCCESS && root != new_root) { -+ rc = MDB_INCOMPATIBLE; /* page leak or corrupt DB */ -+ } - -+finish: -+ if (rc) -+ my.mc_error = rc; -+ mdb_env_cthr_toggle(&my, 1 | MDB_EOF); -+ rc = THREAD_FINISH(thr); - mdb_txn_abort(txn); -+ -+done: - #ifdef _WIN32 -- CloseHandle(my.mc_cond); -- CloseHandle(my.mc_mutex); -- _aligned_free(my.mc_wbuf[0]); -+ if (my.mc_wbuf[0]) _aligned_free(my.mc_wbuf[0]); -+ if (my.mc_cond) CloseHandle(my.mc_cond); -+ if (my.mc_mutex) CloseHandle(my.mc_mutex); - #else -+ free(my.mc_wbuf[0]); - pthread_cond_destroy(&my.mc_cond); -+done2: - pthread_mutex_destroy(&my.mc_mutex); -- free(my.mc_wbuf[0]); - #endif -- return rc; -+ return rc ? rc : my.mc_error; - } - - /** Copy environment as-is. */ -@@ -9159,7 +10025,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd) - MDB_txn *txn = NULL; - mdb_mutexref_t wmutex = NULL; - int rc; -- size_t wsize, w3; -+ mdb_size_t wsize, w3; - char *ptr; - #ifdef _WIN32 - DWORD len, w2; -@@ -9220,7 +10086,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd) - - w3 = txn->mt_next_pgno * env->me_psize; - { -- size_t fsize = 0; -+ mdb_size_t fsize = 0; - if ((rc = mdb_fsize(env->me_fd, &fsize))) - goto leave; - if (w3 > fsize) -@@ -9659,11 +10525,19 @@ mdb_drop0(MDB_cursor *mc, int subs) - - /* DUPSORT sub-DBs have no ovpages/DBs. Omit scanning leaves. - * This also avoids any P_LEAF2 pages, which have no nodes. -+ * Also if the DB doesn't have sub-DBs and has no overflow -+ * pages, omit scanning leaves. - */ -- if (mc->mc_flags & C_SUB) -+ if ((mc->mc_flags & C_SUB) || -+ (!subs && !mc->mc_db->md_overflow_pages)) - mdb_cursor_pop(mc); - - mdb_cursor_copy(mc, &mx); -+#ifdef MDB_VL32 -+ /* bump refcount for mx's pages */ -+ for (i=0; i<mc->mc_snum; i++) -+ mdb_page_get(&mx, mc->mc_pg[i]->mp_pgno, &mx.mc_pg[i], NULL); -+#endif - while (mc->mc_snum > 0) { - MDB_page *mp = mc->mc_pg[mc->mc_top]; - unsigned n = NUMKEYS(mp); -@@ -9674,7 +10548,7 @@ mdb_drop0(MDB_cursor *mc, int subs) - MDB_page *omp; - pgno_t pg; - memcpy(&pg, NODEDATA(ni), sizeof(pg)); -- rc = mdb_page_get(txn, pg, &omp, NULL); -+ rc = mdb_page_get(mc, pg, &omp, NULL); - if (rc != 0) - goto done; - mdb_cassert(mc, IS_OVERFLOW(omp)); -@@ -9682,6 +10556,9 @@ mdb_drop0(MDB_cursor *mc, int subs) - pg, omp->mp_pages); - if (rc) - goto done; -+ mc->mc_db->md_overflow_pages -= omp->mp_pages; -+ if (!mc->mc_db->md_overflow_pages && !subs) -+ break; - } else if (subs && (ni->mn_flags & F_SUBDATA)) { - mdb_xcursor_init1(mc, ni); - rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0); -@@ -9689,6 +10566,8 @@ mdb_drop0(MDB_cursor *mc, int subs) - goto done; - } - } -+ if (!subs && !mc->mc_db->md_overflow_pages) -+ goto pop; - } else { - if ((rc = mdb_midl_need(&txn->mt_free_pgs, n)) != 0) - goto done; -@@ -9710,6 +10589,7 @@ mdb_drop0(MDB_cursor *mc, int subs) - /* no more siblings, go back to beginning - * of previous level. - */ -+pop: - mdb_cursor_pop(mc); - mc->mc_ki[0] = 0; - for (i=1; i<mc->mc_snum; i++) { -@@ -9723,6 +10603,8 @@ mdb_drop0(MDB_cursor *mc, int subs) - done: - if (rc) - txn->mt_flags |= MDB_TXN_ERROR; -+ /* drop refcount for mx's pages */ -+ MDB_CURSOR_UNREF(&mx, 0); - } else if (rc == MDB_NOTFOUND) { - rc = MDB_SUCCESS; - } -@@ -9842,7 +10724,7 @@ mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) - if (mr[i].mr_pid) { - txnid_t txnid = mr[i].mr_txnid; - sprintf(buf, txnid == (txnid_t)-1 ? -- "%10d %"Z"x -\n" : "%10d %"Z"x %"Z"u\n", -+ "%10d %"Z"x -\n" : "%10d %"Z"x %"Yu"\n", - (int)mr[i].mr_pid, (size_t)mr[i].mr_tid, txnid); - if (first) { - first = 0; -@@ -9947,7 +10829,7 @@ mdb_reader_check0(MDB_env *env, int rlocked, int *dead) - } - for (; j<rdrs; j++) - if (mr[j].mr_pid == pid) { -- DPRINTF(("clear stale reader pid %u txn %"Z"d", -+ DPRINTF(("clear stale reader pid %u txn %"Yd, - (unsigned) pid, mr[j].mr_txnid)); - mr[j].mr_pid = 0; - count++; -diff --git a/libraries/liblmdb/mdb_copy.1 b/libraries/liblmdb/mdb_copy.1 -index 258affb..4387ac3 100644 ---- a/libraries/liblmdb/mdb_copy.1 -+++ b/libraries/liblmdb/mdb_copy.1 -@@ -36,6 +36,7 @@ Write the library version number to the standard output, and exit. - Compact while copying. Only current data pages will be copied; freed - or unused pages will be omitted from the copy. This option will - slow down the backup process as it is more CPU-intensive. -+Currently it fails if the environment has suffered a page leak. - .TP - .BR \-n - Open LDMB environment(s) which do not use subdirectories. -diff --git a/libraries/liblmdb/mdb_dump.c b/libraries/liblmdb/mdb_dump.c -index 17def6b..13084a2 100644 ---- a/libraries/liblmdb/mdb_dump.c -+++ b/libraries/liblmdb/mdb_dump.c -@@ -20,11 +20,7 @@ - #include <signal.h> - #include "lmdb.h" - --#ifdef _WIN32 --#define Z "I" --#else --#define Z "z" --#endif -+#define Yu MDB_PRIy(u) - - #define PRINT 1 - static int mode; -@@ -115,7 +111,7 @@ static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) - if (name) - printf("database=%s\n", name); - printf("type=btree\n"); -- printf("mapsize=%" Z "u\n", info.me_mapsize); -+ printf("mapsize=%"Yu"\n", info.me_mapsize); - if (info.me_mapaddr) - printf("mapaddr=%p\n", info.me_mapaddr); - printf("maxreaders=%u\n", info.me_maxreaders); -diff --git a/libraries/liblmdb/mdb_load.c b/libraries/liblmdb/mdb_load.c -index 053cc88..73dfe8c 100644 ---- a/libraries/liblmdb/mdb_load.c -+++ b/libraries/liblmdb/mdb_load.c -@@ -25,7 +25,7 @@ static int mode; - - static char *subname = NULL; - --static size_t lineno; -+static mdb_size_t lineno; - static int version; - - static int flags; -@@ -38,11 +38,7 @@ static MDB_envinfo info; - - static MDB_val kbuf, dbuf; - --#ifdef _WIN32 --#define Z "I" --#else --#define Z "z" --#endif -+#define Yu MDB_PRIy(u) - - #define STRLENOF(s) (sizeof(s)-1) - -@@ -73,7 +69,7 @@ static void readhdr(void) - if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) { - version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION=")); - if (version > 3) { -- fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n", -+ fprintf(stderr, "%s: line %"Yu": unsupported VERSION %d\n", - prog, lineno, version); - exit(EXIT_FAILURE); - } -@@ -83,7 +79,7 @@ static void readhdr(void) - if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print"))) - mode |= PRINT; - else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) { -- fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n", -+ fprintf(stderr, "%s: line %"Yu": unsupported FORMAT %s\n", - prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT=")); - exit(EXIT_FAILURE); - } -@@ -94,7 +90,7 @@ static void readhdr(void) - subname = strdup((char *)dbuf.mv_data+STRLENOF("database=")); - } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) { - if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) { -- fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n", -+ fprintf(stderr, "%s: line %"Yu": unsupported type %s\n", - prog, lineno, (char *)dbuf.mv_data+STRLENOF("type=")); - exit(EXIT_FAILURE); - } -@@ -104,7 +100,7 @@ static void readhdr(void) - if (ptr) *ptr = '\0'; - i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr); - if (i != 1) { -- fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n", -+ fprintf(stderr, "%s: line %"Yu": invalid mapaddr %s\n", - prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr=")); - exit(EXIT_FAILURE); - } -@@ -112,9 +108,10 @@ static void readhdr(void) - int i; - ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); - if (ptr) *ptr = '\0'; -- i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize); -+ i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), -+ "%" MDB_SCNy(u), &info.me_mapsize); - if (i != 1) { -- fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n", -+ fprintf(stderr, "%s: line %"Yu": invalid mapsize %s\n", - prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize=")); - exit(EXIT_FAILURE); - } -@@ -124,7 +121,7 @@ static void readhdr(void) - if (ptr) *ptr = '\0'; - i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders); - if (i != 1) { -- fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n", -+ fprintf(stderr, "%s: line %"Yu": invalid maxreaders %s\n", - prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders=")); - exit(EXIT_FAILURE); - } -@@ -140,12 +137,12 @@ static void readhdr(void) - if (!dbflags[i].bit) { - ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size); - if (!ptr) { -- fprintf(stderr, "%s: line %" Z "d: unexpected format\n", -+ fprintf(stderr, "%s: line %"Yu": unexpected format\n", - prog, lineno); - exit(EXIT_FAILURE); - } else { - *ptr = '\0'; -- fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n", -+ fprintf(stderr, "%s: line %"Yu": unrecognized keyword ignored: %s\n", - prog, lineno, (char *)dbuf.mv_data); - } - } -@@ -155,7 +152,7 @@ static void readhdr(void) - - static void badend(void) - { -- fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n", -+ fprintf(stderr, "%s: line %"Yu": unexpected end of input\n", - prog, lineno); - } - -@@ -213,7 +210,7 @@ badend: - buf->mv_data = realloc(buf->mv_data, buf->mv_size*2); - if (!buf->mv_data) { - Eof = 1; -- fprintf(stderr, "%s: line %" Z "d: out of memory, line too long\n", -+ fprintf(stderr, "%s: line %"Yu": out of memory, line too long\n", - prog, lineno); - return EOF; - } -@@ -405,7 +402,7 @@ int main(int argc, char *argv[]) - - rc = readline(&data, &dbuf); - if (rc) { -- fprintf(stderr, "%s: line %" Z "d: failed to read key value\n", prog, lineno); -+ fprintf(stderr, "%s: line %"Yu": failed to read key value\n", prog, lineno); - goto txn_abort; - } - -@@ -420,7 +417,7 @@ int main(int argc, char *argv[]) - if (batch == 100) { - rc = mdb_txn_commit(txn); - if (rc) { -- fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", -+ fprintf(stderr, "%s: line %"Yu": txn_commit: %s\n", - prog, lineno, mdb_strerror(rc)); - goto env_close; - } -@@ -440,7 +437,7 @@ int main(int argc, char *argv[]) - rc = mdb_txn_commit(txn); - txn = NULL; - if (rc) { -- fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", -+ fprintf(stderr, "%s: line %"Yu": txn_commit: %s\n", - prog, lineno, mdb_strerror(rc)); - goto env_close; - } -diff --git a/libraries/liblmdb/mdb_stat.c b/libraries/liblmdb/mdb_stat.c -index a5cda2f..0343a65 100644 ---- a/libraries/liblmdb/mdb_stat.c -+++ b/libraries/liblmdb/mdb_stat.c -@@ -17,11 +17,8 @@ - #include <unistd.h> - #include "lmdb.h" - --#ifdef _WIN32 --#define Z "I" --#else --#define Z "z" --#endif -+#define Z MDB_FMT_Z -+#define Yu MDB_PRIy(u) - - static void prstat(MDB_stat *ms) - { -@@ -29,10 +26,10 @@ static void prstat(MDB_stat *ms) - printf(" Page size: %u\n", ms->ms_psize); - #endif - printf(" Tree depth: %u\n", ms->ms_depth); -- printf(" Branch pages: %"Z"u\n", ms->ms_branch_pages); -- printf(" Leaf pages: %"Z"u\n", ms->ms_leaf_pages); -- printf(" Overflow pages: %"Z"u\n", ms->ms_overflow_pages); -- printf(" Entries: %"Z"u\n", ms->ms_entries); -+ printf(" Branch pages: %"Yu"\n", ms->ms_branch_pages); -+ printf(" Leaf pages: %"Yu"\n", ms->ms_leaf_pages); -+ printf(" Overflow pages: %"Yu"\n", ms->ms_overflow_pages); -+ printf(" Entries: %"Yu"\n", ms->ms_entries); - } - - static void usage(char *prog) -@@ -125,11 +122,11 @@ int main(int argc, char *argv[]) - (void)mdb_env_info(env, &mei); - printf("Environment Info\n"); - printf(" Map address: %p\n", mei.me_mapaddr); -- printf(" Map size: %"Z"u\n", mei.me_mapsize); -+ printf(" Map size: %"Yu"\n", mei.me_mapsize); - printf(" Page size: %u\n", mst.ms_psize); -- printf(" Max pages: %"Z"u\n", mei.me_mapsize / mst.ms_psize); -- printf(" Number of pages used: %"Z"u\n", mei.me_last_pgno+1); -- printf(" Last transaction ID: %"Z"u\n", mei.me_last_txnid); -+ printf(" Max pages: %"Yu"\n", mei.me_mapsize / mst.ms_psize); -+ printf(" Number of pages used: %"Yu"\n", mei.me_last_pgno+1); -+ printf(" Last transaction ID: %"Yu"\n", mei.me_last_txnid); - printf(" Max readers: %u\n", mei.me_maxreaders); - printf(" Number of readers used: %u\n", mei.me_numreaders); - } -@@ -156,7 +153,7 @@ int main(int argc, char *argv[]) - if (freinfo) { - MDB_cursor *cursor; - MDB_val key, data; -- size_t pages = 0, *iptr; -+ mdb_size_t pages = 0, *iptr; - - printf("Freelist Status\n"); - dbi = 0; -@@ -176,7 +173,7 @@ int main(int argc, char *argv[]) - pages += *iptr; - if (freinfo > 1) { - char *bad = ""; -- size_t pg, prev; -+ mdb_size_t pg, prev; - ssize_t i, j, span = 0; - j = *iptr++; - for (i = j, prev = 1; --i >= 0; ) { -@@ -187,20 +184,20 @@ int main(int argc, char *argv[]) - pg += span; - for (; i >= span && iptr[i-span] == pg; span++, pg++) ; - } -- printf(" Transaction %"Z"u, %"Z"d pages, maxspan %"Z"d%s\n", -- *(size_t *)key.mv_data, j, span, bad); -+ printf(" Transaction %"Yu", %"Z"d pages, maxspan %"Z"d%s\n", -+ *(mdb_size_t *)key.mv_data, j, span, bad); - if (freinfo > 2) { - for (--j; j >= 0; ) { - pg = iptr[j]; - for (span=1; --j >= 0 && iptr[j] == pg+span; span++) ; -- printf(span>1 ? " %9"Z"u[%"Z"d]\n" : " %9"Z"u\n", -+ printf(span>1 ? " %9"Yu"[%"Z"d]\n" : " %9"Yu"\n", - pg, span); - } - } - } - } - mdb_cursor_close(cursor); -- printf(" Free pages: %"Z"u\n", pages); -+ printf(" Free pages: %"Yu"\n", pages); - } - - rc = mdb_open(txn, subname, 0, &dbi); -diff --git a/libraries/liblmdb/midl.c b/libraries/liblmdb/midl.c -index 5c6d841..9748d8d 100644 ---- a/libraries/liblmdb/midl.c -+++ b/libraries/liblmdb/midl.c -@@ -3,7 +3,7 @@ - /* $OpenLDAP$ */ - /* This work is part of OpenLDAP Software <http://www.openldap.org/>. - * -- * Copyright 2000-2015 The OpenLDAP Foundation. -+ * Copyright 2000-2016 The OpenLDAP Foundation. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without -@@ -354,5 +354,67 @@ int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id ) - return 0; - } - -+#ifdef MDB_VL32 -+unsigned mdb_mid3l_search( MDB_ID3L ids, MDB_ID id ) -+{ -+ /* -+ * binary search of id in ids -+ * if found, returns position of id -+ * if not found, returns first position greater than id -+ */ -+ unsigned base = 0; -+ unsigned cursor = 1; -+ int val = 0; -+ unsigned n = (unsigned)ids[0].mid; -+ -+ while( 0 < n ) { -+ unsigned pivot = n >> 1; -+ cursor = base + pivot + 1; -+ val = CMP( id, ids[cursor].mid ); -+ -+ if( val < 0 ) { -+ n = pivot; -+ -+ } else if ( val > 0 ) { -+ base = cursor; -+ n -= pivot + 1; -+ -+ } else { -+ return cursor; -+ } -+ } -+ -+ if( val > 0 ) { -+ ++cursor; -+ } -+ return cursor; -+} -+ -+int mdb_mid3l_insert( MDB_ID3L ids, MDB_ID3 *id ) -+{ -+ unsigned x, i; -+ -+ x = mdb_mid3l_search( ids, id->mid ); -+ -+ if( x < 1 ) { -+ /* internal error */ -+ return -2; -+ } -+ -+ if ( x <= ids[0].mid && ids[x].mid == id->mid ) { -+ /* duplicate */ -+ return -1; -+ } -+ -+ /* insert id */ -+ ids[0].mid++; -+ for (i=(unsigned)ids[0].mid; i>x; i--) -+ ids[i] = ids[i-1]; -+ ids[x] = *id; -+ -+ return 0; -+} -+#endif /* MDB_VL32 */ -+ - /** @} */ - /** @} */ -diff --git a/libraries/liblmdb/midl.h b/libraries/liblmdb/midl.h -index 2331e78..dc532c4 100644 ---- a/libraries/liblmdb/midl.h -+++ b/libraries/liblmdb/midl.h -@@ -11,7 +11,7 @@ - /* $OpenLDAP$ */ - /* This work is part of OpenLDAP Software <http://www.openldap.org/>. - * -- * Copyright 2000-2015 The OpenLDAP Foundation. -+ * Copyright 2000-2016 The OpenLDAP Foundation. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without -@@ -26,7 +26,7 @@ - #ifndef _MDB_MIDL_H_ - #define _MDB_MIDL_H_ - --#include <stddef.h> -+#include "lmdb.h" - - #ifdef __cplusplus - extern "C" { -@@ -42,7 +42,7 @@ extern "C" { - /** A generic unsigned ID number. These were entryIDs in back-bdb. - * Preferably it should have the same size as a pointer. - */ --typedef size_t MDB_ID; -+typedef mdb_size_t MDB_ID; - - /** An IDL is an ID List, a sorted array of IDs. The first - * element of the array is a counter for how many actual -@@ -55,7 +55,11 @@ typedef MDB_ID *MDB_IDL; - /* IDL sizes - likely should be even bigger - * limiting factors: sizeof(ID), thread stack size - */ -+#ifdef MDB_VL32 -+#define MDB_IDL_LOGN 14 /* DB_SIZE is 2^14, UM_SIZE is 2^15 */ -+#else - #define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ -+#endif - #define MDB_IDL_DB_SIZE (1<<MDB_IDL_LOGN) - #define MDB_IDL_UM_SIZE (1<<(MDB_IDL_LOGN+1)) - -@@ -177,6 +181,20 @@ int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id ); - */ - int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id ); - -+#ifdef MDB_VL32 -+typedef struct MDB_ID3 { -+ MDB_ID mid; /**< The ID */ -+ void *mptr; /**< The pointer */ -+ unsigned int mcnt; /**< Number of pages */ -+ unsigned int mref; /**< Refcounter */ -+} MDB_ID3; -+ -+typedef MDB_ID3 *MDB_ID3L; -+ -+unsigned mdb_mid3l_search( MDB_ID3L ids, MDB_ID id ); -+int mdb_mid3l_insert( MDB_ID3L ids, MDB_ID3 *id ); -+ -+#endif /* MDB_VL32 */ - /** @} */ - /** @} */ - #ifdef __cplusplus diff --git a/ports/lmdb/portfile.cmake b/ports/lmdb/portfile.cmake index 064f7cff6..4638e8b7e 100644 --- a/ports/lmdb/portfile.cmake +++ b/ports/lmdb/portfile.cmake @@ -6,7 +6,6 @@ vcpkg_from_github( REF LMDB_0.9.23 SHA512 47466a96ce288d18d489acf1abf811aa973649848a4cac31f71e1f25ea781a055ebd6616d2d630214b2df2f146f12609c82d65be0196f49d6b46a6c96464e120 HEAD_REF master - PATCHES lmdb_45a88275d2a410e683bae4ef44881e0f55fa3c4d.patch ) file(COPY ${CMAKE_CURRENT_LIST_DIR}/cmake/ DESTINATION ${SOURCE_PATH}/libraries/liblmdb) |
