[kernel/f20] Add keyring patches to support krb5 (rhbz 1003043)

Josh Boyer jwboyer at fedoraproject.org
Tue Sep 3 18:20:16 UTC 2013


commit ee903899ecc21e320d387d1a55b97b2dbf3aaf1c
Author: Josh Boyer <jwboyer at redhat.com>
Date:   Tue Sep 3 14:17:59 2013 -0400

    Add keyring patches to support krb5 (rhbz 1003043)

 config-generic            |    2 +
 kernel.spec               |   11 +
 keys-expand-keyring.patch | 6834 +++++++++++++++++++++++++++++++++++++++++++++
 keys-krb-support.patch    |  747 +++++
 4 files changed, 7594 insertions(+), 0 deletions(-)
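
With CONFIG_PERSISTENT_KEYRINGS and CONFIG_BIG_KEYS turned on, krb5 can keep
credential caches in kernel keyrings that outlive the login session (the
KEYRING: ccache type).  A minimal userspace sketch of the interface this
series is expected to expose, assuming the keyctl_get_persistent() wrapper
from keyutils 1.5.8 or later (the persistent-keyring syscall side is what
keys-krb-support.patch is expected to add):

	#include <keyutils.h>
	#include <stdio.h>

	int main(void)
	{
		long persistent;

		/* uid -1 means "the calling user"; the keyring is created
		 * on demand and linked into the session keyring */
		persistent = keyctl_get_persistent(-1, KEY_SPEC_SESSION_KEYRING);
		if (persistent == -1) {
			perror("keyctl_get_persistent");
			return 1;
		}
		printf("persistent keyring: %ld\n", persistent);
		return 0;
	}

Build with "gcc example.c -lkeyutils"; the sketch is illustrative, not a
description of the final Fedora userspace.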
---
diff --git a/config-generic b/config-generic
index 57d7d9a..0acca31 100644
--- a/config-generic
+++ b/config-generic
@@ -4203,6 +4203,8 @@ CONFIG_ZLIB_DEFLATE=m
 
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_KEYS=y
+CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=m
 CONFIG_TRUSTED_KEYS=m
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_KEYS_DEBUG_PROC_KEYS=y
diff --git a/kernel.spec b/kernel.spec
index 1272ec1..118d486 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -646,6 +646,10 @@ Patch800: crash-driver.patch
 
 # crypto/
 
+# keys
+Patch900: keys-expand-keyring.patch
+Patch901: keys-krb-support.patch
+
 # secure boot
 Patch1000: secure-modules.patch
 Patch1001: modsign-uefi.patch
@@ -1379,6 +1383,10 @@ ApplyPatch crash-driver.patch
 
 # crypto/
 
+# keys
+ApplyPatch keys-expand-keyring.patch
+ApplyPatch keys-krb-support.patch
+
 # secure boot
 ApplyPatch secure-modules.patch
 ApplyPatch modsign-uefi.patch
@@ -2257,6 +2265,9 @@ fi
 #                 ||----w |
 #                 ||     ||
 %changelog
+* Tue Sep 03 2013 Josh Boyer <jwboyer at fedoraproject.org>
+- Add keyring patches to support krb5 (rhbz 1003043)
+
 * Tue Sep 03 2013 Kyle McMartin <kyle at redhat.com>
 - [arm64] disable VGA_CONSOLE and PARPORT_PC
 - [arm64] install dtb as on %{arm}
diff --git a/keys-expand-keyring.patch b/keys-expand-keyring.patch
new file mode 100644
index 0000000..7561824
--- /dev/null
+++ b/keys-expand-keyring.patch
@@ -0,0 +1,6834 @@
+From 96dcf8e91389e509021448ffd798cc68471fcf0f Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells at redhat.com>
+Date: Fri, 30 Aug 2013 15:37:50 +0100
+Subject: [PATCH 01/10] KEYS: Skip key state checks when checking for
+ possession
+
+Skip key state checks (invalidation, revocation and expiration) when checking
+for possession.  Without this, keys that have been marked invalid, revoked
+keys and expired keys are not given a possession attribute - which means the
+possessor is not granted any possession permits and cannot do anything with
+them unless they also have a user, group or other permit.
+
+This causes failures in the keyutils test suite's revocation and expiration
+tests now that commit 96b5c8fea6c0861621051290d705ec2e971963f1 reduced the
+initial permissions granted to a key.
+
+The failures are due to accesses to revoked and expired keys being given
+EACCES instead of EKEYREVOKED or EKEYEXPIRED.
+
+Signed-off-by: David Howells <dhowells at redhat.com>
+---
+ security/keys/internal.h         | 1 +
+ security/keys/process_keys.c     | 8 +++++---
+ security/keys/request_key.c      | 6 ++++--
+ security/keys/request_key_auth.c | 2 +-
+ 4 files changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index d4f1468..df971fe 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -124,6 +124,7 @@ extern key_ref_t search_my_process_keyrings(struct key_type *type,
+ extern key_ref_t search_process_keyrings(struct key_type *type,
+ 					 const void *description,
+ 					 key_match_func_t match,
++					 bool no_state_check,
+ 					 const struct cred *cred);
+ 
+ extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
+diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
+index 42defae..a3410d6 100644
+--- a/security/keys/process_keys.c
++++ b/security/keys/process_keys.c
+@@ -440,6 +440,7 @@ found:
+ key_ref_t search_process_keyrings(struct key_type *type,
+ 				  const void *description,
+ 				  key_match_func_t match,
++				  bool no_state_check,
+ 				  const struct cred *cred)
+ {
+ 	struct request_key_auth *rka;
+@@ -448,7 +449,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
+ 	might_sleep();
+ 
+ 	key_ref = search_my_process_keyrings(type, description, match,
+-					     false, cred);
++					     no_state_check, cred);
+ 	if (!IS_ERR(key_ref))
+ 		goto found;
+ 	err = key_ref;
+@@ -468,7 +469,8 @@ key_ref_t search_process_keyrings(struct key_type *type,
+ 			rka = cred->request_key_auth->payload.data;
+ 
+ 			key_ref = search_process_keyrings(type, description,
+-							  match, rka->cred);
++							  match, no_state_check,
++							  rka->cred);
+ 
+ 			up_read(&cred->request_key_auth->sem);
+ 
+@@ -675,7 +677,7 @@ try_again:
+ 		/* check to see if we possess the key */
+ 		skey_ref = search_process_keyrings(key->type, key,
+ 						   lookup_user_key_possessed,
+-						   cred);
++						   true, cred);
+ 
+ 		if (!IS_ERR(skey_ref)) {
+ 			key_put(key);
+diff --git a/security/keys/request_key.c b/security/keys/request_key.c
+index c411f9b..172115b 100644
+--- a/security/keys/request_key.c
++++ b/security/keys/request_key.c
+@@ -390,7 +390,8 @@ static int construct_alloc_key(struct key_type *type,
+ 	 * waited for locks */
+ 	mutex_lock(&key_construction_mutex);
+ 
+-	key_ref = search_process_keyrings(type, description, type->match, cred);
++	key_ref = search_process_keyrings(type, description, type->match,
++					  false, cred);
+ 	if (!IS_ERR(key_ref))
+ 		goto key_already_present;
+ 
+@@ -539,7 +540,8 @@ struct key *request_key_and_link(struct key_type *type,
+ 	       dest_keyring, flags);
+ 
+ 	/* search all the process keyrings for a key */
+-	key_ref = search_process_keyrings(type, description, type->match, cred);
++	key_ref = search_process_keyrings(type, description, type->match,
++					  false, cred);
+ 
+ 	if (!IS_ERR(key_ref)) {
+ 		key = key_ref_to_ptr(key_ref);
+diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
+index 85730d5..92077de 100644
+--- a/security/keys/request_key_auth.c
++++ b/security/keys/request_key_auth.c
+@@ -247,7 +247,7 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id)
+ 		&key_type_request_key_auth,
+ 		(void *) (unsigned long) target_id,
+ 		key_get_instantiation_authkey_match,
+-		cred);
++		false, cred);
+ 
+ 	if (IS_ERR(authkey_ref)) {
+ 		authkey = ERR_CAST(authkey_ref);
+-- 
+1.8.3.1
+
+
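The behaviour patch 01 preserves is visible from userspace.  A minimal
sketch, assuming libkeyutils (the "example" key is purely illustrative):
reading a key you possess after revoking it should fail with EKEYREVOKED,
not EACCES, because possession is still granted even though the key is in
a bad state.

	#include <errno.h>
	#include <keyutils.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char buf[64];
		key_serial_t key = add_key("user", "example", "data", 4,
					   KEY_SPEC_SESSION_KEYRING);

		if (key == -1) {
			perror("add_key");
			return 1;
		}
		keyctl_revoke(key);
		if (keyctl_read(key, buf, sizeof(buf)) == -1)
			/* expected: EKEYREVOKED; without the fix the
			 * possession permit is never granted, so EACCES */
			printf("read failed: %s\n", strerror(errno));
		return 0;
	}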
+From 9b1294158dd1fbca78541b5d55c057e46b1a9ca2 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells at redhat.com>
+Date: Fri, 30 Aug 2013 15:37:51 +0100
+Subject: [PATCH 02/10] KEYS: Use bool in make_key_ref() and is_key_possessed()
+
+Make make_key_ref() take a bool possession parameter and make
+is_key_possessed() return a bool.
+
+Signed-off-by: David Howells <dhowells at redhat.com>
+---
+ Documentation/security/keys.txt | 7 +++----
+ include/linux/key.h             | 4 ++--
+ security/keys/keyring.c         | 5 +++--
+ 3 files changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
+index 7b4145d..9ede670 100644
+--- a/Documentation/security/keys.txt
++++ b/Documentation/security/keys.txt
+@@ -865,15 +865,14 @@ encountered:
+      calling processes has a searchable link to the key from one of its
+      keyrings. There are three functions for dealing with these:
+ 
+-	key_ref_t make_key_ref(const struct key *key,
+-			       unsigned long possession);
++	key_ref_t make_key_ref(const struct key *key, bool possession);
+ 
+ 	struct key *key_ref_to_ptr(const key_ref_t key_ref);
+ 
+-	unsigned long is_key_possessed(const key_ref_t key_ref);
++	bool is_key_possessed(const key_ref_t key_ref);
+ 
+      The first function constructs a key reference from a key pointer and
+-     possession information (which must be 0 or 1 and not any other value).
++     possession information (which must be true or false).
+ 
+      The second function retrieves the key pointer from a reference and the
+      third retrieves the possession flag.
+diff --git a/include/linux/key.h b/include/linux/key.h
+index 4dfde11..51bce29 100644
+--- a/include/linux/key.h
++++ b/include/linux/key.h
+@@ -99,7 +99,7 @@ struct keyring_name;
+ typedef struct __key_reference_with_attributes *key_ref_t;
+ 
+ static inline key_ref_t make_key_ref(const struct key *key,
+-				     unsigned long possession)
++				     bool possession)
+ {
+ 	return (key_ref_t) ((unsigned long) key | possession);
+ }
+@@ -109,7 +109,7 @@ static inline struct key *key_ref_to_ptr(const key_ref_t key_ref)
+ 	return (struct key *) ((unsigned long) key_ref & ~1UL);
+ }
+ 
+-static inline unsigned long is_key_possessed(const key_ref_t key_ref)
++static inline bool is_key_possessed(const key_ref_t key_ref)
+ {
+ 	return (unsigned long) key_ref & 1UL;
+ }
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+index 6ece7f2..f784063 100644
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -329,9 +329,10 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+ 
+ 	struct keyring_list *keylist;
+ 	struct timespec now;
+-	unsigned long possessed, kflags;
++	unsigned long kflags;
+ 	struct key *keyring, *key;
+ 	key_ref_t key_ref;
++	bool possessed;
+ 	long err;
+ 	int sp, nkeys, kix;
+ 
+@@ -542,8 +543,8 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+ 			       key_perm_t perm)
+ {
+ 	struct keyring_list *klist;
+-	unsigned long possessed;
+ 	struct key *keyring, *key;
++	bool possessed;
+ 	int nkeys, loop;
+ 
+ 	keyring = key_ref_to_ptr(keyring_ref);
+-- 
+1.8.3.1
+
+
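The switch to bool is safe because key_ref_t packs the possession flag into
bit 0 of an aligned key pointer, and a C99 bool always converts to exactly
0 or 1, whereas an unsigned long argument of, say, 2 would have corrupted
the pointer.  A standalone sketch of the tagging trick, using stand-in
types rather than the kernel headers:

	#include <stdbool.h>
	#include <stdio.h>

	struct key { int serial; };
	typedef struct __key_reference_with_attributes *key_ref_t;

	static inline key_ref_t make_key_ref(const struct key *key,
					     bool possession)
	{
		return (key_ref_t)((unsigned long)key | possession);
	}

	static inline struct key *key_ref_to_ptr(const key_ref_t key_ref)
	{
		return (struct key *)((unsigned long)key_ref & ~1UL);
	}

	static inline bool is_key_possessed(const key_ref_t key_ref)
	{
		return (unsigned long)key_ref & 1UL;
	}

	int main(void)
	{
		struct key k = { 42 };
		key_ref_t ref = make_key_ref(&k, true);

		printf("serial=%d possessed=%d\n",
		       key_ref_to_ptr(ref)->serial, is_key_possessed(ref));
		return 0;
	}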
+From 4a7e7536b9b728f1d912d0e4c047c885c95e13a1 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells at redhat.com>
+Date: Fri, 30 Aug 2013 15:37:51 +0100
+Subject: [PATCH 03/10] KEYS: key_is_dead() should take a const key pointer
+ argument
+
+key_is_dead() should take a const key pointer argument as it doesn't modify
+what it points to.
+
+Signed-off-by: David Howells <dhowells at redhat.com>
+---
+ security/keys/internal.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index df971fe..490aef5 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -203,7 +203,7 @@ extern struct key *key_get_instantiation_authkey(key_serial_t target_id);
+ /*
+  * Determine whether a key is dead.
+  */
+-static inline bool key_is_dead(struct key *key, time_t limit)
++static inline bool key_is_dead(const struct key *key, time_t limit)
+ {
+ 	return
+ 		key->flags & ((1 << KEY_FLAG_DEAD) |
+-- 
+1.8.3.1
+
+
+From 9007a0a7f8c135f0085e46db277de0cf7b944403 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells at redhat.com>
+Date: Fri, 30 Aug 2013 15:37:52 +0100
+Subject: [PATCH 04/10] KEYS: Consolidate the concept of an 'index key' for key
+ access
+
+Consolidate the concept of an 'index key' for accessing keys.  The index key
+is the search term needed to find a key directly - basically the key type and
+the key description.  We can add to that the description length.
+
+This will be useful when turning a keyring into an associative array rather
+than just a pointer block.
+
+Signed-off-by: David Howells <dhowells at redhat.com>
+---
+ include/linux/key.h         | 21 +++++++++----
+ security/keys/internal.h    |  8 ++---
+ security/keys/key.c         | 72 +++++++++++++++++++++++----------------------
+ security/keys/keyring.c     | 37 +++++++++++------------
+ security/keys/request_key.c | 12 +++++---
+ 5 files changed, 83 insertions(+), 67 deletions(-)
+
+diff --git a/include/linux/key.h b/include/linux/key.h
+index 51bce29..d573e82 100644
+--- a/include/linux/key.h
++++ b/include/linux/key.h
+@@ -82,6 +82,12 @@ struct key_owner;
+ struct keyring_list;
+ struct keyring_name;
+ 
++struct keyring_index_key {
++	struct key_type		*type;
++	const char		*description;
++	size_t			desc_len;
++};
++
+ /*****************************************************************************/
+ /*
+  * key reference with possession attribute handling
+@@ -129,7 +135,6 @@ struct key {
+ 		struct list_head graveyard_link;
+ 		struct rb_node	serial_node;
+ 	};
+-	struct key_type		*type;		/* type of key */
+ 	struct rw_semaphore	sem;		/* change vs change sem */
+ 	struct key_user		*user;		/* owner of this key */
+ 	void			*security;	/* security data for this key */
+@@ -163,12 +168,18 @@ struct key {
+ #define KEY_FLAG_ROOT_CAN_CLEAR	6	/* set if key can be cleared by root without permission */
+ #define KEY_FLAG_INVALIDATED	7	/* set if key has been invalidated */
+ 
+-	/* the description string
+-	 * - this is used to match a key against search criteria
+-	 * - this should be a printable string
++	/* the key type and key description string
++	 * - the desc is used to match a key against search criteria
++	 * - it should be a printable string
+ 	 * - eg: for krb5 AFS, this might be "afs@REDHAT.COM"
+ 	 */
+-	char			*description;
++	union {
++		struct keyring_index_key index_key;
++		struct {
++			struct key_type	*type;		/* type of key */
++			char		*description;
++		};
++	};
+ 
+ 	/* type specific data
+ 	 * - this is used by the keyring type to index the name
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index 490aef5..77441dd 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -89,19 +89,17 @@ extern struct key_type *key_type_lookup(const char *type);
+ extern void key_type_put(struct key_type *ktype);
+ 
+ extern int __key_link_begin(struct key *keyring,
+-			    const struct key_type *type,
+-			    const char *description,
++			    const struct keyring_index_key *index_key,
+ 			    unsigned long *_prealloc);
+ extern int __key_link_check_live_key(struct key *keyring, struct key *key);
+ extern void __key_link(struct key *keyring, struct key *key,
+ 		       unsigned long *_prealloc);
+ extern void __key_link_end(struct key *keyring,
+-			   struct key_type *type,
++			   const struct keyring_index_key *index_key,
+ 			   unsigned long prealloc);
+ 
+ extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+-				      const struct key_type *type,
+-				      const char *description,
++				      const struct keyring_index_key *index_key,
+ 				      key_perm_t perm);
+ 
+ extern struct key *keyring_search_instkey(struct key *keyring,
+diff --git a/security/keys/key.c b/security/keys/key.c
+index 8fb7c7b..7e6bc39 100644
+--- a/security/keys/key.c
++++ b/security/keys/key.c
+@@ -242,8 +242,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
+ 		}
+ 	}
+ 
+-	desclen = strlen(desc) + 1;
+-	quotalen = desclen + type->def_datalen;
++	desclen = strlen(desc);
++	quotalen = desclen + 1 + type->def_datalen;
+ 
+ 	/* get hold of the key tracking for this user */
+ 	user = key_user_lookup(uid);
+@@ -277,7 +277,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
+ 		goto no_memory_2;
+ 
+ 	if (desc) {
+-		key->description = kmemdup(desc, desclen, GFP_KERNEL);
++		key->index_key.desc_len = desclen;
++		key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
+ 		if (!key->description)
+ 			goto no_memory_3;
+ 	}
+@@ -285,7 +286,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
+ 	atomic_set(&key->usage, 1);
+ 	init_rwsem(&key->sem);
+ 	lockdep_set_class(&key->sem, &type->lock_class);
+-	key->type = type;
++	key->index_key.type = type;
+ 	key->user = user;
+ 	key->quotalen = quotalen;
+ 	key->datalen = type->def_datalen;
+@@ -489,8 +490,7 @@ int key_instantiate_and_link(struct key *key,
+ 	}
+ 
+ 	if (keyring) {
+-		ret = __key_link_begin(keyring, key->type, key->description,
+-				       &prealloc);
++		ret = __key_link_begin(keyring, &key->index_key, &prealloc);
+ 		if (ret < 0)
+ 			goto error_free_preparse;
+ 	}
+@@ -499,7 +499,7 @@ int key_instantiate_and_link(struct key *key,
+ 					 &prealloc);
+ 
+ 	if (keyring)
+-		__key_link_end(keyring, key->type, prealloc);
++		__key_link_end(keyring, &key->index_key, prealloc);
+ 
+ error_free_preparse:
+ 	if (key->type->preparse)
+@@ -548,8 +548,7 @@ int key_reject_and_link(struct key *key,
+ 	ret = -EBUSY;
+ 
+ 	if (keyring)
+-		link_ret = __key_link_begin(keyring, key->type,
+-					    key->description, &prealloc);
++		link_ret = __key_link_begin(keyring, &key->index_key, &prealloc);
+ 
+ 	mutex_lock(&key_construction_mutex);
+ 
+@@ -581,7 +580,7 @@ int key_reject_and_link(struct key *key,
+ 	mutex_unlock(&key_construction_mutex);
+ 
+ 	if (keyring)
+-		__key_link_end(keyring, key->type, prealloc);
++		__key_link_end(keyring, &key->index_key, prealloc);
+ 
+ 	/* wake up anyone waiting for a key to be constructed */
+ 	if (awaken)
+@@ -780,25 +779,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ 			       key_perm_t perm,
+ 			       unsigned long flags)
+ {
+-	unsigned long prealloc;
++	struct keyring_index_key index_key = {
++		.description	= description,
++	};
+ 	struct key_preparsed_payload prep;
+ 	const struct cred *cred = current_cred();
+-	struct key_type *ktype;
++	unsigned long prealloc;
+ 	struct key *keyring, *key = NULL;
+ 	key_ref_t key_ref;
+ 	int ret;
+ 
+ 	/* look up the key type to see if it's one of the registered kernel
+ 	 * types */
+-	ktype = key_type_lookup(type);
+-	if (IS_ERR(ktype)) {
++	index_key.type = key_type_lookup(type);
++	if (IS_ERR(index_key.type)) {
+ 		key_ref = ERR_PTR(-ENODEV);
+ 		goto error;
+ 	}
+ 
+ 	key_ref = ERR_PTR(-EINVAL);
+-	if (!ktype->match || !ktype->instantiate ||
+-	    (!description && !ktype->preparse))
++	if (!index_key.type->match || !index_key.type->instantiate ||
++	    (!index_key.description && !index_key.type->preparse))
+ 		goto error_put_type;
+ 
+ 	keyring = key_ref_to_ptr(keyring_ref);
+@@ -812,21 +813,22 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ 	memset(&prep, 0, sizeof(prep));
+ 	prep.data = payload;
+ 	prep.datalen = plen;
+-	prep.quotalen = ktype->def_datalen;
+-	if (ktype->preparse) {
+-		ret = ktype->preparse(&prep);
++	prep.quotalen = index_key.type->def_datalen;
++	if (index_key.type->preparse) {
++		ret = index_key.type->preparse(&prep);
+ 		if (ret < 0) {
+ 			key_ref = ERR_PTR(ret);
+ 			goto error_put_type;
+ 		}
+-		if (!description)
+-			description = prep.description;
++		if (!index_key.description)
++			index_key.description = prep.description;
+ 		key_ref = ERR_PTR(-EINVAL);
+-		if (!description)
++		if (!index_key.description)
+ 			goto error_free_prep;
+ 	}
++	index_key.desc_len = strlen(index_key.description);
+ 
+-	ret = __key_link_begin(keyring, ktype, description, &prealloc);
++	ret = __key_link_begin(keyring, &index_key, &prealloc);
+ 	if (ret < 0) {
+ 		key_ref = ERR_PTR(ret);
+ 		goto error_free_prep;
+@@ -844,9 +846,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ 	 * key of the same type and description in the destination keyring and
+ 	 * update that instead if possible
+ 	 */
+-	if (ktype->update) {
+-		key_ref = __keyring_search_one(keyring_ref, ktype, description,
+-					       0);
++	if (index_key.type->update) {
++		key_ref = __keyring_search_one(keyring_ref, &index_key, 0);
+ 		if (!IS_ERR(key_ref))
+ 			goto found_matching_key;
+ 	}
+@@ -856,16 +857,17 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ 		perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
+ 		perm |= KEY_USR_VIEW;
+ 
+-		if (ktype->read)
++		if (index_key.type->read)
+ 			perm |= KEY_POS_READ;
+ 
+-		if (ktype == &key_type_keyring || ktype->update)
++		if (index_key.type == &key_type_keyring ||
++		    index_key.type->update)
+ 			perm |= KEY_POS_WRITE;
+ 	}
+ 
+ 	/* allocate a new key */
+-	key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred,
+-			perm, flags);
++	key = key_alloc(index_key.type, index_key.description,
++			cred->fsuid, cred->fsgid, cred, perm, flags);
+ 	if (IS_ERR(key)) {
+ 		key_ref = ERR_CAST(key);
+ 		goto error_link_end;
+@@ -882,12 +884,12 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ 	key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
+ 
+ error_link_end:
+-	__key_link_end(keyring, ktype, prealloc);
++	__key_link_end(keyring, &index_key, prealloc);
+ error_free_prep:
+-	if (ktype->preparse)
+-		ktype->free_preparse(&prep);
++	if (index_key.type->preparse)
++		index_key.type->free_preparse(&prep);
+ error_put_type:
+-	key_type_put(ktype);
++	key_type_put(index_key.type);
+ error:
+ 	return key_ref;
+ 
+@@ -895,7 +897,7 @@ error:
+ 	/* we found a matching key, so we're going to try to update it
+ 	 * - we can drop the locks first as we have the key pinned
+ 	 */
+-	__key_link_end(keyring, ktype, prealloc);
++	__key_link_end(keyring, &index_key, prealloc);
+ 
+ 	key_ref = __key_update(key_ref, &prep);
+ 	goto error_free_prep;
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+index f784063..c7f59f9 100644
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -538,8 +538,7 @@ EXPORT_SYMBOL(keyring_search);
+  * to the returned key reference.
+  */
+ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+-			       const struct key_type *ktype,
+-			       const char *description,
++			       const struct keyring_index_key *index_key,
+ 			       key_perm_t perm)
+ {
+ 	struct keyring_list *klist;
+@@ -558,9 +557,9 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+ 		smp_rmb();
+ 		for (loop = 0; loop < nkeys ; loop++) {
+ 			key = rcu_dereference(klist->keys[loop]);
+-			if (key->type == ktype &&
++			if (key->type == index_key->type &&
+ 			    (!key->type->match ||
+-			     key->type->match(key, description)) &&
++			     key->type->match(key, index_key->description)) &&
+ 			    key_permission(make_key_ref(key, possessed),
+ 					   perm) == 0 &&
+ 			    !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+@@ -747,8 +746,8 @@ static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
+ /*
+  * Preallocate memory so that a key can be linked into to a keyring.
+  */
+-int __key_link_begin(struct key *keyring, const struct key_type *type,
+-		     const char *description, unsigned long *_prealloc)
++int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key,
++		     unsigned long *_prealloc)
+ 	__acquires(&keyring->sem)
+ 	__acquires(&keyring_serialise_link_sem)
+ {
+@@ -759,7 +758,8 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
+ 	size_t size;
+ 	int loop, lru, ret;
+ 
+-	kenter("%d,%s,%s,", key_serial(keyring), type->name, description);
++	kenter("%d,%s,%s,",
++	       key_serial(keyring), index_key->type->name, index_key->description);
+ 
+ 	if (keyring->type != &key_type_keyring)
+ 		return -ENOTDIR;
+@@ -772,7 +772,7 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
+ 
+ 	/* serialise link/link calls to prevent parallel calls causing a cycle
+ 	 * when linking two keyring in opposite orders */
+-	if (type == &key_type_keyring)
++	if (index_key->type == &key_type_keyring)
+ 		down_write(&keyring_serialise_link_sem);
+ 
+ 	klist = rcu_dereference_locked_keyring(keyring);
+@@ -784,8 +784,8 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
+ 		for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+ 			struct key *key = rcu_deref_link_locked(klist, loop,
+ 								keyring);
+-			if (key->type == type &&
+-			    strcmp(key->description, description) == 0) {
++			if (key->type == index_key->type &&
++			    strcmp(key->description, index_key->description) == 0) {
+ 				/* Found a match - we'll replace the link with
+ 				 * one to the new key.  We record the slot
+ 				 * position.
+@@ -865,7 +865,7 @@ error_quota:
+ 	key_payload_reserve(keyring,
+ 			    keyring->datalen - KEYQUOTA_LINK_BYTES);
+ error_sem:
+-	if (type == &key_type_keyring)
++	if (index_key->type == &key_type_keyring)
+ 		up_write(&keyring_serialise_link_sem);
+ error_krsem:
+ 	up_write(&keyring->sem);
+@@ -957,16 +957,17 @@ void __key_link(struct key *keyring, struct key *key,
+  *
+  * Must be called with __key_link_begin() having being called.
+  */
+-void __key_link_end(struct key *keyring, struct key_type *type,
++void __key_link_end(struct key *keyring,
++		    const struct keyring_index_key *index_key,
+ 		    unsigned long prealloc)
+ 	__releases(&keyring->sem)
+ 	__releases(&keyring_serialise_link_sem)
+ {
+-	BUG_ON(type == NULL);
+-	BUG_ON(type->name == NULL);
+-	kenter("%d,%s,%lx", keyring->serial, type->name, prealloc);
++	BUG_ON(index_key->type == NULL);
++	BUG_ON(index_key->type->name == NULL);
++	kenter("%d,%s,%lx", keyring->serial, index_key->type->name, prealloc);
+ 
+-	if (type == &key_type_keyring)
++	if (index_key->type == &key_type_keyring)
+ 		up_write(&keyring_serialise_link_sem);
+ 
+ 	if (prealloc) {
+@@ -1007,12 +1008,12 @@ int key_link(struct key *keyring, struct key *key)
+ 	key_check(keyring);
+ 	key_check(key);
+ 
+-	ret = __key_link_begin(keyring, key->type, key->description, &prealloc);
++	ret = __key_link_begin(keyring, &key->index_key, &prealloc);
+ 	if (ret == 0) {
+ 		ret = __key_link_check_live_key(keyring, key);
+ 		if (ret == 0)
+ 			__key_link(keyring, key, &prealloc);
+-		__key_link_end(keyring, key->type, prealloc);
++		__key_link_end(keyring, &key->index_key, prealloc);
+ 	}
+ 
+ 	return ret;
+diff --git a/security/keys/request_key.c b/security/keys/request_key.c
+index 172115b..586cb79 100644
+--- a/security/keys/request_key.c
++++ b/security/keys/request_key.c
+@@ -352,6 +352,11 @@ static int construct_alloc_key(struct key_type *type,
+ 			       struct key_user *user,
+ 			       struct key **_key)
+ {
++	const struct keyring_index_key index_key = {
++		.type		= type,
++		.description	= description,
++		.desc_len	= strlen(description),
++	};
+ 	const struct cred *cred = current_cred();
+ 	unsigned long prealloc;
+ 	struct key *key;
+@@ -379,8 +384,7 @@ static int construct_alloc_key(struct key_type *type,
+ 	set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
+ 
+ 	if (dest_keyring) {
+-		ret = __key_link_begin(dest_keyring, type, description,
+-				       &prealloc);
++		ret = __key_link_begin(dest_keyring, &index_key, &prealloc);
+ 		if (ret < 0)
+ 			goto link_prealloc_failed;
+ 	}
+@@ -400,7 +404,7 @@ static int construct_alloc_key(struct key_type *type,
+ 
+ 	mutex_unlock(&key_construction_mutex);
+ 	if (dest_keyring)
+-		__key_link_end(dest_keyring, type, prealloc);
++		__key_link_end(dest_keyring, &index_key, prealloc);
+ 	mutex_unlock(&user->cons_lock);
+ 	*_key = key;
+ 	kleave(" = 0 [%d]", key_serial(key));
+@@ -416,7 +420,7 @@ key_already_present:
+ 		ret = __key_link_check_live_key(dest_keyring, key);
+ 		if (ret == 0)
+ 			__key_link(dest_keyring, key, &prealloc);
+-		__key_link_end(dest_keyring, type, prealloc);
++		__key_link_end(dest_keyring, &index_key, prealloc);
+ 		if (ret < 0)
+ 			goto link_check_failed;
+ 	}
+-- 
+1.8.3.1
+
+
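In effect every (type, description) parameter pair collapses into a single
pointer, and the description length is computed once at the boundary rather
than in every helper.  A standalone sketch of the pattern with stand-in
types (the lookup() consumer and the description string are hypothetical):

	#include <stddef.h>
	#include <string.h>

	struct key_type { const char *name; };

	struct keyring_index_key {
		struct key_type	*type;
		const char	*description;
		size_t		desc_len;
	};

	/* hypothetical consumer: takes one pointer where it previously
	 * took a (type, description) pair */
	static int lookup(const struct keyring_index_key *index_key)
	{
		return index_key->desc_len != 0;
	}

	int main(void)
	{
		struct key_type user_type = { "user" };
		struct keyring_index_key index_key = {
			.type		= &user_type,
			.description	= "krb_ccache:primary",
			.desc_len	= strlen("krb_ccache:primary"),
		};

		return lookup(&index_key) ? 0 : 1;
	}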
+From eca8dad5cd291d2baf2d20372fcb0af9e75e25ea Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells at redhat.com>
+Date: Fri, 30 Aug 2013 15:37:52 +0100
+Subject: [PATCH 05/10] KEYS: Introduce a search context structure
+
+Search functions pass around a bunch of arguments, each of which gets copied
+with each call.  Introduce a search context structure to hold these.
+
+Whilst we're at it, create a search flag that indicates whether the search
+should be directly to the description or whether it should iterate through all
+keys looking for a non-description match.
+
+This will be useful when keyrings use a generic data struct with generic
+routines to manage their content as the search terms can just be passed
+through to the iterator callback function.
+
+Also, for future use, the data to be supplied to the match function is
+separated from the description pointer in the search context.  This makes it
+clear which is being supplied.
+
+Signed-off-by: David Howells <dhowells at redhat.com>
+---
+ include/linux/key-type.h         |   5 ++
+ security/keys/internal.h         |  40 +++++++------
+ security/keys/keyring.c          |  70 +++++++++++------------
+ security/keys/proc.c             |  17 ++++--
+ security/keys/process_keys.c     | 117 +++++++++++++++++++--------------------
+ security/keys/request_key.c      |  56 +++++++++----------
+ security/keys/request_key_auth.c |  14 +++--
+ security/keys/user_defined.c     |  18 +++---
+ 8 files changed, 179 insertions(+), 158 deletions(-)
+
+diff --git a/include/linux/key-type.h b/include/linux/key-type.h
+index 518a53a..f58737b 100644
+--- a/include/linux/key-type.h
++++ b/include/linux/key-type.h
+@@ -63,6 +63,11 @@ struct key_type {
+ 	 */
+ 	size_t def_datalen;
+ 
++	/* Default key search algorithm. */
++	unsigned def_lookup_type;
++#define KEYRING_SEARCH_LOOKUP_DIRECT	0x0000	/* Direct lookup by description. */
++#define KEYRING_SEARCH_LOOKUP_ITERATE	0x0001	/* Iterative search. */
++
+ 	/* vet a description */
+ 	int (*vet_description)(const char *description);
+ 
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index 77441dd..f4bf938 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -107,23 +107,31 @@ extern struct key *keyring_search_instkey(struct key *keyring,
+ 
+ typedef int (*key_match_func_t)(const struct key *, const void *);
+ 
++struct keyring_search_context {
++	struct keyring_index_key index_key;
++	const struct cred	*cred;
++	key_match_func_t	match;
++	const void		*match_data;
++	unsigned		flags;
++#define KEYRING_SEARCH_LOOKUP_TYPE	0x0001	/* [as type->def_lookup_type] */
++#define KEYRING_SEARCH_NO_STATE_CHECK	0x0002	/* Skip state checks */
++#define KEYRING_SEARCH_DO_STATE_CHECK	0x0004	/* Override NO_STATE_CHECK */
++#define KEYRING_SEARCH_NO_UPDATE_TIME	0x0008	/* Don't update times */
++#define KEYRING_SEARCH_NO_CHECK_PERM	0x0010	/* Don't check permissions */
++#define KEYRING_SEARCH_DETECT_TOO_DEEP	0x0020	/* Give an error on excessive depth */
++
++	/* Internal stuff */
++	int			skipped_ret;
++	bool			possessed;
++	key_ref_t		result;
++	struct timespec		now;
++};
++
+ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+-				    const struct cred *cred,
+-				    struct key_type *type,
+-				    const void *description,
+-				    key_match_func_t match,
+-				    bool no_state_check);
+-
+-extern key_ref_t search_my_process_keyrings(struct key_type *type,
+-					    const void *description,
+-					    key_match_func_t match,
+-					    bool no_state_check,
+-					    const struct cred *cred);
+-extern key_ref_t search_process_keyrings(struct key_type *type,
+-					 const void *description,
+-					 key_match_func_t match,
+-					 bool no_state_check,
+-					 const struct cred *cred);
++				    struct keyring_search_context *ctx);
++
++extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx);
++extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx);
+ 
+ extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
+ 
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+index c7f59f9..b42f2d4 100644
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -280,11 +280,7 @@ EXPORT_SYMBOL(keyring_alloc);
+ /**
+  * keyring_search_aux - Search a keyring tree for a key matching some criteria
+  * @keyring_ref: A pointer to the keyring with possession indicator.
+- * @cred: The credentials to use for permissions checks.
+- * @type: The type of key to search for.
+- * @description: Parameter for @match.
+- * @match: Function to rule on whether or not a key is the one required.
+- * @no_state_check: Don't check if a matching key is bad
++ * @ctx: The keyring search context.
+  *
+  * Search the supplied keyring tree for a key that matches the criteria given.
+  * The root keyring and any linked keyrings must grant Search permission to the
+@@ -314,11 +310,7 @@ EXPORT_SYMBOL(keyring_alloc);
+  * @keyring_ref is propagated to the returned key reference.
+  */
+ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+-			     const struct cred *cred,
+-			     struct key_type *type,
+-			     const void *description,
+-			     key_match_func_t match,
+-			     bool no_state_check)
++			     struct keyring_search_context *ctx)
+ {
+ 	struct {
+ 		/* Need a separate keylist pointer for RCU purposes */
+@@ -328,20 +320,18 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+ 	} stack[KEYRING_SEARCH_MAX_DEPTH];
+ 
+ 	struct keyring_list *keylist;
+-	struct timespec now;
+ 	unsigned long kflags;
+ 	struct key *keyring, *key;
+ 	key_ref_t key_ref;
+-	bool possessed;
+ 	long err;
+ 	int sp, nkeys, kix;
+ 
+ 	keyring = key_ref_to_ptr(keyring_ref);
+-	possessed = is_key_possessed(keyring_ref);
++	ctx->possessed = is_key_possessed(keyring_ref);
+ 	key_check(keyring);
+ 
+ 	/* top keyring must have search permission to begin the search */
+-	err = key_task_permission(keyring_ref, cred, KEY_SEARCH);
++	err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
+ 	if (err < 0) {
+ 		key_ref = ERR_PTR(err);
+ 		goto error;
+@@ -353,7 +343,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+ 
+ 	rcu_read_lock();
+ 
+-	now = current_kernel_time();
++	ctx->now = current_kernel_time();
+ 	err = -EAGAIN;
+ 	sp = 0;
+ 
+@@ -361,16 +351,17 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+ 	 * are looking for */
+ 	key_ref = ERR_PTR(-EAGAIN);
+ 	kflags = keyring->flags;
+-	if (keyring->type == type && match(keyring, description)) {
++	if (keyring->type == ctx->index_key.type &&
++	    ctx->match(keyring, ctx->match_data)) {
+ 		key = keyring;
+-		if (no_state_check)
++		if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
+ 			goto found;
+ 
+ 		/* check it isn't negative and hasn't expired or been
+ 		 * revoked */
+ 		if (kflags & (1 << KEY_FLAG_REVOKED))
+ 			goto error_2;
+-		if (key->expiry && now.tv_sec >= key->expiry)
++		if (key->expiry && ctx->now.tv_sec >= key->expiry)
+ 			goto error_2;
+ 		key_ref = ERR_PTR(key->type_data.reject_error);
+ 		if (kflags & (1 << KEY_FLAG_NEGATIVE))
+@@ -384,7 +375,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+ 	if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+ 		      (1 << KEY_FLAG_REVOKED) |
+ 		      (1 << KEY_FLAG_NEGATIVE)) ||
+-	    (keyring->expiry && now.tv_sec >= keyring->expiry))
++	    (keyring->expiry && ctx->now.tv_sec >= keyring->expiry))
+ 		goto error_2;
+ 
+ 	/* start processing a new keyring */
+@@ -406,29 +397,29 @@ descend:
+ 		kflags = key->flags;
+ 
+ 		/* ignore keys not of this type */
+-		if (key->type != type)
++		if (key->type != ctx->index_key.type)
+ 			continue;
+ 
+ 		/* skip invalidated, revoked and expired keys */
+-		if (!no_state_check) {
++		if (!(ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)) {
+ 			if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+ 				      (1 << KEY_FLAG_REVOKED)))
+ 				continue;
+ 
+-			if (key->expiry && now.tv_sec >= key->expiry)
++			if (key->expiry && ctx->now.tv_sec >= key->expiry)
+ 				continue;
+ 		}
+ 
+ 		/* keys that don't match */
+-		if (!match(key, description))
++		if (!ctx->match(key, ctx->match_data))
+ 			continue;
+ 
+ 		/* key must have search permissions */
+-		if (key_task_permission(make_key_ref(key, possessed),
+-					cred, KEY_SEARCH) < 0)
++		if (key_task_permission(make_key_ref(key, ctx->possessed),
++					ctx->cred, KEY_SEARCH) < 0)
+ 			continue;
+ 
+-		if (no_state_check)
++		if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
+ 			goto found;
+ 
+ 		/* we set a different error code if we pass a negative key */
+@@ -456,8 +447,8 @@ ascend:
+ 		if (sp >= KEYRING_SEARCH_MAX_DEPTH)
+ 			continue;
+ 
+-		if (key_task_permission(make_key_ref(key, possessed),
+-					cred, KEY_SEARCH) < 0)
++		if (key_task_permission(make_key_ref(key, ctx->possessed),
++					ctx->cred, KEY_SEARCH) < 0)
+ 			continue;
+ 
+ 		/* stack the current position */
+@@ -489,12 +480,12 @@ not_this_keyring:
+ 	/* we found a viable match */
+ found:
+ 	atomic_inc(&key->usage);
+-	key->last_used_at = now.tv_sec;
+-	keyring->last_used_at = now.tv_sec;
++	key->last_used_at = ctx->now.tv_sec;
++	keyring->last_used_at = ctx->now.tv_sec;
+ 	while (sp > 0)
+-		stack[--sp].keyring->last_used_at = now.tv_sec;
++		stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
+ 	key_check(key);
+-	key_ref = make_key_ref(key, possessed);
++	key_ref = make_key_ref(key, ctx->possessed);
+ error_2:
+ 	rcu_read_unlock();
+ error:
+@@ -514,11 +505,20 @@ key_ref_t keyring_search(key_ref_t keyring,
+ 			 struct key_type *type,
+ 			 const char *description)
+ {
+-	if (!type->match)
++	struct keyring_search_context ctx = {
++		.index_key.type		= type,
++		.index_key.description	= description,
++		.cred			= current_cred(),
++		.match			= type->match,
++		.match_data		= description,
++		.flags			= (type->def_lookup_type |
++					   KEYRING_SEARCH_DO_STATE_CHECK),
++	};
++
++	if (!ctx.match)
+ 		return ERR_PTR(-ENOKEY);
+ 
+-	return keyring_search_aux(keyring, current->cred,
+-				  type, description, type->match, false);
++	return keyring_search_aux(keyring, &ctx);
+ }
+ EXPORT_SYMBOL(keyring_search);
+ 
+diff --git a/security/keys/proc.c b/security/keys/proc.c
+index 217b685..88e9a46 100644
+--- a/security/keys/proc.c
++++ b/security/keys/proc.c
+@@ -182,7 +182,6 @@ static void proc_keys_stop(struct seq_file *p, void *v)
+ 
+ static int proc_keys_show(struct seq_file *m, void *v)
+ {
+-	const struct cred *cred = current_cred();
+ 	struct rb_node *_p = v;
+ 	struct key *key = rb_entry(_p, struct key, serial_node);
+ 	struct timespec now;
+@@ -191,15 +190,23 @@ static int proc_keys_show(struct seq_file *m, void *v)
+ 	char xbuf[12];
+ 	int rc;
+ 
++	struct keyring_search_context ctx = {
++		.index_key.type		= key->type,
++		.index_key.description	= key->description,
++		.cred			= current_cred(),
++		.match			= lookup_user_key_possessed,
++		.match_data		= key,
++		.flags			= (KEYRING_SEARCH_NO_STATE_CHECK |
++					   KEYRING_SEARCH_LOOKUP_DIRECT),
++	};
++
+ 	key_ref = make_key_ref(key, 0);
+ 
+ 	/* determine if the key is possessed by this process (a test we can
+ 	 * skip if the key does not indicate the possessor can view it
+ 	 */
+ 	if (key->perm & KEY_POS_VIEW) {
+-		skey_ref = search_my_process_keyrings(key->type, key,
+-						      lookup_user_key_possessed,
+-						      true, cred);
++		skey_ref = search_my_process_keyrings(&ctx);
+ 		if (!IS_ERR(skey_ref)) {
+ 			key_ref_put(skey_ref);
+ 			key_ref = make_key_ref(key, 1);
+@@ -211,7 +218,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
+ 	 * - the caller holds a spinlock, and thus the RCU read lock, making our
+ 	 *   access to __current_cred() safe
+ 	 */
+-	rc = key_task_permission(key_ref, cred, KEY_VIEW);
++	rc = key_task_permission(key_ref, ctx.cred, KEY_VIEW);
+ 	if (rc < 0)
+ 		return 0;
+ 
+diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
+index a3410d6..e68a3e0 100644
+--- a/security/keys/process_keys.c
++++ b/security/keys/process_keys.c
+@@ -319,11 +319,7 @@ void key_fsgid_changed(struct task_struct *tsk)
+  * In the case of a successful return, the possession attribute is set on the
+  * returned key reference.
+  */
+-key_ref_t search_my_process_keyrings(struct key_type *type,
+-				     const void *description,
+-				     key_match_func_t match,
+-				     bool no_state_check,
+-				     const struct cred *cred)
++key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx)
+ {
+ 	key_ref_t key_ref, ret, err;
+ 
+@@ -339,10 +335,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
+ 	err = ERR_PTR(-EAGAIN);
+ 
+ 	/* search the thread keyring first */
+-	if (cred->thread_keyring) {
++	if (ctx->cred->thread_keyring) {
+ 		key_ref = keyring_search_aux(
+-			make_key_ref(cred->thread_keyring, 1),
+-			cred, type, description, match, no_state_check);
++			make_key_ref(ctx->cred->thread_keyring, 1), ctx);
+ 		if (!IS_ERR(key_ref))
+ 			goto found;
+ 
+@@ -358,10 +353,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
+ 	}
+ 
+ 	/* search the process keyring second */
+-	if (cred->process_keyring) {
++	if (ctx->cred->process_keyring) {
+ 		key_ref = keyring_search_aux(
+-			make_key_ref(cred->process_keyring, 1),
+-			cred, type, description, match, no_state_check);
++			make_key_ref(ctx->cred->process_keyring, 1), ctx);
+ 		if (!IS_ERR(key_ref))
+ 			goto found;
+ 
+@@ -379,11 +373,11 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
+ 	}
+ 
+ 	/* search the session keyring */
+-	if (cred->session_keyring) {
++	if (ctx->cred->session_keyring) {
+ 		rcu_read_lock();
+ 		key_ref = keyring_search_aux(
+-			make_key_ref(rcu_dereference(cred->session_keyring), 1),
+-			cred, type, description, match, no_state_check);
++			make_key_ref(rcu_dereference(ctx->cred->session_keyring), 1),
++			ctx);
+ 		rcu_read_unlock();
+ 
+ 		if (!IS_ERR(key_ref))
+@@ -402,10 +396,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
+ 		}
+ 	}
+ 	/* or search the user-session keyring */
+-	else if (cred->user->session_keyring) {
++	else if (ctx->cred->user->session_keyring) {
+ 		key_ref = keyring_search_aux(
+-			make_key_ref(cred->user->session_keyring, 1),
+-			cred, type, description, match, no_state_check);
++			make_key_ref(ctx->cred->user->session_keyring, 1),
++			ctx);
+ 		if (!IS_ERR(key_ref))
+ 			goto found;
+ 
+@@ -437,19 +431,14 @@ found:
+  *
+  * Return same as search_my_process_keyrings().
+  */
+-key_ref_t search_process_keyrings(struct key_type *type,
+-				  const void *description,
+-				  key_match_func_t match,
+-				  bool no_state_check,
+-				  const struct cred *cred)
++key_ref_t search_process_keyrings(struct keyring_search_context *ctx)
+ {
+ 	struct request_key_auth *rka;
+ 	key_ref_t key_ref, ret = ERR_PTR(-EACCES), err;
+ 
+ 	might_sleep();
+ 
+-	key_ref = search_my_process_keyrings(type, description, match,
+-					     no_state_check, cred);
++	key_ref = search_my_process_keyrings(ctx);
+ 	if (!IS_ERR(key_ref))
+ 		goto found;
+ 	err = key_ref;
+@@ -458,19 +447,21 @@ key_ref_t search_process_keyrings(struct key_type *type,
+ 	 * search the keyrings of the process mentioned there
+ 	 * - we don't permit access to request_key auth keys via this method
+ 	 */
+-	if (cred->request_key_auth &&
+-	    cred == current_cred() &&
+-	    type != &key_type_request_key_auth
++	if (ctx->cred->request_key_auth &&
++	    ctx->cred == current_cred() &&
++	    ctx->index_key.type != &key_type_request_key_auth
+ 	    ) {
++		const struct cred *cred = ctx->cred;
++
+ 		/* defend against the auth key being revoked */
+ 		down_read(&cred->request_key_auth->sem);
+ 
+-		if (key_validate(cred->request_key_auth) == 0) {
+-			rka = cred->request_key_auth->payload.data;
++		if (key_validate(ctx->cred->request_key_auth) == 0) {
++			rka = ctx->cred->request_key_auth->payload.data;
+ 
+-			key_ref = search_process_keyrings(type, description,
+-							  match, no_state_check,
+-							  rka->cred);
++			ctx->cred = rka->cred;
++			key_ref = search_process_keyrings(ctx);
++			ctx->cred = cred;
+ 
+ 			up_read(&cred->request_key_auth->sem);
+ 
+@@ -524,19 +515,23 @@ int lookup_user_key_possessed(const struct key *key, const void *target)
+ key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
+ 			  key_perm_t perm)
+ {
++	struct keyring_search_context ctx = {
++		.match	= lookup_user_key_possessed,
++		.flags	= (KEYRING_SEARCH_NO_STATE_CHECK |
++			   KEYRING_SEARCH_LOOKUP_DIRECT),
++	};
+ 	struct request_key_auth *rka;
+-	const struct cred *cred;
+ 	struct key *key;
+ 	key_ref_t key_ref, skey_ref;
+ 	int ret;
+ 
+ try_again:
+-	cred = get_current_cred();
++	ctx.cred = get_current_cred();
+ 	key_ref = ERR_PTR(-ENOKEY);
+ 
+ 	switch (id) {
+ 	case KEY_SPEC_THREAD_KEYRING:
+-		if (!cred->thread_keyring) {
++		if (!ctx.cred->thread_keyring) {
+ 			if (!(lflags & KEY_LOOKUP_CREATE))
+ 				goto error;
+ 
+@@ -548,13 +543,13 @@ try_again:
+ 			goto reget_creds;
+ 		}
+ 
+-		key = cred->thread_keyring;
++		key = ctx.cred->thread_keyring;
+ 		atomic_inc(&key->usage);
+ 		key_ref = make_key_ref(key, 1);
+ 		break;
+ 
+ 	case KEY_SPEC_PROCESS_KEYRING:
+-		if (!cred->process_keyring) {
++		if (!ctx.cred->process_keyring) {
+ 			if (!(lflags & KEY_LOOKUP_CREATE))
+ 				goto error;
+ 
+@@ -566,13 +561,13 @@ try_again:
+ 			goto reget_creds;
+ 		}
+ 
+-		key = cred->process_keyring;
++		key = ctx.cred->process_keyring;
+ 		atomic_inc(&key->usage);
+ 		key_ref = make_key_ref(key, 1);
+ 		break;
+ 
+ 	case KEY_SPEC_SESSION_KEYRING:
+-		if (!cred->session_keyring) {
++		if (!ctx.cred->session_keyring) {
+ 			/* always install a session keyring upon access if one
+ 			 * doesn't exist yet */
+ 			ret = install_user_keyrings();
+@@ -582,13 +577,13 @@ try_again:
+ 				ret = join_session_keyring(NULL);
+ 			else
+ 				ret = install_session_keyring(
+-					cred->user->session_keyring);
++					ctx.cred->user->session_keyring);
+ 
+ 			if (ret < 0)
+ 				goto error;
+ 			goto reget_creds;
+-		} else if (cred->session_keyring ==
+-			   cred->user->session_keyring &&
++		} else if (ctx.cred->session_keyring ==
++			   ctx.cred->user->session_keyring &&
+ 			   lflags & KEY_LOOKUP_CREATE) {
+ 			ret = join_session_keyring(NULL);
+ 			if (ret < 0)
+@@ -597,32 +592,32 @@ try_again:
+ 		}
+ 
+ 		rcu_read_lock();
+-		key = rcu_dereference(cred->session_keyring);
++		key = rcu_dereference(ctx.cred->session_keyring);
+ 		atomic_inc(&key->usage);
+ 		rcu_read_unlock();
+ 		key_ref = make_key_ref(key, 1);
+ 		break;
+ 
+ 	case KEY_SPEC_USER_KEYRING:
+-		if (!cred->user->uid_keyring) {
++		if (!ctx.cred->user->uid_keyring) {
+ 			ret = install_user_keyrings();
+ 			if (ret < 0)
+ 				goto error;
+ 		}
+ 
+-		key = cred->user->uid_keyring;
++		key = ctx.cred->user->uid_keyring;
+ 		atomic_inc(&key->usage);
+ 		key_ref = make_key_ref(key, 1);
+ 		break;
+ 
+ 	case KEY_SPEC_USER_SESSION_KEYRING:
+-		if (!cred->user->session_keyring) {
++		if (!ctx.cred->user->session_keyring) {
+ 			ret = install_user_keyrings();
+ 			if (ret < 0)
+ 				goto error;
+ 		}
+ 
+-		key = cred->user->session_keyring;
++		key = ctx.cred->user->session_keyring;
+ 		atomic_inc(&key->usage);
+ 		key_ref = make_key_ref(key, 1);
+ 		break;
+@@ -633,7 +628,7 @@ try_again:
+ 		goto error;
+ 
+ 	case KEY_SPEC_REQKEY_AUTH_KEY:
+-		key = cred->request_key_auth;
++		key = ctx.cred->request_key_auth;
+ 		if (!key)
+ 			goto error;
+ 
+@@ -642,20 +637,20 @@ try_again:
+ 		break;
+ 
+ 	case KEY_SPEC_REQUESTOR_KEYRING:
+-		if (!cred->request_key_auth)
++		if (!ctx.cred->request_key_auth)
+ 			goto error;
+ 
+-		down_read(&cred->request_key_auth->sem);
++		down_read(&ctx.cred->request_key_auth->sem);
+ 		if (test_bit(KEY_FLAG_REVOKED,
+-			     &cred->request_key_auth->flags)) {
++			     &ctx.cred->request_key_auth->flags)) {
+ 			key_ref = ERR_PTR(-EKEYREVOKED);
+ 			key = NULL;
+ 		} else {
+-			rka = cred->request_key_auth->payload.data;
++			rka = ctx.cred->request_key_auth->payload.data;
+ 			key = rka->dest_keyring;
+ 			atomic_inc(&key->usage);
+ 		}
+-		up_read(&cred->request_key_auth->sem);
++		up_read(&ctx.cred->request_key_auth->sem);
+ 		if (!key)
+ 			goto error;
+ 		key_ref = make_key_ref(key, 1);
+@@ -675,9 +670,13 @@ try_again:
+ 		key_ref = make_key_ref(key, 0);
+ 
+ 		/* check to see if we possess the key */
+-		skey_ref = search_process_keyrings(key->type, key,
+-						   lookup_user_key_possessed,
+-						   true, cred);
++		ctx.index_key.type		= key->type;
++		ctx.index_key.description	= key->description;
++		ctx.index_key.desc_len		= strlen(key->description);
++		ctx.match_data			= key;
++		kdebug("check possessed");
++		skey_ref = search_process_keyrings(&ctx);
++		kdebug("possessed=%p", skey_ref);
+ 
+ 		if (!IS_ERR(skey_ref)) {
+ 			key_put(key);
+@@ -717,14 +716,14 @@ try_again:
+ 		goto invalid_key;
+ 
+ 	/* check the permissions */
+-	ret = key_task_permission(key_ref, cred, perm);
++	ret = key_task_permission(key_ref, ctx.cred, perm);
+ 	if (ret < 0)
+ 		goto invalid_key;
+ 
+ 	key->last_used_at = current_kernel_time().tv_sec;
+ 
+ error:
+-	put_cred(cred);
++	put_cred(ctx.cred);
+ 	return key_ref;
+ 
+ invalid_key:
+@@ -735,7 +734,7 @@ invalid_key:
+ 	/* if we attempted to install a keyring, then it may have caused new
+ 	 * creds to be installed */
+ reget_creds:
+-	put_cred(cred);
++	put_cred(ctx.cred);
+ 	goto try_again;
+ }
+ 
+diff --git a/security/keys/request_key.c b/security/keys/request_key.c
+index 586cb79..ab75df4 100644
+--- a/security/keys/request_key.c
++++ b/security/keys/request_key.c
+@@ -345,38 +345,34 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
+  * May return a key that's already under construction instead if there was a
+  * race between two thread calling request_key().
+  */
+-static int construct_alloc_key(struct key_type *type,
+-			       const char *description,
++static int construct_alloc_key(struct keyring_search_context *ctx,
+ 			       struct key *dest_keyring,
+ 			       unsigned long flags,
+ 			       struct key_user *user,
+ 			       struct key **_key)
+ {
+-	const struct keyring_index_key index_key = {
+-		.type		= type,
+-		.description	= description,
+-		.desc_len	= strlen(description),
+-	};
+-	const struct cred *cred = current_cred();
+ 	unsigned long prealloc;
+ 	struct key *key;
+ 	key_perm_t perm;
+ 	key_ref_t key_ref;
+ 	int ret;
+ 
+-	kenter("%s,%s,,,", type->name, description);
++	kenter("%s,%s,,,",
++	       ctx->index_key.type->name, ctx->index_key.description);
+ 
+ 	*_key = NULL;
+ 	mutex_lock(&user->cons_lock);
+ 
+ 	perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
+ 	perm |= KEY_USR_VIEW;
+-	if (type->read)
++	if (ctx->index_key.type->read)
+ 		perm |= KEY_POS_READ;
+-	if (type == &key_type_keyring || type->update)
++	if (ctx->index_key.type == &key_type_keyring ||
++	    ctx->index_key.type->update)
+ 		perm |= KEY_POS_WRITE;
+ 
+-	key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred,
++	key = key_alloc(ctx->index_key.type, ctx->index_key.description,
++			ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred,
+ 			perm, flags);
+ 	if (IS_ERR(key))
+ 		goto alloc_failed;
+@@ -384,7 +380,7 @@ static int construct_alloc_key(struct key_type *type,
+ 	set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
+ 
+ 	if (dest_keyring) {
+-		ret = __key_link_begin(dest_keyring, &index_key, &prealloc);
++		ret = __key_link_begin(dest_keyring, &ctx->index_key, &prealloc);
+ 		if (ret < 0)
+ 			goto link_prealloc_failed;
+ 	}
+@@ -394,8 +390,7 @@ static int construct_alloc_key(struct key_type *type,
+ 	 * waited for locks */
+ 	mutex_lock(&key_construction_mutex);
+ 
+-	key_ref = search_process_keyrings(type, description, type->match,
+-					  false, cred);
++	key_ref = search_process_keyrings(ctx);
+ 	if (!IS_ERR(key_ref))
+ 		goto key_already_present;
+ 
+@@ -404,7 +399,7 @@ static int construct_alloc_key(struct key_type *type,
+ 
+ 	mutex_unlock(&key_construction_mutex);
+ 	if (dest_keyring)
+-		__key_link_end(dest_keyring, &index_key, prealloc);
++		__key_link_end(dest_keyring, &ctx->index_key, prealloc);
+ 	mutex_unlock(&user->cons_lock);
+ 	*_key = key;
+ 	kleave(" = 0 [%d]", key_serial(key));
+@@ -420,7 +415,7 @@ key_already_present:
+ 		ret = __key_link_check_live_key(dest_keyring, key);
+ 		if (ret == 0)
+ 			__key_link(dest_keyring, key, &prealloc);
+-		__key_link_end(dest_keyring, &index_key, prealloc);
++		__key_link_end(dest_keyring, &ctx->index_key, prealloc);
+ 		if (ret < 0)
+ 			goto link_check_failed;
+ 	}
+@@ -449,8 +444,7 @@ alloc_failed:
+ /*
+  * Commence key construction.
+  */
+-static struct key *construct_key_and_link(struct key_type *type,
+-					  const char *description,
++static struct key *construct_key_and_link(struct keyring_search_context *ctx,
+ 					  const char *callout_info,
+ 					  size_t callout_len,
+ 					  void *aux,
+@@ -469,8 +463,7 @@ static struct key *construct_key_and_link(struct key_type *type,
+ 
+ 	construct_get_dest_keyring(&dest_keyring);
+ 
+-	ret = construct_alloc_key(type, description, dest_keyring, flags, user,
+-				  &key);
++	ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key);
+ 	key_user_put(user);
+ 
+ 	if (ret == 0) {
+@@ -534,18 +527,24 @@ struct key *request_key_and_link(struct key_type *type,
+ 				 struct key *dest_keyring,
+ 				 unsigned long flags)
+ {
+-	const struct cred *cred = current_cred();
++	struct keyring_search_context ctx = {
++		.index_key.type		= type,
++		.index_key.description	= description,
++		.cred			= current_cred(),
++		.match			= type->match,
++		.match_data		= description,
++		.flags			= KEYRING_SEARCH_LOOKUP_DIRECT,
++	};
+ 	struct key *key;
+ 	key_ref_t key_ref;
+ 	int ret;
+ 
+ 	kenter("%s,%s,%p,%zu,%p,%p,%lx",
+-	       type->name, description, callout_info, callout_len, aux,
+-	       dest_keyring, flags);
++	       ctx.index_key.type->name, ctx.index_key.description,
++	       callout_info, callout_len, aux, dest_keyring, flags);
+ 
+ 	/* search all the process keyrings for a key */
+-	key_ref = search_process_keyrings(type, description, type->match,
+-					  false, cred);
++	key_ref = search_process_keyrings(&ctx);
+ 
+ 	if (!IS_ERR(key_ref)) {
+ 		key = key_ref_to_ptr(key_ref);
+@@ -568,9 +567,8 @@ struct key *request_key_and_link(struct key_type *type,
+ 		if (!callout_info)
+ 			goto error;
+ 
+-		key = construct_key_and_link(type, description, callout_info,
+-					     callout_len, aux, dest_keyring,
+-					     flags);
++		key = construct_key_and_link(&ctx, callout_info, callout_len,
++					     aux, dest_keyring, flags);
+ 	}
+ 
+ error:
+diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
+index 92077de..8d09852 100644
+--- a/security/keys/request_key_auth.c
++++ b/security/keys/request_key_auth.c
+@@ -239,15 +239,17 @@ static int key_get_instantiation_authkey_match(const struct key *key,
+  */
+ struct key *key_get_instantiation_authkey(key_serial_t target_id)
+ {
+-	const struct cred *cred = current_cred();
++	struct keyring_search_context ctx = {
++		.index_key.type		= &key_type_request_key_auth,
++		.cred			= current_cred(),
++		.match			= key_get_instantiation_authkey_match,
++		.match_data		= (void *)(unsigned long)target_id,
++		.flags			= KEYRING_SEARCH_LOOKUP_DIRECT,
++	};
+ 	struct key *authkey;
+ 	key_ref_t authkey_ref;
+ 
+-	authkey_ref = search_process_keyrings(
+-		&key_type_request_key_auth,
+-		(void *) (unsigned long) target_id,
+-		key_get_instantiation_authkey_match,
+-		false, cred);
++	authkey_ref = search_process_keyrings(&ctx);
+ 
+ 	if (IS_ERR(authkey_ref)) {
+ 		authkey = ERR_CAST(authkey_ref);
+diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
+index 55dc889..faa2cae 100644
+--- a/security/keys/user_defined.c
++++ b/security/keys/user_defined.c
+@@ -25,14 +25,15 @@ static int logon_vet_description(const char *desc);
+  * arbitrary blob of data as the payload
+  */
+ struct key_type key_type_user = {
+-	.name		= "user",
+-	.instantiate	= user_instantiate,
+-	.update		= user_update,
+-	.match		= user_match,
+-	.revoke		= user_revoke,
+-	.destroy	= user_destroy,
+-	.describe	= user_describe,
+-	.read		= user_read,
++	.name			= "user",
++	.def_lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
++	.instantiate		= user_instantiate,
++	.update			= user_update,
++	.match			= user_match,
++	.revoke			= user_revoke,
++	.destroy		= user_destroy,
++	.describe		= user_describe,
++	.read			= user_read,
+ };
+ 
+ EXPORT_SYMBOL_GPL(key_type_user);
+@@ -45,6 +46,7 @@ EXPORT_SYMBOL_GPL(key_type_user);
+  */
+ struct key_type key_type_logon = {
+ 	.name			= "logon",
++	.def_lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
+ 	.instantiate		= user_instantiate,
+ 	.update			= user_update,
+ 	.match			= user_match,
+-- 
+1.8.3.1
+
+
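Rolling the parameters into one structure means later patches can add
search inputs or outputs without touching every call chain.  A standalone
sketch of how a caller parameterises a search under the new flags (stand-in
definitions mirroring the patch; not buildable against the kernel tree):

	#include <stdio.h>

	#define KEYRING_SEARCH_LOOKUP_DIRECT	0x0000
	#define KEYRING_SEARCH_NO_STATE_CHECK	0x0002
	#define KEYRING_SEARCH_DO_STATE_CHECK	0x0004

	struct keyring_search_context {
		const char	*description;
		unsigned	flags;
	};

	static int key_search(const struct keyring_search_context *ctx)
	{
		/* state checks are skipped only when NO_STATE_CHECK is set
		 * and DO_STATE_CHECK doesn't override it */
		if ((ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK) &&
		    !(ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK))
			printf("skipping state checks for %s\n",
			       ctx->description);
		return 0;
	}

	int main(void)
	{
		struct keyring_search_context ctx = {
			.description	= "afs@REDHAT.COM",
			.flags		= (KEYRING_SEARCH_LOOKUP_DIRECT |
					   KEYRING_SEARCH_NO_STATE_CHECK),
		};

		return key_search(&ctx);
	}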
+From 4dffed72b92a305bcdbb73b719570d8f4ec53f46 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells at redhat.com>
+Date: Fri, 30 Aug 2013 15:37:52 +0100
+Subject: [PATCH 06/10] KEYS: Search for auth-key by name rather than target
+ key ID
+
+Search for auth-key by name rather than by target key ID as, in a future
+patch, we'll be searching directly by index key in preference to iteration
+over all keys.
+
+Signed-off-by: David Howells <dhowells at redhat.com>
+---
+ security/keys/request_key_auth.c | 21 +++++++--------------
+ 1 file changed, 7 insertions(+), 14 deletions(-)
+
+diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
+index 8d09852..7495a93 100644
+--- a/security/keys/request_key_auth.c
++++ b/security/keys/request_key_auth.c
+@@ -18,6 +18,7 @@
+ #include <linux/slab.h>
+ #include <asm/uaccess.h>
+ #include "internal.h"
++#include <keys/user-type.h>
+ 
+ static int request_key_auth_instantiate(struct key *,
+ 					struct key_preparsed_payload *);
+@@ -222,33 +223,25 @@ error_alloc:
+ }
+ 
+ /*
+- * See if an authorisation key is associated with a particular key.
+- */
+-static int key_get_instantiation_authkey_match(const struct key *key,
+-					       const void *_id)
+-{
+-	struct request_key_auth *rka = key->payload.data;
+-	key_serial_t id = (key_serial_t)(unsigned long) _id;
+-
+-	return rka->target_key->serial == id;
+-}
+-
+-/*
+  * Search the current process's keyrings for the authorisation key for
+  * instantiation of a key.
+  */
+ struct key *key_get_instantiation_authkey(key_serial_t target_id)
+ {
++	char description[16];
+ 	struct keyring_search_context ctx = {
+ 		.index_key.type		= &key_type_request_key_auth,
++		.index_key.description	= description,
+ 		.cred			= current_cred(),
+-		.match			= key_get_instantiation_authkey_match,
+-		.match_data		= (void *)(unsigned long)target_id,
++		.match			= user_match,
++		.match_data		= description,
+ 		.flags			= KEYRING_SEARCH_LOOKUP_DIRECT,
+ 	};
+ 	struct key *authkey;
+ 	key_ref_t authkey_ref;
+ 
++	sprintf(description, "%x", target_id);
++
+ 	authkey_ref = search_process_keyrings(&ctx);
+ 
+ 	if (IS_ERR(authkey_ref)) {
+-- 
+1.8.3.1
+
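+As a rough sketch of the new scheme (the serial number here is made up),
+the auth key for a target key with serial 0x1b2f3c is now found by
+description rather than by iterating over every key's payload:
+
+	char description[16];
+
+	sprintf(description, "%x", 0x1b2f3c);
+	/* description is now "1b2f3c"; user_match() compares descriptions */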
+
+From 5f3c76b0923620ddd5294270ac478819f06f21d1 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells at redhat.com>
+Date: Fri, 30 Aug 2013 15:37:53 +0100
+Subject: [PATCH 07/10] KEYS: Define a __key_get() wrapper to use rather than
+ atomic_inc()
+
+Define a __key_get() wrapper to use rather than atomic_inc() on the key usage
+count as this makes it easier to hook in refcount error debugging.
+
+Signed-off-by: David Howells <dhowells at redhat.com>
+---
+ Documentation/security/keys.txt | 13 ++++++++-----
+ include/linux/key.h             | 10 +++++++---
+ security/keys/key.c             |  2 +-
+ security/keys/keyring.c         |  6 +++---
+ security/keys/process_keys.c    | 16 ++++++++--------
+ 5 files changed, 27 insertions(+), 20 deletions(-)
+
+diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
+index 9ede670..a4c33f1 100644
+--- a/Documentation/security/keys.txt
++++ b/Documentation/security/keys.txt
+@@ -960,14 +960,17 @@ payload contents" for more information.
+     the argument will not be parsed.
+ 
+ 
+-(*) Extra references can be made to a key by calling the following function:
++(*) Extra references can be made to a key by calling one of the following
++    functions:
+ 
++	struct key *__key_get(struct key *key);
+ 	struct key *key_get(struct key *key);
+ 
+-    These need to be disposed of by calling key_put() when they've been
+-    finished with. The key pointer passed in will be returned. If the pointer
+-    is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and
+-    no increment will take place.
++    Keys so referenced will need to be disposed of by calling key_put() when
++    they've been finished with.  The key pointer passed in will be returned.
++
++    In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set
++    then the key will not be dereferenced and no increment will take place.
+ 
+ 
+ (*) A key's serial number can be obtained by calling:
+diff --git a/include/linux/key.h b/include/linux/key.h
+index d573e82..ef596c7 100644
+--- a/include/linux/key.h
++++ b/include/linux/key.h
+@@ -219,13 +219,17 @@ extern void key_revoke(struct key *key);
+ extern void key_invalidate(struct key *key);
+ extern void key_put(struct key *key);
+ 
+-static inline struct key *key_get(struct key *key)
++static inline struct key *__key_get(struct key *key)
+ {
+-	if (key)
+-		atomic_inc(&key->usage);
++	atomic_inc(&key->usage);
+ 	return key;
+ }
+ 
++static inline struct key *key_get(struct key *key)
++{
++	return key ? __key_get(key) : key;
++}
++
+ static inline void key_ref_put(key_ref_t key_ref)
+ {
+ 	key_put(key_ref_to_ptr(key_ref));
+diff --git a/security/keys/key.c b/security/keys/key.c
+index 7e6bc39..1e23cc2 100644
+--- a/security/keys/key.c
++++ b/security/keys/key.c
+@@ -644,7 +644,7 @@ found:
+ 	/* this races with key_put(), but that doesn't matter since key_put()
+ 	 * doesn't actually change the key
+ 	 */
+-	atomic_inc(&key->usage);
++	__key_get(key);
+ 
+ error:
+ 	spin_unlock(&key_serial_lock);
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+index b42f2d4..87eff32 100644
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -479,7 +479,7 @@ not_this_keyring:
+ 
+ 	/* we found a viable match */
+ found:
+-	atomic_inc(&key->usage);
++	__key_get(key);
+ 	key->last_used_at = ctx->now.tv_sec;
+ 	keyring->last_used_at = ctx->now.tv_sec;
+ 	while (sp > 0)
+@@ -573,7 +573,7 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+ 	return ERR_PTR(-ENOKEY);
+ 
+ found:
+-	atomic_inc(&key->usage);
++	__key_get(key);
+ 	keyring->last_used_at = key->last_used_at =
+ 		current_kernel_time().tv_sec;
+ 	rcu_read_unlock();
+@@ -909,7 +909,7 @@ void __key_link(struct key *keyring, struct key *key,
+ 
+ 	klist = rcu_dereference_locked_keyring(keyring);
+ 
+-	atomic_inc(&key->usage);
++	__key_get(key);
+ 	keyring->last_used_at = key->last_used_at =
+ 		current_kernel_time().tv_sec;
+ 
+diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
+index e68a3e0..68548ea 100644
+--- a/security/keys/process_keys.c
++++ b/security/keys/process_keys.c
+@@ -235,7 +235,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
+ 		if (IS_ERR(keyring))
+ 			return PTR_ERR(keyring);
+ 	} else {
+-		atomic_inc(&keyring->usage);
++		__key_get(keyring);
+ 	}
+ 
+ 	/* install the keyring */
+@@ -544,7 +544,7 @@ try_again:
+ 		}
+ 
+ 		key = ctx.cred->thread_keyring;
+-		atomic_inc(&key->usage);
++		__key_get(key);
+ 		key_ref = make_key_ref(key, 1);
+ 		break;
+ 
+@@ -562,7 +562,7 @@ try_again:
+ 		}
+ 
+ 		key = ctx.cred->process_keyring;
+-		atomic_inc(&key->usage);
++		__key_get(key);
+ 		key_ref = make_key_ref(key, 1);
+ 		break;
+ 
+@@ -593,7 +593,7 @@ try_again:
+ 
+ 		rcu_read_lock();
+ 		key = rcu_dereference(ctx.cred->session_keyring);
+-		atomic_inc(&key->usage);
++		__key_get(key);
+ 		rcu_read_unlock();
+ 		key_ref = make_key_ref(key, 1);
+ 		break;
+@@ -606,7 +606,7 @@ try_again:
+ 		}
+ 
+ 		key = ctx.cred->user->uid_keyring;
+-		atomic_inc(&key->usage);
++		__key_get(key);
+ 		key_ref = make_key_ref(key, 1);
+ 		break;
+ 
+@@ -618,7 +618,7 @@ try_again:
+ 		}
+ 
+ 		key = ctx.cred->user->session_keyring;
+-		atomic_inc(&key->usage);
++		__key_get(key);
+ 		key_ref = make_key_ref(key, 1);
+ 		break;
+ 
+@@ -632,7 +632,7 @@ try_again:
+ 		if (!key)
+ 			goto error;
+ 
+-		atomic_inc(&key->usage);
++		__key_get(key);
+ 		key_ref = make_key_ref(key, 1);
+ 		break;
+ 
+@@ -648,7 +648,7 @@ try_again:
+ 		} else {
+ 			rka = ctx.cred->request_key_auth->payload.data;
+ 			key = rka->dest_keyring;
+-			atomic_inc(&key->usage);
++			__key_get(key);
+ 		}
+ 		up_read(&ctx.cred->request_key_auth->sem);
+ 		if (!key)
+-- 
+1.8.3.1
+
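+As a minimal sketch of the intended split (maybe_null_key and known_key are
+hypothetical pointers), key_get() remains the NULL-tolerant form whilst
+__key_get() is for paths that have already checked the pointer:
+
+	key_get(maybe_null_key);	/* no-op if the pointer is NULL */
+	__key_get(known_key);		/* caller guarantees key is non-NULL */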
+
+From 99b0f3185570bb92a61952673b9933d9c1999508 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells at redhat.com>
+Date: Fri, 30 Aug 2013 15:37:53 +0100
+Subject: [PATCH 08/10] KEYS: Drop the permissions argument from
+ __keyring_search_one()
+
+Drop the permissions argument from __keyring_search_one() as the only caller
+passes 0 here - which causes all checks to be skipped.
+
+Signed-off-by: David Howells <dhowells at redhat.com>
+---
+ security/keys/internal.h | 3 +--
+ security/keys/key.c      | 2 +-
+ security/keys/keyring.c  | 9 +++------
+ 3 files changed, 5 insertions(+), 9 deletions(-)
+
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index f4bf938..73950bf 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -99,8 +99,7 @@ extern void __key_link_end(struct key *keyring,
+ 			   unsigned long prealloc);
+ 
+ extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+-				      const struct keyring_index_key *index_key,
+-				      key_perm_t perm);
++				      const struct keyring_index_key *index_key);
+ 
+ extern struct key *keyring_search_instkey(struct key *keyring,
+ 					  key_serial_t target_id);
+diff --git a/security/keys/key.c b/security/keys/key.c
+index 1e23cc2..7d716b8 100644
+--- a/security/keys/key.c
++++ b/security/keys/key.c
+@@ -847,7 +847,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ 	 * update that instead if possible
+ 	 */
+ 	if (index_key.type->update) {
+-		key_ref = __keyring_search_one(keyring_ref, &index_key, 0);
++		key_ref = __keyring_search_one(keyring_ref, &index_key);
+ 		if (!IS_ERR(key_ref))
+ 			goto found_matching_key;
+ 	}
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+index 87eff32..eeef1a0 100644
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -531,15 +531,14 @@ EXPORT_SYMBOL(keyring_search);
+  * RCU is used to make it unnecessary to lock the keyring key list here.
+  *
+  * Returns a pointer to the found key with usage count incremented if
+- * successful and returns -ENOKEY if not found.  Revoked keys and keys not
+- * providing the requested permission are skipped over.
++ * successful and returns -ENOKEY if not found.  Revoked and invalidated keys
++ * are skipped over.
+  *
+  * If successful, the possession indicator is propagated from the keyring ref
+  * to the returned key reference.
+  */
+ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+-			       const struct keyring_index_key *index_key,
+-			       key_perm_t perm)
++			       const struct keyring_index_key *index_key)
+ {
+ 	struct keyring_list *klist;
+ 	struct key *keyring, *key;
+@@ -560,8 +559,6 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+ 			if (key->type == index_key->type &&
+ 			    (!key->type->match ||
+ 			     key->type->match(key, index_key->description)) &&
+-			    key_permission(make_key_ref(key, possessed),
+-					   perm) == 0 &&
+ 			    !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ 					    (1 << KEY_FLAG_REVOKED)))
+ 			    )
+-- 
+1.8.3.1
+
+
+From cb720b39e41e62d55bf1e5f8243d78643d31154d Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells at redhat.com>
+Date: Fri, 30 Aug 2013 15:37:53 +0100
+Subject: [PATCH 09/10] Add a generic associative array implementation.
+
+Add a generic associative array implementation that can be used as the
+container for keyrings, thereby massively increasing the capacity available
+whilst also speeding up searching in keyrings that contain a lot of keys.
+
+This may also be useful in FS-Cache for tracking cookies.
+
+Documentation is added into Documentation/assoc_array.txt
+
+Some of the properties of the implementation are:
+
+ (1) Objects are opaque pointers.  The implementation does not care where they
+     point (if anywhere) or what they point to (if anything).
+
+     [!] NOTE: Pointers to objects _must_ be zero in the two least significant
+     	       bits.
+
+ (2) Objects do not need to contain linkage blocks for use by the array.  This
+     permits an object to be located in multiple arrays simultaneously.
+     Rather, the array is made up of metadata blocks that point to objects.
+
+ (3) Objects are labelled as being one of two types (the type is a bool value).
+     This information is stored in the array, but has no consequence to the
+     array itself or its algorithms.
+
+ (4) Objects require index keys to locate them within the array.
+
+ (5) Index keys must be unique.  Inserting an object with the same key as one
+     already in the array will replace the old object.
+
+ (6) Index keys can be of any length and can be of different lengths.
+
+ (7) Index keys should encode the length early on, before any variation due to
+     length is seen.
+
+ (8) Index keys can include a hash to scatter objects throughout the array.
+
+ (9) The array can be iterated over.  The objects will not necessarily come
+     out in key order.
+
+(10) The array can be iterated whilst it is being modified, provided the RCU
+     readlock is being held by the iterator.  Note, however, under these
+     circumstances, some objects may be seen more than once.  If this is a
+     problem, the iterator should lock against modification.  Objects will not
+     be missed, however, unless deleted.
+
+(11) Objects in the array can be looked up by means of their index key.
+
+(12) Objects can be looked up whilst the array is being modified, provided the
+     RCU readlock is being held by the thread doing the look up.
+
+The implementation uses a tree of 16-pointer nodes internally that are indexed
+on each level by nibbles from the index key.  To improve memory efficiency,
+shortcuts can be emplaced to skip over what would otherwise be a series of
+single-occupancy nodes.  Further, nodes pack leaf object pointers into spare
+space in the node rather than making an extra branch until such time as an
+object needs to be added to a full node.
+
+Signed-off-by: David Howells <dhowells at redhat.com>
+---
+ Documentation/assoc_array.txt    |  574 +++++++++++++
+ include/linux/assoc_array.h      |   92 ++
+ include/linux/assoc_array_priv.h |  182 ++++
+ lib/Kconfig                      |   14 +
+ lib/Makefile                     |    1 +
+ lib/assoc_array.c                | 1745 ++++++++++++++++++++++++++++++++++++++
+ 6 files changed, 2608 insertions(+)
+ create mode 100644 Documentation/assoc_array.txt
+ create mode 100644 include/linux/assoc_array.h
+ create mode 100644 include/linux/assoc_array_priv.h
+ create mode 100644 lib/assoc_array.c
+
+diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt
+new file mode 100644
+index 0000000..f4faec0
+--- /dev/null
++++ b/Documentation/assoc_array.txt
+@@ -0,0 +1,574 @@
++		   ========================================
++		   GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION
++		   ========================================
++
++Contents:
++
++ - Overview.
++
++ - The public API.
++   - Edit script.
++   - Operations table.
++   - Manipulation functions.
++   - Access functions.
++   - Index key form.
++
++ - Internal workings.
++   - Basic internal tree layout.
++   - Shortcuts.
++   - Splitting and collapsing nodes.
++   - Non-recursive iteration.
++   - Simultaneous alteration and iteration.
++
++
++========
++OVERVIEW
++========
++
++This associative array implementation is an object container with the following
++properties:
++
++ (1) Objects are opaque pointers.  The implementation does not care where they
++     point (if anywhere) or what they point to (if anything).
++
++     [!] NOTE: Pointers to objects _must_ be zero in the least significant bit.
++
++ (2) Objects do not need to contain linkage blocks for use by the array.  This
++     permits an object to be located in multiple arrays simultaneously.
++     Rather, the array is made up of metadata blocks that point to objects.
++
++ (3) Objects require index keys to locate them within the array.
++
++ (4) Index keys must be unique.  Inserting an object with the same key as one
++     already in the array will replace the old object.
++
++ (5) Index keys can be of any length and can be of different lengths.
++
++ (6) Index keys should encode the length early on, before any variation due to
++     length is seen.
++
++ (7) Index keys can include a hash to scatter objects throughout the array.
++
++ (8) The array can be iterated over.  The objects will not necessarily come
++     out in key order.
++
++ (9) The array can be iterated over whilst it is being modified, provided the
++     RCU readlock is being held by the iterator.  Note, however, under these
++     circumstances, some objects may be seen more than once.  If this is a
++     problem, the iterator should lock against modification.  Objects will not
++     be missed, however, unless deleted.
++
++(10) Objects in the array can be looked up by means of their index key.
++
++(11) Objects can be looked up whilst the array is being modified, provided the
++     RCU readlock is being held by the thread doing the look up.
++
++The implementation uses a tree of 16-pointer nodes internally that are indexed
++on each level by nibbles from the index key in the same manner as in a radix
++tree.  To improve memory efficiency, shortcuts can be emplaced to skip over
++what would otherwise be a series of single-occupancy nodes.  Further, nodes
++pack leaf object pointers into spare space in the node rather than making an
++extra branch until such time as an object needs to be added to a full node.
++
++
++==============
++THE PUBLIC API
++==============
++
++The public API can be found in <linux/assoc_array.h>.  The associative array is
++rooted on the following structure:
++
++	struct assoc_array {
++		...
++	};
++
++The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY.
++
++
++EDIT SCRIPT
++-----------
++
++The insertion and deletion functions produce an 'edit script' that can later be
++applied to effect the changes without risking ENOMEM.  This retains the
++preallocated metadata blocks that will be installed in the internal tree and
++keeps track of the metadata blocks that will be removed from the tree when the
++script is applied.
++
++This is also used to keep track of dead blocks and dead objects after the
++script has been applied so that they can be freed later.  The freeing is done
++after an RCU grace period has passed - thus allowing access functions to
++proceed under the RCU read lock.
++
++The script appears outside of the API as a pointer of the type:
++
++	struct assoc_array_edit;
++
++There are two functions for dealing with the script:
++
++ (1) Apply an edit script.
++
++	void assoc_array_apply_edit(struct assoc_array_edit *edit);
++
++     This will perform the edit functions, interpolating various write barriers
++     to permit accesses under the RCU read lock to continue.  The edit script
++     will then be passed to call_rcu() to free it and any dead stuff it points
++     to.
++
++ (2) Cancel an edit script.
++
++	void assoc_array_cancel_edit(struct assoc_array_edit *edit);
++
++     This frees the edit script and all preallocated memory immediately.  If
++     this was for insertion, the new object is _not_ released by this function,
++     but must rather be released by the caller.
++
++These functions are guaranteed not to fail.
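++
++As a rough sketch of the overall pattern (my_array, my_ops, my_key, my_object
++and the commit flag are hypothetical), an edit is prepared and then either
++committed or abandoned:
++
++	struct assoc_array_edit *edit;
++
++	edit = assoc_array_insert(&my_array, &my_ops, my_key, my_object);
++	if (IS_ERR(edit))
++		return PTR_ERR(edit);		/* typically -ENOMEM */
++	if (commit)
++		assoc_array_apply_edit(edit);	/* cannot fail */
++	else
++		assoc_array_cancel_edit(edit);	/* my_object is not freed */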
++
++
++OPERATIONS TABLE
++----------------
++
++Various functions take a table of operations:
++
++	struct assoc_array_ops {
++		...
++	};
++
++This points to a number of methods, all of which need to be provided:
++
++ (1) Get a chunk of index key from caller data:
++
++	unsigned long (*get_key_chunk)(const void *index_key, int level);
++
++     This should return a chunk of caller-supplied index key starting at the
++     *bit* position given by the level argument.  The level argument will be a
++     multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return
++     ASSOC_ARRAY_KEY_CHUNK_SIZE bits.  No error is possible.
++
++
++ (2) Get a chunk of an object's index key.
++
++	unsigned long (*get_object_key_chunk)(const void *object, int level);
++
++     As the previous function, but gets its data from an object in the array
++     rather than from a caller-supplied index key.
++
++
++ (3) See if this is the object we're looking for.
++
++	bool (*compare_object)(const void *object, const void *index_key);
++
++     Compare the object against an index key and return true if it matches and
++     false if it doesn't.
++
++
++ (4) Diff the index keys of two objects.
++
++	int (*diff_objects)(const void *a, const void *b);
++
++     Return the bit position at which the index keys of two objects differ or
++     -1 if they are the same.
++
++
++ (5) Free an object.
++
++	void (*free_object)(void *object);
++
++     Free the specified object.  Note that this may be called an RCU grace
++     period after assoc_array_apply_edit() was called, so synchronize_rcu() may
++     be necessary on module unloading.
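++
++As a rough sketch (the my_* helpers are hypothetical; the field names are
++those of struct assoc_array_ops), a complete operations table might look
++like:
++
++	static const struct assoc_array_ops my_ops = {
++		.get_key_chunk		= my_get_key_chunk,
++		.get_object_key_chunk	= my_get_object_key_chunk,
++		.compare_object		= my_compare_object,
++		.diff_objects		= my_diff_objects,
++		.free_object		= my_free_object,
++	};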
++
++
++MANIPULATION FUNCTIONS
++----------------------
++
++There are a number of functions for manipulating an associative array:
++
++ (1) Initialise an associative array.
++
++	void assoc_array_init(struct assoc_array *array);
++
++     This initialises the base structure for an associative array.  It can't
++     fail.
++
++
++ (2) Insert/replace an object in an associative array.
++
++	struct assoc_array_edit *
++	assoc_array_insert(struct assoc_array *array,
++			   const struct assoc_array_ops *ops,
++			   const void *index_key,
++			   void *object);
++
++     This inserts the given object into the array.  Note that the least
++     significant bit of the pointer must be zero as it's used to type-mark
++     pointers internally.
++
++     If an object already exists for that key then it will be replaced with the
++     new object and the old one will be freed automatically.
++
++     The index_key argument should hold index key information and is
++     passed to the methods in the ops table when they are called.
++
++     This function makes no alteration to the array itself, but rather returns
++     an edit script that must be applied.  -ENOMEM is returned in the case of
++     an out-of-memory error.
++
++     The caller should lock exclusively against other modifiers of the array.
++
++
++ (3) Delete an object from an associative array.
++
++	struct assoc_array_edit *
++	assoc_array_delete(struct assoc_array *array,
++			   const struct assoc_array_ops *ops,
++			   const void *index_key);
++
++     This deletes an object that matches the specified data from the array.
++
++     The index_key argument should hold index key information and is
++     passed to the methods in the ops table when they are called.
++
++     This function makes no alteration to the array itself, but rather returns
++     an edit script that must be applied.  -ENOMEM is returned in the case of
++     an out-of-memory error.  NULL will be returned if the specified object is
++     not found within the array.
++
++     The caller should lock exclusively against other modifiers of the array.
++
++
++ (4) Delete all objects from an associative array.
++
++	struct assoc_array_edit *
++	assoc_array_clear(struct assoc_array *array,
++			  const struct assoc_array_ops *ops);
++
++     This deletes all the objects from an associative array and leaves it
++     completely empty.
++
++     This function makes no alteration to the array itself, but rather returns
++     an edit script that must be applied.  -ENOMEM is returned in the case of
++     an out-of-memory error.
++
++     The caller should lock exclusively against other modifiers of the array.
++
++
++ (5) Destroy an associative array, deleting all objects.
++
++	void assoc_array_destroy(struct assoc_array *array,
++				 const struct assoc_array_ops *ops);
++
++     This destroys the contents of the associative array and leaves it
++     completely empty.  It is not permitted for another thread to be traversing
++     the array under the RCU read lock at the same time as this function is
++     destroying it as no RCU deferral is performed on memory release -
++     something that would require memory to be allocated.
++
++     The caller should lock exclusively against other modifiers and accessors
++     of the array.
++
++
++ (6) Garbage collect an associative array.
++
++	int assoc_array_gc(struct assoc_array *array,
++			   const struct assoc_array_ops *ops,
++			   bool (*iterator)(void *object, void *iterator_data),
++			   void *iterator_data);
++
++     This iterates over the objects in an associative array and passes each one
++     to iterator().  If iterator() returns true, the object is kept.  If it
++     returns false, the object will be freed.  If the iterator() function
++     returns true, it must perform any appropriate refcount incrementing on the
++     object before returning.
++
++     The internal tree will be packed down if possible as part of the iteration
++     to reduce the number of nodes in it.
++
++     The iterator_data is passed directly to iterator() and is otherwise
++     ignored by the function.
++
++     The function will return 0 if successful and -ENOMEM if there wasn't
++     enough memory.
++
++     It is possible for other threads to iterate over or search the array under
++     the RCU read lock whilst this function is in progress.  The caller should
++     lock exclusively against other modifiers of the array.
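++
++As a hedged sketch of a GC iterator (struct my_object and its fields are
++hypothetical), dead objects are dropped and kept objects gain a reference:
++
++	static bool my_gc_iterator(void *object, void *iterator_data)
++	{
++		struct my_object *obj = object;
++
++		if (obj->dead)
++			return false;	/* free_object() will be called */
++		atomic_inc(&obj->usage);
++		return true;		/* keep, holding our own reference */
++	}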
++
++
++ACCESS FUNCTIONS
++----------------
++
++There are two functions for accessing an associative array:
++
++ (1) Iterate over all the objects in an associative array.
++
++	int assoc_array_iterate(const struct assoc_array *array,
++				int (*iterator)(const void *object,
++						void *iterator_data),
++				void *iterator_data);
++
++     This passes each object in the array to the iterator callback function.
++     iterator_data is private data for that function.
++
++     This may be used on an array at the same time as the array is being
++     modified, provided the RCU read lock is held.  Under such circumstances,
++     it is possible for the iteration function to see some objects twice.  If
++     this is a problem, then modification should be locked against.  The
++     iteration algorithm should not, however, miss any objects.
++
++     The function will return 0 if no objects were in the array or else it will
++     return the result of the last iterator function called.  Iteration stops
++     immediately if any call to the iteration function results in a non-zero
++     return.
++
++
++ (2) Find an object in an associative array.
++
++	void *assoc_array_find(const struct assoc_array *array,
++			       const struct assoc_array_ops *ops,
++			       const void *index_key);
++
++     This walks through the array's internal tree directly to the object
++     specified by the index key.
++
++     This may be used on an array at the same time as the array is being
++     modified, provided the RCU read lock is held.
++
++     The function will return the object if found or will return NULL if the
++     object was not found.
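++
++     As a minimal usage sketch (my_array, my_ops and my_key are
++     hypothetical), a lookup under the RCU read lock might be:
++
++	rcu_read_lock();
++	object = assoc_array_find(&my_array, &my_ops, my_key);
++	rcu_read_unlock();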
++
++
++INDEX KEY FORM
++--------------
++
++The index key can be of any form, but since the algorithms aren't told how long
++the key is, it is strongly recommended that the index key includes its length
++very early on before any variation due to the length would have an effect on
++comparisons.
++
++This will cause leaves with different length keys to scatter away from each
++other - and those with the same length keys to cluster together.
++
++It is also recommended that the index key begin with a hash of the rest of the
++key to maximise scattering throughout keyspace.
++
++The better the scattering, the wider and lower the internal tree will be.
++
++Poor scattering isn't too much of a problem as there are shortcuts and nodes
++can contain mixtures of leaves and metadata pointers.
++
++The index key is read in chunks of one machine word.  Each chunk is
++subdivided into one nibble (4 bits) per level, so on a 32-bit CPU this is
++good for 8 levels and on a 64-bit CPU, 16 levels.  Unless the scattering is
++really poor, it is
++unlikely that more than one word of any particular index key will have to be
++used.
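++
++As a rough sketch (a hedged example assuming the caller stores each index
++key as a plain array of unsigned longs), a chunk getter might be:
++
++	static unsigned long my_get_key_chunk(const void *index_key, int level)
++	{
++		const unsigned long *key = index_key;
++
++		/* level is in bits and is always a multiple of
++		 * ASSOC_ARRAY_KEY_CHUNK_SIZE (BITS_PER_LONG)
++		 */
++		return key[level / ASSOC_ARRAY_KEY_CHUNK_SIZE];
++	}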
++
++
++=================
++INTERNAL WORKINGS
++=================
++
++The associative array data structure has an internal tree.  This tree is
++constructed of two types of metadata blocks: nodes and shortcuts.
++
++A node is an array of slots.  Each slot can contain one of four things:
++
++ (*) A NULL pointer, indicating that the slot is empty.
++
++ (*) A pointer to an object (a leaf).
++
++ (*) A pointer to a node at the next level.
++
++ (*) A pointer to a shortcut.
++
++
++BASIC INTERNAL TREE LAYOUT
++--------------------------
++
++Ignoring shortcuts for the moment, the nodes form a multilevel tree.  The index
++key space is strictly subdivided by the nodes in the tree and nodes occur on
++fixed levels.  For example:
++
++ Level:	0		1		2		3
++	===============	===============	===============	===============
++							NODE D
++			NODE B		NODE C	+------>+---+
++		+------>+---+	+------>+---+	|	| 0 |
++	NODE A	|	| 0 |	|	| 0 |	|	+---+
++	+---+	|	+---+	|	+---+	|	:   :
++	| 0 |	|	:   :	|	:   :	|	+---+
++	+---+	|	+---+	|	+---+	|	| f |
++	| 1 |---+	| 3 |---+	| 7 |---+	+---+
++	+---+		+---+		+---+
++	:   :		:   :		| 8 |---+
++	+---+		+---+		+---+	|	NODE E
++	| e |---+	| f |		:   :   +------>+---+
++	+---+	|	+---+		+---+		| 0 |
++	| f |	|			| f |		+---+
++	+---+	|			+---+		:   :
++		|	NODE F				+---+
++		+------>+---+				| f |
++			| 0 |		NODE G		+---+
++			+---+	+------>+---+
++			:   :	|	| 0 |
++			+---+	|	+---+
++			| 6 |---+	:   :
++			+---+		+---+
++			:   :		| f |
++			+---+		+---+
++			| f |
++			+---+
++
++In the above example, there are 7 nodes (A-G), each with 16 slots (0-f).
++Assuming no other metadata nodes in the tree, the key space is divided thusly:
++
++	KEY PREFIX	NODE
++	==========	====
++	137*		D
++	138*		E
++	13[0-69-f]*	C
++	1[0-24-f]*	B
++	e6*		G
++	e[0-57-f]*	F
++	[02-df]*	A
++
++So, for instance, keys with the following example index keys will be found in
++the appropriate nodes:
++
++	INDEX KEY	PREFIX	NODE
++	===============	=======	====
++	13694892892489	13	C
++	13795289025897	137	D
++	13889dde88793	138	E
++	138bbb89003093	138	E
++	1394879524789	13	C
++	1458952489	1	B
++	9431809de993ba	-	A
++	b4542910809cd	-	A
++	e5284310def98	e	F
++	e68428974237	e6	G
++	e7fffcbd443	e	F
++	f3842239082	-	A
++
++To save memory, if a node can hold all the leaves in its portion of keyspace,
++then the node will have all those leaves in it and will not have any metadata
++pointers - even if some of those leaves would like to be in the same slot.
++
++A node can contain a heterogeneous mix of leaves and metadata pointers.
++Metadata pointers must be in the slots that match their subdivisions of key
++space.  The leaves can be in any slot not occupied by a metadata pointer.  It
++is guaranteed that none of the leaves in a node will match a slot occupied by a
++metadata pointer.  If the metadata pointer is there, any leaf whose key matches
++the metadata key prefix must be in the subtree that the metadata pointer points
++to.
++
++In the above example list of index keys, node A will contain:
++
++	SLOT	CONTENT		INDEX KEY (PREFIX)
++	====	===============	==================
++	1	PTR TO NODE B	1*
++	any	LEAF		9431809de993ba
++	any	LEAF		b4542910809cd
++	e	PTR TO NODE F	e*
++	any	LEAF		f3842239082
++
++and node B:
++
++	3	PTR TO NODE C	13*
++	any	LEAF		1458952489
++
++
++SHORTCUTS
++---------
++
++Shortcuts are metadata records that jump over a piece of keyspace.  A shortcut
++is a replacement for a series of single-occupancy nodes ascending through the
++levels.  Shortcuts exist to save memory and to speed up traversal.
++
++It is possible for the root of the tree to be a shortcut - say, for example,
++the tree contains at least 17 nodes all with key prefix '1111'.  The insertion
++algorithm will insert a shortcut to skip over the '1111' keyspace in a single
++bound and get to the fourth level where these actually become different.
++
++
++SPLITTING AND COLLAPSING NODES
++------------------------------
++
++Each node has a maximum capacity of 16 leaves and metadata pointers.  If the
++insertion algorithm finds that it is trying to insert a 17th object into a
++node, that node will be split such that at least two leaves that have a common
++key segment at that level end up in a separate node rooted on that slot for
++that common key segment.
++
++If the leaves in a full node and the leaf that is being inserted are
++sufficiently similar, then a shortcut will be inserted into the tree.
++
++When the number of objects in the subtree rooted at a node falls to 16 or
++fewer, then the subtree will be collapsed down to a single node - and this will
++ripple towards the root if possible.
++
++
++NON-RECURSIVE ITERATION
++-----------------------
++
++Each node and shortcut contains a back pointer to its parent and the number of
++the slot in that parent that points to it.  Non-recursive iteration uses these
++to proceed rootwards through the tree, going to the parent node, slot N + 1,
++so that progress is made without the need for a stack.
++
++The backpointers, however, make simultaneous alteration and iteration tricky.
++
++
++SIMULTANEOUS ALTERATION AND ITERATION
++-------------------------------------
++
++There are a number of cases to consider:
++
++ (1) Simple insert/replace.  This involves simply replacing a NULL or old
++     matching leaf pointer with the pointer to the new leaf after a barrier.
++     The metadata blocks don't change otherwise.  An old leaf won't be freed
++     until after the RCU grace period.
++
++ (2) Simple delete.  This involves just clearing an old matching leaf.  The
++     metadata blocks don't change otherwise.  The old leaf won't be freed until
++     after the RCU grace period.
++
++ (3) Insertion replacing part of a subtree that we haven't yet entered.  This
++     may involve replacement of part of that subtree - but that won't affect
++     the iteration as we won't have reached the pointer to it yet and the
++     ancestry blocks are not replaced (the layout of those does not change).
++
++ (4) Insertion replacing nodes that we're actively processing.  This isn't a
++     problem as we've passed the anchoring pointer and won't switch onto the
++     new layout until we follow the back pointers - at which point we've
++     already examined the leaves in the replaced node (we iterate over all the
++     leaves in a node before following any of its metadata pointers).
++
++     We might, however, re-see some leaves that have been split out into a new
++     branch that's in a slot further along than we were at.
++
++ (5) Insertion replacing nodes that we're processing a dependent branch of.
++     This won't affect us until we follow the back pointers.  Similar to (4).
++
++ (6) Deletion collapsing a branch under us.  This doesn't affect us because the
++     back pointers will get us back to the parent of the new node before we
++     could see the new node.  The entire collapsed subtree is thrown away
++     unchanged - and will still be rooted on the same slot, so we shouldn't
++     process it a second time as we'll go back to slot + 1.
++
++Note:
++
++ (*) Under some circumstances, we need to simultaneously change the parent
++     pointer and the parent slot pointer on a node (say, for example, we
++     inserted another node before it and moved it up a level).  We cannot do
++     this without locking against a read - so we have to replace that node too.
++
++     However, when we're changing a shortcut into a node this isn't a problem
++     as shortcuts only have one slot and so the parent slot number isn't used
++     when traversing backwards over one.  This means that it's okay to change
++     the slot number first - provided suitable barriers are used to make sure
++     the parent slot number is read after the back pointer.
++
++Obsolete blocks and leaves are freed up after an RCU grace period has passed,
++so as long as anyone doing walking or iteration holds the RCU read lock, the
++old superstructure should not go away on them.
+diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h
+new file mode 100644
+index 0000000..9a193b8
+--- /dev/null
++++ b/include/linux/assoc_array.h
+@@ -0,0 +1,92 @@
++/* Generic associative array implementation.
++ *
++ * See Documentation/assoc_array.txt for information.
++ *
++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
++ * Written by David Howells (dhowells at redhat.com)
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public Licence
++ * as published by the Free Software Foundation; either version
++ * 2 of the Licence, or (at your option) any later version.
++ */
++
++#ifndef _LINUX_ASSOC_ARRAY_H
++#define _LINUX_ASSOC_ARRAY_H
++
++#ifdef CONFIG_ASSOCIATIVE_ARRAY
++
++#include <linux/types.h>
++
++#define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */
++
++/*
++ * Generic associative array.
++ */
++struct assoc_array {
++	struct assoc_array_ptr	*root;		/* The node at the root of the tree */
++	unsigned long		nr_leaves_on_tree;
++};
++
++/*
++ * Operations on objects and index keys for use by array manipulation routines.
++ */
++struct assoc_array_ops {
++	/* Method to get a chunk of an index key from caller-supplied data */
++	unsigned long (*get_key_chunk)(const void *index_key, int level);
++
++	/* Method to get a piece of an object's index key */
++	unsigned long (*get_object_key_chunk)(const void *object, int level);
++
++	/* Is this the object we're looking for? */
++	bool (*compare_object)(const void *object, const void *index_key);
++
++	/* How different are two objects, to a bit position in their keys? (or
++	 * -1 if they're the same)
++	 */
++	int (*diff_objects)(const void *a, const void *b);
++
++	/* Method to free an object. */
++	void (*free_object)(void *object);
++};
++
++/*
++ * Access and manipulation functions.
++ */
++struct assoc_array_edit;
++
++static inline void assoc_array_init(struct assoc_array *array)
++{
++	array->root = NULL;
++	array->nr_leaves_on_tree = 0;
++}
++
++extern int assoc_array_iterate(const struct assoc_array *array,
++			       int (*iterator)(const void *object,
++					       void *iterator_data),
++			       void *iterator_data);
++extern void *assoc_array_find(const struct assoc_array *array,
++			      const struct assoc_array_ops *ops,
++			      const void *index_key);
++extern void assoc_array_destroy(struct assoc_array *array,
++				const struct assoc_array_ops *ops);
++extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
++						   const struct assoc_array_ops *ops,
++						   const void *index_key,
++						   void *object);
++extern void assoc_array_insert_set_object(struct assoc_array_edit *edit,
++					  void *object);
++extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
++						   const struct assoc_array_ops *ops,
++						   const void *index_key);
++extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
++						  const struct assoc_array_ops *ops);
++extern void assoc_array_apply_edit(struct assoc_array_edit *edit);
++extern void assoc_array_cancel_edit(struct assoc_array_edit *edit);
++extern int assoc_array_gc(struct assoc_array *array,
++			  const struct assoc_array_ops *ops,
++			  bool (*iterator)(void *object, void *iterator_data),
++			  void *iterator_data);
++
++#endif /* CONFIG_ASSOCIATIVE_ARRAY */
++#endif /* _LINUX_ASSOC_ARRAY_H */
+diff --git a/include/linux/assoc_array_priv.h b/include/linux/assoc_array_priv.h
+new file mode 100644
+index 0000000..711275e
+--- /dev/null
++++ b/include/linux/assoc_array_priv.h
+@@ -0,0 +1,182 @@
++/* Private definitions for the generic associative array implementation.
++ *
++ * See Documentation/assoc_array.txt for information.
++ *
++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
++ * Written by David Howells (dhowells at redhat.com)
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public Licence
++ * as published by the Free Software Foundation; either version
++ * 2 of the Licence, or (at your option) any later version.
++ */
++
++#ifndef _LINUX_ASSOC_ARRAY_PRIV_H
++#define _LINUX_ASSOC_ARRAY_PRIV_H
++
++#ifdef CONFIG_ASSOCIATIVE_ARRAY
++
++#include <linux/assoc_array.h>
++
++#define ASSOC_ARRAY_FAN_OUT		16	/* Number of slots per node */
++#define ASSOC_ARRAY_FAN_MASK		(ASSOC_ARRAY_FAN_OUT - 1)
++#define ASSOC_ARRAY_LEVEL_STEP		(ilog2(ASSOC_ARRAY_FAN_OUT))
++#define ASSOC_ARRAY_LEVEL_STEP_MASK	(ASSOC_ARRAY_LEVEL_STEP - 1)
++#define ASSOC_ARRAY_KEY_CHUNK_MASK	(ASSOC_ARRAY_KEY_CHUNK_SIZE - 1)
++#define ASSOC_ARRAY_KEY_CHUNK_SHIFT	(ilog2(BITS_PER_LONG))
++
++/*
++ * Undefined type representing a pointer with type information in the bottom
++ * two bits.
++ */
++struct assoc_array_ptr;
++
++/*
++ * An N-way node in the tree.
++ *
++ * Each slot contains one of four things:
++ *
++ *	(1) Nothing (NULL).
++ *
++ *	(2) A leaf object (pointer types 0).
++ *
++ *	(3) A next-level node (pointer type 1, subtype 0).
++ *
++ *	(4) A shortcut (pointer type 1, subtype 1).
++ *
++ * The tree is optimised for search-by-ID, but permits reasonable iteration
++ * also.
++ *
++ * The tree is navigated by constructing an index key consisting of an array of
++ * segments, where each segment is ilog2(ASSOC_ARRAY_FAN_OUT) bits in size.
++ *
++ * The segments correspond to levels of the tree (the first segment is used at
++ * level 0, the second at level 1, etc.).
++ */
++struct assoc_array_node {
++	struct assoc_array_ptr	*back_pointer;
++	u8			parent_slot;
++	struct assoc_array_ptr	*slots[ASSOC_ARRAY_FAN_OUT];
++	unsigned long		nr_leaves_on_branch;
++};
++
++/*
++ * A shortcut through the index space out to where a collection of nodes/leaves
++ * with the same IDs live.
++ */
++struct assoc_array_shortcut {
++	struct assoc_array_ptr	*back_pointer;
++	int			parent_slot;
++	int			skip_to_level;
++	struct assoc_array_ptr	*next_node;
++	unsigned long		index_key[];
++};
++
++/*
++ * Preallocation cache.
++ */
++struct assoc_array_edit {
++	struct rcu_head			rcu;
++	struct assoc_array		*array;
++	const struct assoc_array_ops	*ops;
++	const struct assoc_array_ops	*ops_for_excised_subtree;
++	struct assoc_array_ptr		*leaf;
++	struct assoc_array_ptr		**leaf_p;
++	struct assoc_array_ptr		*dead_leaf;
++	struct assoc_array_ptr		*new_meta[3];
++	struct assoc_array_ptr		*excised_meta[1];
++	struct assoc_array_ptr		*excised_subtree;
++	struct assoc_array_ptr		**set_backpointers[ASSOC_ARRAY_FAN_OUT];
++	struct assoc_array_ptr		*set_backpointers_to;
++	struct assoc_array_node		*adjust_count_on;
++	long				adjust_count_by;
++	struct {
++		struct assoc_array_ptr	**ptr;
++		struct assoc_array_ptr	*to;
++	} set[2];
++	struct {
++		u8			*p;
++		u8			to;
++	} set_parent_slot[1];
++	u8				segment_cache[ASSOC_ARRAY_FAN_OUT + 1];
++};
++
++/*
++ * Internal tree member pointers are marked in the bottom one or two bits to
++ * indicate what type they are so that we don't have to look behind every
++ * pointer to see what it points to.
++ *
++ * We provide functions to test type annotations and to create and translate
++ * the annotated pointers.
++ */
++#define ASSOC_ARRAY_PTR_TYPE_MASK 0x1UL
++#define ASSOC_ARRAY_PTR_LEAF_TYPE 0x0UL	/* Points to leaf (or nowhere) */
++#define ASSOC_ARRAY_PTR_META_TYPE 0x1UL	/* Points to node or shortcut */
++#define ASSOC_ARRAY_PTR_SUBTYPE_MASK	0x2UL
++#define ASSOC_ARRAY_PTR_NODE_SUBTYPE	0x0UL
++#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE 0x2UL
++
++static inline bool assoc_array_ptr_is_meta(const struct assoc_array_ptr *x)
++{
++	return (unsigned long)x & ASSOC_ARRAY_PTR_TYPE_MASK;
++}
++static inline bool assoc_array_ptr_is_leaf(const struct assoc_array_ptr *x)
++{
++	return !assoc_array_ptr_is_meta(x);
++}
++static inline bool assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *x)
++{
++	return (unsigned long)x & ASSOC_ARRAY_PTR_SUBTYPE_MASK;
++}
++static inline bool assoc_array_ptr_is_node(const struct assoc_array_ptr *x)
++{
++	return !assoc_array_ptr_is_shortcut(x);
++}
++
++static inline void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *x)
++{
++	return (void *)((unsigned long)x & ~ASSOC_ARRAY_PTR_TYPE_MASK);
++}
++
++static inline
++unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *x)
++{
++	return (unsigned long)x &
++		~(ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK);
++}
++static inline
++struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *x)
++{
++	return (struct assoc_array_node *)__assoc_array_ptr_to_meta(x);
++}
++static inline
++struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *x)
++{
++	return (struct assoc_array_shortcut *)__assoc_array_ptr_to_meta(x);
++}
++
++static inline
++struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t)
++{
++	return (struct assoc_array_ptr *)((unsigned long)p | t);
++}
++static inline
++struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *p)
++{
++	return __assoc_array_x_to_ptr(p, ASSOC_ARRAY_PTR_LEAF_TYPE);
++}
++static inline
++struct assoc_array_ptr *assoc_array_node_to_ptr(const struct assoc_array_node *p)
++{
++	return __assoc_array_x_to_ptr(
++		p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE);
++}
++static inline
++struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *p)
++{
++	return __assoc_array_x_to_ptr(
++		p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE);
++}
++
++#endif /* CONFIG_ASSOCIATIVE_ARRAY */
++#endif /* _LINUX_ASSOC_ARRAY_PRIV_H */
+diff --git a/lib/Kconfig b/lib/Kconfig
+index 35da513..b7d3234 100644
+--- a/lib/Kconfig
++++ b/lib/Kconfig
+@@ -312,6 +312,20 @@ config TEXTSEARCH_FSM
+ config BTREE
+ 	boolean
+ 
++config ASSOCIATIVE_ARRAY
++	bool
++	help
++	  Generic associative array.  Can be searched and iterated over whilst
++	  it is being modified.  It is also reasonably quick to search and
++	  modify.  The algorithms are non-recursive, and the trees are highly
++	  capacious.
++
++	  See:
++
++		Documentation/assoc_array.txt
++
++	  for more information.
++
+ config HAS_IOMEM
+ 	boolean
+ 	depends on !NO_IOMEM
+diff --git a/lib/Makefile b/lib/Makefile
+index 7baccfd..2c59891 100644
+--- a/lib/Makefile
++++ b/lib/Makefile
+@@ -49,6 +49,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
+ obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
+ 
+ obj-$(CONFIG_BTREE) += btree.o
++obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
+ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
+ obj-$(CONFIG_DEBUG_LIST) += list_debug.o
+ obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
+diff --git a/lib/assoc_array.c b/lib/assoc_array.c
+new file mode 100644
+index 0000000..a095281
+--- /dev/null
++++ b/lib/assoc_array.c
+@@ -0,0 +1,1745 @@
++/* Generic associative array implementation.
++ *
++ * See Documentation/assoc_array.txt for information.
++ *
++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
++ * Written by David Howells (dhowells at redhat.com)
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public Licence
++ * as published by the Free Software Foundation; either version
++ * 2 of the Licence, or (at your option) any later version.
++ */
++//#define DEBUG
++#include <linux/slab.h>
++#include <linux/assoc_array_priv.h>
++
++/*
++ * Iterate over an associative array.  The caller must hold the RCU read lock
++ * or better.
++ */
++static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
++				       const struct assoc_array_ptr *stop,
++				       int (*iterator)(const void *leaf,
++						       void *iterator_data),
++				       void *iterator_data)
++{
++	const struct assoc_array_shortcut *shortcut;
++	const struct assoc_array_node *node;
++	const struct assoc_array_ptr *cursor, *ptr, *parent;
++	unsigned long has_meta;
++	int slot, ret;
++
++	cursor = root;
++
++begin_node:
++	if (assoc_array_ptr_is_shortcut(cursor)) {
++		/* Descend through a shortcut */
++		shortcut = assoc_array_ptr_to_shortcut(cursor);
++		smp_read_barrier_depends();
++		cursor = ACCESS_ONCE(shortcut->next_node);
++	}
++
++	node = assoc_array_ptr_to_node(cursor);
++	smp_read_barrier_depends();
++	slot = 0;
++
++	/* We perform two passes of each node.
++	 *
++	 * The first pass does all the leaves in this node.  This means we
++	 * don't miss any leaves if the node is split up by insertion whilst
++	 * we're iterating over the branches rooted here (we may, however, see
++	 * some leaves twice).
++	 */
++	has_meta = 0;
++	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++		ptr = ACCESS_ONCE(node->slots[slot]);
++		has_meta |= (unsigned long)ptr;
++		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
++			/* We need a barrier between the read of the pointer
++			 * and dereferencing the pointer - but only if we are
++			 * actually going to dereference it.
++			 */
++			smp_read_barrier_depends();
++
++			/* Invoke the callback */
++			ret = iterator(assoc_array_ptr_to_leaf(ptr),
++				       iterator_data);
++			if (ret)
++				return ret;
++		}
++	}
++
++	/* The second pass attends to all the metadata pointers.  If we follow
++	 * one of these we may find that we don't come back here, but rather go
++	 * back to a replacement node with the leaves in a different layout.
++	 *
++	 * We are guaranteed to make progress, however, as the slot number for
++	 * a particular portion of the key space cannot change - and we
++	 * continue at the back pointer + 1.
++	 */
++	if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
++		goto finished_node;
++	slot = 0;
++
++continue_node:
++	node = assoc_array_ptr_to_node(cursor);
++	smp_read_barrier_depends();
++
++	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++		ptr = ACCESS_ONCE(node->slots[slot]);
++		if (assoc_array_ptr_is_meta(ptr)) {
++			cursor = ptr;
++			goto begin_node;
++		}
++	}
++
++finished_node:
++	/* Move up to the parent (may need to skip back over a shortcut) */
++	parent = ACCESS_ONCE(node->back_pointer);
++	slot = node->parent_slot;
++	if (parent == stop)
++		return 0;
++
++	if (assoc_array_ptr_is_shortcut(parent)) {
++		shortcut = assoc_array_ptr_to_shortcut(parent);
++		smp_read_barrier_depends();
++		cursor = parent;
++		parent = ACCESS_ONCE(shortcut->back_pointer);
++		slot = shortcut->parent_slot;
++		if (parent == stop)
++			return 0;
++	}
++
++	/* Ascend to next slot in parent node */
++	cursor = parent;
++	slot++;
++	goto continue_node;
++}
++
++/**
++ * assoc_array_iterate - Pass all objects in the array to a callback
++ * @array: The array to iterate over.
++ * @iterator: The callback function.
++ * @iterator_data: Private data for the callback function.
++ *
++ * Iterate over all the objects in an associative array.  Each one will be
++ * presented to the iterator function.
++ *
++ * If the array is being modified concurrently with the iteration then it is
++ * possible that some objects in the array will be passed to the iterator
++ * callback more than once - though every object should be passed at least
++ * once.  If this is undesirable then the caller must lock against modification
++ * for the duration of this function.
++ *
++ * The function will return 0 if no objects were in the array or else it will
++ * return the result of the last iterator function called.  Iteration stops
++ * immediately if any call to the iteration function results in a non-zero
++ * return.
++ *
++ * The caller should hold the RCU read lock or better if concurrent
++ * modification is possible.
++ */
++int assoc_array_iterate(const struct assoc_array *array,
++			int (*iterator)(const void *object,
++					void *iterator_data),
++			void *iterator_data)
++{
++	struct assoc_array_ptr *root = ACCESS_ONCE(array->root);
++
++	if (!root)
++		return 0;
++	return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
++}
++
++enum assoc_array_walk_status {
++	assoc_array_walk_tree_empty,
++	assoc_array_walk_found_terminal_node,
++	assoc_array_walk_found_wrong_shortcut,
++};
++
++struct assoc_array_walk_result {
++	struct {
++		struct assoc_array_node	*node;	/* Node in which leaf might be found */
++		int		level;
++		int		slot;
++	} terminal_node;
++	struct {
++		struct assoc_array_shortcut *shortcut;
++		int		level;
++		int		sc_level;
++		unsigned long	sc_segments;
++		unsigned long	dissimilarity;
++	} wrong_shortcut;
++};
++
++/*
++ * Navigate through the internal tree looking for the closest node to the key.
++ */
++static enum assoc_array_walk_status
++assoc_array_walk(const struct assoc_array *array,
++		 const struct assoc_array_ops *ops,
++		 const void *index_key,
++		 struct assoc_array_walk_result *result)
++{
++	struct assoc_array_shortcut *shortcut;
++	struct assoc_array_node *node;
++	struct assoc_array_ptr *cursor, *ptr;
++	unsigned long sc_segments, dissimilarity;
++	unsigned long segments;
++	int level, sc_level, next_sc_level;
++	int slot;
++
++	pr_devel("-->%s()\n", __func__);
++
++	cursor = ACCESS_ONCE(array->root);
++	if (!cursor)
++		return assoc_array_walk_tree_empty;
++
++	level = 0;
++
++	/* Use segments from the key for the new leaf to navigate through the
++	 * internal tree, skipping through nodes and shortcuts that are on
++	 * route to the destination.  Eventually we'll come to a slot that is
++	 * either empty or contains a leaf at which point we've found a node in
++	 * which the leaf we're looking for might be found or into which it
++	 * should be inserted.
++	 */
++jumped:
++	segments = ops->get_key_chunk(index_key, level);
++	pr_devel("segments[%d]: %lx\n", level, segments);
++
++	if (assoc_array_ptr_is_shortcut(cursor))
++		goto follow_shortcut;
++
++consider_node:
++	node = assoc_array_ptr_to_node(cursor);
++	smp_read_barrier_depends();
++
++	slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
++	slot &= ASSOC_ARRAY_FAN_MASK;
++	ptr = ACCESS_ONCE(node->slots[slot]);
++
++	pr_devel("consider slot %x [ix=%d type=%lu]\n",
++		 slot, level, (unsigned long)ptr & 3);
++
++	if (!assoc_array_ptr_is_meta(ptr)) {
++		/* The node doesn't have a node/shortcut pointer in the slot
++		 * corresponding to the index key that we have to follow.
++		 */
++		result->terminal_node.node = node;
++		result->terminal_node.level = level;
++		result->terminal_node.slot = slot;
++		pr_devel("<--%s() = terminal_node\n", __func__);
++		return assoc_array_walk_found_terminal_node;
++	}
++
++	if (assoc_array_ptr_is_node(ptr)) {
++		/* There is a pointer to a node in the slot corresponding to
++		 * this index key segment, so we need to follow it.
++		 */
++		cursor = ptr;
++		level += ASSOC_ARRAY_LEVEL_STEP;
++		if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
++			goto consider_node;
++		goto jumped;
++	}
++
++	/* There is a shortcut in the slot corresponding to the index key
++	 * segment.  We follow the shortcut if its partial index key matches
++	 * this leaf's.  Otherwise we need to split the shortcut.
++	 */
++	cursor = ptr;
++follow_shortcut:
++	shortcut = assoc_array_ptr_to_shortcut(cursor);
++	smp_read_barrier_depends();
++	pr_devel("shortcut to %d\n", shortcut->skip_to_level);
++	sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
++	BUG_ON(sc_level > shortcut->skip_to_level);
++
++	do {
++		/* Check the leaf against the shortcut's index key a word at a
++		 * time, trimming the final word (the shortcut stores the index
++		 * key completely from the root to the shortcut's target).
++		 */
++		if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
++			segments = ops->get_key_chunk(index_key, sc_level);
++
++		sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
++		dissimilarity = segments ^ sc_segments;
++
++		if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
++			/* Trim segments that are beyond the shortcut */
++			int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
++			dissimilarity &= ~(ULONG_MAX << shift);
++			next_sc_level = shortcut->skip_to_level;
++		} else {
++			next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
++			next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
++		}
++
++		if (dissimilarity != 0) {
++			/* This shortcut points elsewhere */
++			result->wrong_shortcut.shortcut = shortcut;
++			result->wrong_shortcut.level = level;
++			result->wrong_shortcut.sc_level = sc_level;
++			result->wrong_shortcut.sc_segments = sc_segments;
++			result->wrong_shortcut.dissimilarity = dissimilarity;
++			return assoc_array_walk_found_wrong_shortcut;
++		}
++
++		sc_level = next_sc_level;
++	} while (sc_level < shortcut->skip_to_level);
++
++	/* The shortcut matches the leaf's index to this point. */
++	cursor = ACCESS_ONCE(shortcut->next_node);
++	if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
++		level = sc_level;
++		goto jumped;
++	} else {
++		level = sc_level;
++		goto consider_node;
++	}
++}
++
++/**
++ * assoc_array_find - Find an object by index key
++ * @array: The associative array to search.
++ * @ops: The operations to use.
++ * @index_key: The key to the object.
++ *
++ * Find an object in an associative array by walking through the internal tree
++ * to the node that should contain the object and then searching the leaves
++ * there.  NULL is returned if the requested object was not found in the array.
++ *
++ * The caller must hold the RCU read lock or better.
++ */
++void *assoc_array_find(const struct assoc_array *array,
++		       const struct assoc_array_ops *ops,
++		       const void *index_key)
++{
++	struct assoc_array_walk_result result;
++	const struct assoc_array_node *node;
++	const struct assoc_array_ptr *ptr;
++	const void *leaf;
++	int slot;
++
++	if (assoc_array_walk(array, ops, index_key, &result) !=
++	    assoc_array_walk_found_terminal_node)
++		return NULL;
++
++	node = result.terminal_node.node;
++	smp_read_barrier_depends();
++
++	/* If the target key is available to us, it has to be pointed to by
++	 * the terminal node.
++	 */
++	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++		ptr = ACCESS_ONCE(node->slots[slot]);
++		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
++			/* We need a barrier between the read of the pointer
++			 * and dereferencing the pointer - but only if we are
++			 * actually going to dereference it.
++			 */
++			leaf = assoc_array_ptr_to_leaf(ptr);
++			smp_read_barrier_depends();
++			if (ops->compare_object(leaf, index_key))
++				return (void *)leaf;
++		}
++	}
++
++	return NULL;
++}
++
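++/* Illustrative usage sketch (not part of this patch) - a caller would
++ * typically do something like:
++ *
++ *	rcu_read_lock();
++ *	obj = assoc_array_find(&my_array, &my_ops, &my_index_key);
++ *	rcu_read_unlock();
++ *
++ * where my_array, my_ops and my_index_key stand for hypothetical
++ * caller-side names.
++ */
++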
++/*
++ * Destructively iterate over an associative array.  The caller must prevent
++ * other simultaneous accesses.
++ */
++static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
++					const struct assoc_array_ops *ops)
++{
++	struct assoc_array_shortcut *shortcut;
++	struct assoc_array_node *node;
++	struct assoc_array_ptr *cursor, *parent = NULL;
++	int slot = -1;
++
++	pr_devel("-->%s()\n", __func__);
++
++	cursor = root;
++	if (!cursor) {
++		pr_devel("empty\n");
++		return;
++	}
++
++move_to_meta:
++	if (assoc_array_ptr_is_shortcut(cursor)) {
++		/* Descend through a shortcut */
++		pr_devel("[%d] shortcut\n", slot);
++		BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
++		shortcut = assoc_array_ptr_to_shortcut(cursor);
++		BUG_ON(shortcut->back_pointer != parent);
++		BUG_ON(slot != -1 && shortcut->parent_slot != slot);
++		parent = cursor;
++		cursor = shortcut->next_node;
++		slot = -1;
++		BUG_ON(!assoc_array_ptr_is_node(cursor));
++	}
++
++	pr_devel("[%d] node\n", slot);
++	node = assoc_array_ptr_to_node(cursor);
++	BUG_ON(node->back_pointer != parent);
++	BUG_ON(slot != -1 && node->parent_slot != slot);
++	slot = 0;
++
++continue_node:
++	pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
++	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++		struct assoc_array_ptr *ptr = node->slots[slot];
++		if (!ptr)
++			continue;
++		if (assoc_array_ptr_is_meta(ptr)) {
++			parent = cursor;
++			cursor = ptr;
++			goto move_to_meta;
++		}
++
++		if (ops) {
++			pr_devel("[%d] free leaf\n", slot);
++			ops->free_object(assoc_array_ptr_to_leaf(ptr));
++		}
++	}
++
++	parent = node->back_pointer;
++	slot = node->parent_slot;
++	pr_devel("free node\n");
++	kfree(node);
++	if (!parent)
++		return; /* Done */
++
++	/* Move back up to the parent (may need to free a shortcut on
++	 * the way up) */
++	if (assoc_array_ptr_is_shortcut(parent)) {
++		shortcut = assoc_array_ptr_to_shortcut(parent);
++		BUG_ON(shortcut->next_node != cursor);
++		cursor = parent;
++		parent = shortcut->back_pointer;
++		slot = shortcut->parent_slot;
++		pr_devel("free shortcut\n");
++		kfree(shortcut);
++		if (!parent)
++			return;
++
++		BUG_ON(!assoc_array_ptr_is_node(parent));
++	}
++
++	/* Ascend to next slot in parent node */
++	pr_devel("ascend to %p[%d]\n", parent, slot);
++	cursor = parent;
++	node = assoc_array_ptr_to_node(cursor);
++	slot++;
++	goto continue_node;
++}
++
++/**
++ * assoc_array_destroy - Destroy an associative array
++ * @array: The array to destroy.
++ * @ops: The operations to use.
++ *
++ * Discard all metadata and free all objects in an associative array.  The
++ * array will be empty and ready to use again upon completion.  This function
++ * cannot fail.
++ *
++ * The caller must prevent all other accesses whilst this takes place as no
++ * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
++ * accesses to continue.  On the other hand, no memory allocation is required.
++ */
++void assoc_array_destroy(struct assoc_array *array,
++			 const struct assoc_array_ops *ops)
++{
++	assoc_array_destroy_subtree(array->root, ops);
++	array->root = NULL;
++}
++
++/*
++ * Handle insertion into an empty tree.
++ */
++static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
++{
++	struct assoc_array_node *new_n0;
++
++	pr_devel("-->%s()\n", __func__);
++
++	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++	if (!new_n0)
++		return false;
++
++	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
++	edit->leaf_p = &new_n0->slots[0];
++	edit->adjust_count_on = new_n0;
++	edit->set[0].ptr = &edit->array->root;
++	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
++
++	pr_devel("<--%s() = ok [no root]\n", __func__);
++	return true;
++}
++
++/*
++ * Handle insertion into a terminal node.
++ */
++static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
++						  const struct assoc_array_ops *ops,
++						  const void *index_key,
++						  struct assoc_array_walk_result *result)
++{
++	struct assoc_array_shortcut *shortcut, *new_s0;
++	struct assoc_array_node *node, *new_n0, *new_n1, *side;
++	struct assoc_array_ptr *ptr;
++	unsigned long dissimilarity, base_seg, blank;
++	size_t keylen;
++	bool have_meta;
++	int level, diff;
++	int slot, next_slot, free_slot, i, j;
++
++	node	= result->terminal_node.node;
++	level	= result->terminal_node.level;
++	edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
++
++	pr_devel("-->%s()\n", __func__);
++
++	/* We arrived at a node which doesn't have an onward node or shortcut
++	 * pointer that we have to follow.  This means that (a) the leaf we
++	 * want must go here (either by insertion or replacement) or (b) we
++	 * need to split this node and insert in one of the fragments.
++	 */
++	free_slot = -1;
++
++	/* Firstly, we have to check the leaves in this node to see if there's
++	 * a matching one we should replace in place.
++	 */
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++		ptr = node->slots[i];
++		if (!ptr) {
++			free_slot = i;
++			continue;
++		}
++		if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
++			pr_devel("replace in slot %d\n", i);
++			edit->leaf_p = &node->slots[i];
++			edit->dead_leaf = node->slots[i];
++			pr_devel("<--%s() = ok [replace]\n", __func__);
++			return true;
++		}
++	}
++
++	/* If there is a free slot in this node then we can just insert the
++	 * leaf here.
++	 */
++	if (free_slot >= 0) {
++		pr_devel("insert in free slot %d\n", free_slot);
++		edit->leaf_p = &node->slots[free_slot];
++		edit->adjust_count_on = node;
++		pr_devel("<--%s() = ok [insert]\n", __func__);
++		return true;
++	}
++
++	/* The node has no spare slots - so we're either going to have to split
++	 * it or insert another node before it.
++	 *
++	 * Whatever, we're going to need at least two new nodes - so allocate
++	 * those now.  We may also need a new shortcut, but we deal with that
++	 * when we need it.
++	 */
++	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++	if (!new_n0)
++		return false;
++	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
++	new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++	if (!new_n1)
++		return false;
++	edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
++
++	/* We need to find out how similar the leaves are. */
++	pr_devel("no spare slots\n");
++	have_meta = false;
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++		ptr = node->slots[i];
++		if (assoc_array_ptr_is_meta(ptr)) {
++			edit->segment_cache[i] = 0xff;
++			have_meta = true;
++			continue;
++		}
++		base_seg = ops->get_object_key_chunk(
++			assoc_array_ptr_to_leaf(ptr), level);
++		base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
++		edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
++	}
++
++	if (have_meta) {
++		pr_devel("have meta\n");
++		goto split_node;
++	}
++
++	/* The node contains only leaves */
++	dissimilarity = 0;
++	base_seg = edit->segment_cache[0];
++	for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
++		dissimilarity |= edit->segment_cache[i] ^ base_seg;
++
++	pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
++
++	if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
++		/* The old leaves all cluster in the same slot.  We will need
++		 * to insert a shortcut if the new leaf wants to cluster with them.
++		 */
++		if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
++			goto all_leaves_cluster_together;
++
++		/* Otherwise we can just insert a new node ahead of the old
++		 * one.
++		 */
++		goto present_leaves_cluster_but_not_new_leaf;
++	}
++
++split_node:
++	pr_devel("split node\n");
++
++	/* We need to split the current node; we know that the node doesn't
++	 * simply contain a full set of leaves that cluster together (it
++	 * contains meta pointers and/or non-clustering leaves).
++	 *
++	 * We need to expel at least two leaves out of a set consisting of the
++	 * leaves in the node and the new leaf.
++	 *
++	 * We need a new node (n0) to replace the current one and a new node to
++	 * take the expelled leaves (n1).
++	 */
++	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
++	new_n0->back_pointer = node->back_pointer;
++	new_n0->parent_slot = node->parent_slot;
++	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
++	new_n1->parent_slot = -1; /* Need to calculate this */
++
++do_split_node:
++	pr_devel("do_split_node\n");
++
++	new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
++	new_n1->nr_leaves_on_branch = 0;
++
++	/* Begin by finding two matching leaves.  There have to be at least two
++	 * that match - even if there are meta pointers - because any leaf that
++	 * would match a slot with a meta pointer in it must be somewhere
++	 * behind that meta pointer and cannot be here.  Further, given N
++	 * remaining leaf slots, we now have N+1 leaves to go in them.
++	 */
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++		slot = edit->segment_cache[i];
++		if (slot != 0xff)
++			for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
++				if (edit->segment_cache[j] == slot)
++					goto found_slot_for_multiple_occupancy;
++	}
++found_slot_for_multiple_occupancy:
++	pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
++	BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
++	BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
++	BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
++
++	new_n1->parent_slot = slot;
++
++	/* Metadata pointers cannot change slot */
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
++		if (assoc_array_ptr_is_meta(node->slots[i]))
++			new_n0->slots[i] = node->slots[i];
++		else
++			new_n0->slots[i] = NULL;
++	BUG_ON(new_n0->slots[slot] != NULL);
++	new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
++
++	/* Filter the leaf pointers between the new nodes */
++	free_slot = -1;
++	next_slot = 0;
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++		if (assoc_array_ptr_is_meta(node->slots[i]))
++			continue;
++		if (edit->segment_cache[i] == slot) {
++			new_n1->slots[next_slot++] = node->slots[i];
++			new_n1->nr_leaves_on_branch++;
++		} else {
++			do {
++				free_slot++;
++			} while (new_n0->slots[free_slot] != NULL);
++			new_n0->slots[free_slot] = node->slots[i];
++		}
++	}
++
++	pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
++
++	if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
++		do {
++			free_slot++;
++		} while (new_n0->slots[free_slot] != NULL);
++		edit->leaf_p = &new_n0->slots[free_slot];
++		edit->adjust_count_on = new_n0;
++	} else {
++		edit->leaf_p = &new_n1->slots[next_slot++];
++		edit->adjust_count_on = new_n1;
++	}
++
++	BUG_ON(next_slot <= 1);
++
++	edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++		if (edit->segment_cache[i] == 0xff) {
++			ptr = node->slots[i];
++			BUG_ON(assoc_array_ptr_is_leaf(ptr));
++			if (assoc_array_ptr_is_node(ptr)) {
++				side = assoc_array_ptr_to_node(ptr);
++				edit->set_backpointers[i] = &side->back_pointer;
++			} else {
++				shortcut = assoc_array_ptr_to_shortcut(ptr);
++				edit->set_backpointers[i] = &shortcut->back_pointer;
++			}
++		}
++	}
++
++	ptr = node->back_pointer;
++	if (!ptr)
++		edit->set[0].ptr = &edit->array->root;
++	else if (assoc_array_ptr_is_node(ptr))
++		edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
++	else
++		edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
++	edit->excised_meta[0] = assoc_array_node_to_ptr(node);
++	pr_devel("<--%s() = ok [split node]\n", __func__);
++	return true;
++
++present_leaves_cluster_but_not_new_leaf:
++	/* All the old leaves cluster in the same slot, but the new leaf wants
++	 * to go into a different slot, so we create a new node to hold the new
++	 * leaf and a pointer to a new node holding all the old leaves.
++	 */
++	pr_devel("present leaves cluster but not new leaf\n");
++
++	new_n0->back_pointer = node->back_pointer;
++	new_n0->parent_slot = node->parent_slot;
++	new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
++	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
++	new_n1->parent_slot = edit->segment_cache[0];
++	new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
++	edit->adjust_count_on = new_n0;
++
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
++		new_n1->slots[i] = node->slots[i];
++
++	new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n1);
++	edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];
++
++	ptr = node->back_pointer;
++	if (!ptr)
++		edit->set[0].ptr = &edit->array->root;
++	else if (assoc_array_ptr_is_node(ptr))
++		edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
++	else
++		edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
++	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
++	edit->excised_meta[0] = assoc_array_node_to_ptr(node);
++	pr_devel("<--%s() = ok [insert node before]\n", __func__);
++	return true;
++
++all_leaves_cluster_together:
++	/* All the leaves, new and old, want to cluster together in this node
++	 * in the same slot, so we have to replace this node with a shortcut to
++	 * skip over the identical parts of the key and then place a pair of
++	 * nodes, one inside the other, at the end of the shortcut and
++	 * distribute the keys between them.
++	 *
++	 * Firstly we need to work out where the leaves start diverging as a
++	 * bit position into their keys so that we know how big the shortcut
++	 * needs to be.
++	 *
++	 * We only need to make a single pass of N of the N+1 leaves because if
++	 * any keys differ between themselves at bit X then at least one of
++	 * them must also differ with the base key at bit X or before.
++	 */
++	pr_devel("all leaves cluster together\n");
++	diff = INT_MAX;
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++		int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
++					  assoc_array_ptr_to_leaf(node->slots[i]));
++		if (x < diff) {
++			BUG_ON(x < 0);
++			diff = x;
++		}
++	}
++	BUG_ON(diff == INT_MAX);
++	BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
++
++	keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
++	keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
++
++	new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
++			 keylen * sizeof(unsigned long), GFP_KERNEL);
++	if (!new_s0)
++		return false;
++	edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
++
++	edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
++	new_s0->back_pointer = node->back_pointer;
++	new_s0->parent_slot = node->parent_slot;
++	new_s0->next_node = assoc_array_node_to_ptr(new_n0);
++	new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
++	new_n0->parent_slot = 0;
++	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
++	new_n1->parent_slot = -1; /* Need to calculate this */
++
++	new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
++	pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
++	BUG_ON(level <= 0);
++
++	for (i = 0; i < keylen; i++)
++		new_s0->index_key[i] =
++			ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
++
++	blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
++	pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
++	new_s0->index_key[keylen - 1] &= ~blank;
++
++	/* This now reduces to a node splitting exercise for which we'll need
++	 * to regenerate the disparity table.
++	 */
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++		ptr = node->slots[i];
++		base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
++						     level);
++		base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
++		edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
++	}
++
++	base_seg = ops->get_key_chunk(index_key, level);
++	base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
++	edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
++	goto do_split_node;
++}
++
++/*
++ * Handle insertion into the middle of a shortcut.
++ */
++static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
++					    const struct assoc_array_ops *ops,
++					    struct assoc_array_walk_result *result)
++{
++	struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
++	struct assoc_array_node *node, *new_n0, *side;
++	unsigned long sc_segments, dissimilarity, blank;
++	size_t keylen;
++	int level, sc_level, diff;
++	int sc_slot;
++
++	shortcut	= result->wrong_shortcut.shortcut;
++	level		= result->wrong_shortcut.level;
++	sc_level	= result->wrong_shortcut.sc_level;
++	sc_segments	= result->wrong_shortcut.sc_segments;
++	dissimilarity	= result->wrong_shortcut.dissimilarity;
++
++	pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
++		 __func__, level, dissimilarity, sc_level);
++
++	/* We need to split a shortcut and insert a node between the two
++	 * pieces.  Zero-length pieces will be dispensed with entirely.
++	 *
++	 * First of all, we need to find out in which level the first
++	 * difference was.
++	 */
++	diff = __ffs(dissimilarity);
++	diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
++	diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
++	pr_devel("diff=%d\n", diff);
++
++	if (!shortcut->back_pointer) {
++		edit->set[0].ptr = &edit->array->root;
++	} else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
++		node = assoc_array_ptr_to_node(shortcut->back_pointer);
++		edit->set[0].ptr = &node->slots[shortcut->parent_slot];
++	} else {
++		BUG();
++	}
++
++	edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
++
++	/* Create a new node now since we're going to need it anyway */
++	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++	if (!new_n0)
++		return false;
++	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
++	edit->adjust_count_on = new_n0;
++
++	/* Insert a new shortcut before the new node if this segment isn't of
++	 * zero length - otherwise we just connect the new node directly to the
++	 * parent.
++	 */
++	level += ASSOC_ARRAY_LEVEL_STEP;
++	if (diff > level) {
++		pr_devel("pre-shortcut %d...%d\n", level, diff);
++		keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
++		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
++
++		new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
++				 keylen * sizeof(unsigned long), GFP_KERNEL);
++		if (!new_s0)
++			return false;
++		edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
++		edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
++		new_s0->back_pointer = shortcut->back_pointer;
++		new_s0->parent_slot = shortcut->parent_slot;
++		new_s0->next_node = assoc_array_node_to_ptr(new_n0);
++		new_s0->skip_to_level = diff;
++
++		new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
++		new_n0->parent_slot = 0;
++
++		memcpy(new_s0->index_key, shortcut->index_key,
++		       keylen * sizeof(unsigned long));
++
++		blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
++		pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
++		new_s0->index_key[keylen - 1] &= ~blank;
++	} else {
++		pr_devel("no pre-shortcut\n");
++		edit->set[0].to = assoc_array_node_to_ptr(new_n0);
++		new_n0->back_pointer = shortcut->back_pointer;
++		new_n0->parent_slot = shortcut->parent_slot;
++	}
++
++	side = assoc_array_ptr_to_node(shortcut->next_node);
++	new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
++
++	/* We need to know which slot in the new node is going to take a
++	 * metadata pointer.
++	 */
++	sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
++	sc_slot &= ASSOC_ARRAY_FAN_MASK;
++
++	pr_devel("new slot %lx >> %d -> %d\n",
++		 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
++
++	/* Determine whether we need to follow the new node with a replacement
++	 * for the current shortcut.  We could in theory reuse the current
++	 * shortcut if its parent slot number doesn't change - but that's a
++	 * 1-in-16 chance so not worth expending the code upon.
++	 */
++	level = diff + ASSOC_ARRAY_LEVEL_STEP;
++	if (level < shortcut->skip_to_level) {
++		pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
++		keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
++		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
++
++		new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
++				 keylen * sizeof(unsigned long), GFP_KERNEL);
++		if (!new_s1)
++			return false;
++		edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
++
++		new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
++		new_s1->parent_slot = sc_slot;
++		new_s1->next_node = shortcut->next_node;
++		new_s1->skip_to_level = shortcut->skip_to_level;
++
++		new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
++
++		memcpy(new_s1->index_key, shortcut->index_key,
++		       keylen * sizeof(unsigned long));
++
++		edit->set[1].ptr = &side->back_pointer;
++		edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
++	} else {
++		pr_devel("no post-shortcut\n");
++
++		/* We don't have to replace the pointed-to node as long as we
++		 * use memory barriers to make sure the parent slot number is
++		 * changed before the back pointer (the parent slot number is
++		 * irrelevant to the old parent shortcut).
++		 */
++		new_n0->slots[sc_slot] = shortcut->next_node;
++		edit->set_parent_slot[0].p = &side->parent_slot;
++		edit->set_parent_slot[0].to = sc_slot;
++		edit->set[1].ptr = &side->back_pointer;
++		edit->set[1].to = assoc_array_node_to_ptr(new_n0);
++	}
++
++	/* Install the new leaf in a spare slot in the new node. */
++	if (sc_slot == 0)
++		edit->leaf_p = &new_n0->slots[1];
++	else
++		edit->leaf_p = &new_n0->slots[0];
++
++	pr_devel("<--%s() = ok [split shortcut]\n", __func__);
++	return true;
++}
++
++/**
++ * assoc_array_insert - Script insertion of an object into an associative array
++ * @array: The array to insert into.
++ * @ops: The operations to use.
++ * @index_key: The key to insert at.
++ * @object: The object to insert.
++ *
++ * Precalculate and preallocate a script for the insertion or replacement of an
++ * object in an associative array.  This results in an edit script that can
++ * either be applied or cancelled.
++ *
++ * The function returns a pointer to an edit script or -ENOMEM.
++ *
++ * The caller should lock against other modifications and must continue to hold
++ * the lock until assoc_array_apply_edit() has been called.
++ *
++ * Accesses to the tree may take place concurrently with this function,
++ * provided they hold the RCU read lock.
++ */
++struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
++					    const struct assoc_array_ops *ops,
++					    const void *index_key,
++					    void *object)
++{
++	struct assoc_array_walk_result result;
++	struct assoc_array_edit *edit;
++
++	pr_devel("-->%s()\n", __func__);
++
++	/* The leaf pointer we're given must not have the bottom bit set as we
++	 * use those for type-marking the pointer.  NULL pointers would
++	 * normally be disallowed too, as they indicate an empty slot, but we
++	 * permit them at this point because the object can still be set with
++	 * assoc_array_insert_set_object() before the edit is applied.
++	 */
++	BUG_ON(assoc_array_ptr_is_meta(object));
++
++	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
++	if (!edit)
++		return ERR_PTR(-ENOMEM);
++	edit->array = array;
++	edit->ops = ops;
++	edit->leaf = assoc_array_leaf_to_ptr(object);
++	edit->adjust_count_by = 1;
++
++	switch (assoc_array_walk(array, ops, index_key, &result)) {
++	case assoc_array_walk_tree_empty:
++		/* Allocate a root node if there isn't one yet */
++		if (!assoc_array_insert_in_empty_tree(edit))
++			goto enomem;
++		return edit;
++
++	case assoc_array_walk_found_terminal_node:
++		/* We found a node that doesn't have a node/shortcut pointer in
++		 * the slot corresponding to the index key that we have to
++		 * follow.
++		 */
++		if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
++							   &result))
++			goto enomem;
++		return edit;
++
++	case assoc_array_walk_found_wrong_shortcut:
++		/* We found a shortcut that didn't match our key in a slot we
++		 * needed to follow.
++		 */
++		if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
++			goto enomem;
++		return edit;
++	}
++
++enomem:
++	/* Clean up after an out of memory error */
++	pr_devel("enomem\n");
++	assoc_array_cancel_edit(edit);
++	return ERR_PTR(-ENOMEM);
++}
++
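++/* Illustrative calling sequence (not part of this patch), with the caller
++ * holding its own write lock:
++ *
++ *	edit = assoc_array_insert(&my_array, &my_ops, &my_index_key, obj);
++ *	if (IS_ERR(edit))
++ *		return PTR_ERR(edit);
++ *	assoc_array_apply_edit(edit);
++ *
++ * or assoc_array_cancel_edit(edit) to abandon the change; my_array, my_ops
++ * and my_index_key stand for hypothetical caller-side names.
++ */
++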
++/**
++ * assoc_array_insert_set_object - Set the new object pointer in an edit script
++ * @edit: The edit script to modify.
++ * @object: The object pointer to set.
++ *
++ * Change the object to be inserted in an edit script.  The old object is not
++ * freed.  This must be done prior to applying the script.
++ */
++void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
++{
++	BUG_ON(!object);
++	edit->leaf = assoc_array_leaf_to_ptr(object);
++}
++
++struct assoc_array_delete_collapse_context {
++	struct assoc_array_node	*node;
++	const void		*skip_leaf;
++	int			slot;
++};
++
++/*
++ * Subtree collapse to node iterator.
++ */
++static int assoc_array_delete_collapse_iterator(const void *leaf,
++						void *iterator_data)
++{
++	struct assoc_array_delete_collapse_context *collapse = iterator_data;
++
++	if (leaf == collapse->skip_leaf)
++		return 0;
++
++	BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
++
++	collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
++	return 0;
++}
++
++/**
++ * assoc_array_delete - Script deletion of an object from an associative array
++ * @array: The array to search.
++ * @ops: The operations to use.
++ * @index_key: The key to the object.
++ *
++ * Precalculate and preallocate a script for the deletion of an object from an
++ * associative array.  This results in an edit script that can either be
++ * applied or cancelled.
++ *
++ * The function returns a pointer to an edit script if the object was found,
++ * NULL if the object was not found, or -ENOMEM if out of memory.
++ *
++ * The caller should lock against other modifications and must continue to hold
++ * the lock until assoc_array_apply_edit() has been called.
++ *
++ * Accesses to the tree may take place concurrently with this function,
++ * provided they hold the RCU read lock.
++ */
++struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
++					    const struct assoc_array_ops *ops,
++					    const void *index_key)
++{
++	struct assoc_array_delete_collapse_context collapse;
++	struct assoc_array_walk_result result;
++	struct assoc_array_node *node, *new_n0;
++	struct assoc_array_edit *edit;
++	struct assoc_array_ptr *ptr;
++	bool has_meta;
++	int slot, i;
++
++	pr_devel("-->%s()\n", __func__);
++
++	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
++	if (!edit)
++		return ERR_PTR(-ENOMEM);
++	edit->array = array;
++	edit->ops = ops;
++	edit->adjust_count_by = -1;
++
++	switch (assoc_array_walk(array, ops, index_key, &result)) {
++	case assoc_array_walk_found_terminal_node:
++		/* We found a node that should contain the leaf we've been
++		 * asked to remove - *if* it's in the tree.
++		 */
++		pr_devel("terminal_node\n");
++		node = result.terminal_node.node;
++
++		for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++			ptr = node->slots[slot];
++			if (ptr &&
++			    assoc_array_ptr_is_leaf(ptr) &&
++			    ops->compare_object(assoc_array_ptr_to_leaf(ptr),
++						index_key))
++				goto found_leaf;
++		}
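++		/* Fall through: no matching leaf, so there's nothing to delete */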
++	case assoc_array_walk_tree_empty:
++	case assoc_array_walk_found_wrong_shortcut:
++	default:
++		assoc_array_cancel_edit(edit);
++		pr_devel("not found\n");
++		return NULL;
++	}
++
++found_leaf:
++	BUG_ON(array->nr_leaves_on_tree <= 0);
++
++	/* In the simplest form of deletion we just clear the slot and release
++	 * the leaf after a suitable interval.
++	 */
++	edit->dead_leaf = node->slots[slot];
++	edit->set[0].ptr = &node->slots[slot];
++	edit->set[0].to = NULL;
++	edit->adjust_count_on = node;
++
++	/* If that concludes erasure of the last leaf, then delete the entire
++	 * internal array.
++	 */
++	if (array->nr_leaves_on_tree == 1) {
++		edit->set[1].ptr = &array->root;
++		edit->set[1].to = NULL;
++		edit->adjust_count_on = NULL;
++		edit->excised_subtree = array->root;
++		pr_devel("all gone\n");
++		return edit;
++	}
++
++	/* However, we'd also like to clear up some metadata blocks if we
++	 * possibly can.
++	 *
++	 * We go for a simple algorithm of: if this node has FAN_OUT or fewer
++	 * leaves in it, then attempt to collapse it - and attempt to
++	 * recursively collapse up the tree.  (The leaf count tested below
++	 * still includes the leaf being deleted, hence FAN_OUT + 1.)
++	 *
++	 * We could also try and collapse in partially filled subtrees to take
++	 * up space in this node.
++	 */
++	if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
++		struct assoc_array_node *parent, *grandparent;
++		struct assoc_array_ptr *ptr;
++
++		/* First of all, we need to know if this node has metadata so
++		 * that we don't try collapsing if all the leaves are already
++		 * here.
++		 */
++		has_meta = false;
++		for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++			ptr = node->slots[i];
++			if (assoc_array_ptr_is_meta(ptr)) {
++				has_meta = true;
++				break;
++			}
++		}
++
++		pr_devel("leaves: %ld [m=%d]\n",
++			 node->nr_leaves_on_branch - 1, has_meta);
++
++		/* Look further up the tree to see if we can collapse this node
++		 * into a more proximal node too.
++		 */
++		parent = node;
++	collapse_up:
++		pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
++
++		ptr = parent->back_pointer;
++		if (!ptr)
++			goto do_collapse;
++		if (assoc_array_ptr_is_shortcut(ptr)) {
++			struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
++			ptr = s->back_pointer;
++			if (!ptr)
++				goto do_collapse;
++		}
++
++		grandparent = assoc_array_ptr_to_node(ptr);
++		if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
++			parent = grandparent;
++			goto collapse_up;
++		}
++
++	do_collapse:
++		/* There's no point collapsing if the original node has no meta
++		 * pointers to discard and we didn't merge into one of that
++		 * node's ancestors.
++		 */
++		if (has_meta || parent != node) {
++			node = parent;
++
++			/* Create a new node to collapse into */
++			new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++			if (!new_n0)
++				goto enomem;
++			edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
++
++			new_n0->back_pointer = node->back_pointer;
++			new_n0->parent_slot = node->parent_slot;
++			new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
++			edit->adjust_count_on = new_n0;
++
++			collapse.node = new_n0;
++			collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
++			collapse.slot = 0;
++			assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
++						    node->back_pointer,
++						    assoc_array_delete_collapse_iterator,
++						    &collapse);
++			pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
++			BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
++
++			if (!node->back_pointer) {
++				edit->set[1].ptr = &array->root;
++			} else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
++				BUG();
++			} else if (assoc_array_ptr_is_node(node->back_pointer)) {
++				struct assoc_array_node *p =
++					assoc_array_ptr_to_node(node->back_pointer);
++				edit->set[1].ptr = &p->slots[node->parent_slot];
++			} else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
++				struct assoc_array_shortcut *s =
++					assoc_array_ptr_to_shortcut(node->back_pointer);
++				edit->set[1].ptr = &s->next_node;
++			}
++			edit->set[1].to = assoc_array_node_to_ptr(new_n0);
++			edit->excised_subtree = assoc_array_node_to_ptr(node);
++		}
++	}
++
++	return edit;
++
++enomem:
++	/* Clean up after an out of memory error */
++	pr_devel("enomem\n");
++	assoc_array_cancel_edit(edit);
++	return ERR_PTR(-ENOMEM);
++}
++
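++/* Illustrative sketch (not part of this patch) of handling the three
++ * possible returns from assoc_array_delete():
++ *
++ *	edit = assoc_array_delete(&my_array, &my_ops, &my_index_key);
++ *	if (IS_ERR(edit))
++ *		return PTR_ERR(edit);	(allocation failure)
++ *	if (!edit)
++ *		return -ENOENT;		(object not found)
++ *	assoc_array_apply_edit(edit);
++ *
++ * my_array, my_ops and my_index_key stand for hypothetical caller-side
++ * names.
++ */
++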
++/**
++ * assoc_array_clear - Script deletion of all objects from an associative array
++ * @array: The array to clear.
++ * @ops: The operations to use.
++ *
++ * Precalculate and preallocate a script for the deletion of all the objects
++ * from an associative array.  This results in an edit script that can either
++ * be applied or cancelled.
++ *
++ * The function returns a pointer to an edit script if there are objects to
++ * be deleted, NULL if there are no objects in the array, or -ENOMEM if out
++ * of memory.
++ *
++ * The caller should lock against other modifications and must continue to hold
++ * the lock until assoc_array_apply_edit() has been called.
++ *
++ * Accesses to the tree may take place concurrently with this function,
++ * provided they hold the RCU read lock.
++ */
++struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
++					   const struct assoc_array_ops *ops)
++{
++	struct assoc_array_edit *edit;
++
++	pr_devel("-->%s()\n", __func__);
++
++	if (!array->root)
++		return NULL;
++
++	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
++	if (!edit)
++		return ERR_PTR(-ENOMEM);
++	edit->array = array;
++	edit->ops = ops;
++	edit->set[1].ptr = &array->root;
++	edit->set[1].to = NULL;
++	edit->excised_subtree = array->root;
++	edit->ops_for_excised_subtree = ops;
++	pr_devel("all gone\n");
++	return edit;
++}
++
++/*
++ * Handle the deferred destruction after an applied edit.
++ */
++static void assoc_array_rcu_cleanup(struct rcu_head *head)
++{
++	struct assoc_array_edit *edit =
++		container_of(head, struct assoc_array_edit, rcu);
++	int i;
++
++	pr_devel("-->%s()\n", __func__);
++
++	if (edit->dead_leaf)
++		edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
++	for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
++		if (edit->excised_meta[i])
++			kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
++
++	if (edit->excised_subtree) {
++		BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
++		if (assoc_array_ptr_is_node(edit->excised_subtree)) {
++			struct assoc_array_node *n =
++				assoc_array_ptr_to_node(edit->excised_subtree);
++			n->back_pointer = NULL;
++		} else {
++			struct assoc_array_shortcut *s =
++				assoc_array_ptr_to_shortcut(edit->excised_subtree);
++			s->back_pointer = NULL;
++		}
++		assoc_array_destroy_subtree(edit->excised_subtree,
++					    edit->ops_for_excised_subtree);
++	}
++
++	kfree(edit);
++}
++
++/**
++ * assoc_array_apply_edit - Apply an edit script to an associative array
++ * @edit: The script to apply.
++ *
++ * Apply an edit script to an associative array to effect an insertion,
++ * deletion or clearance.  As the edit script includes preallocated memory,
++ * this is guaranteed not to fail.
++ *
++ * The edit script, dead objects and dead metadata will be scheduled for
++ * destruction after an RCU grace period to permit those doing read-only
++ * accesses on the array to continue to do so under the RCU read lock whilst
++ * the edit is taking place.
++ */
++void assoc_array_apply_edit(struct assoc_array_edit *edit)
++{
++	struct assoc_array_shortcut *shortcut;
++	struct assoc_array_node *node;
++	struct assoc_array_ptr *ptr;
++	int i;
++
++	pr_devel("-->%s()\n", __func__);
++
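++	/* Barrier ordering: each class of pointer is published before any
++	 * pointer that could lead an RCU reader to it, so a concurrent
++	 * reader can never follow a pointer into a structure that hasn't
++	 * been fully written yet.
++	 */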
++	smp_wmb();
++	if (edit->leaf_p)
++		*edit->leaf_p = edit->leaf;
++
++	smp_wmb();
++	for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
++		if (edit->set_parent_slot[i].p)
++			*edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
++
++	smp_wmb();
++	for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
++		if (edit->set_backpointers[i])
++			*edit->set_backpointers[i] = edit->set_backpointers_to;
++
++	smp_wmb();
++	for (i = 0; i < ARRAY_SIZE(edit->set); i++)
++		if (edit->set[i].ptr)
++			*edit->set[i].ptr = edit->set[i].to;
++
++	if (edit->array->root == NULL) {
++		edit->array->nr_leaves_on_tree = 0;
++	} else if (edit->adjust_count_on) {
++		node = edit->adjust_count_on;
++		for (;;) {
++			node->nr_leaves_on_branch += edit->adjust_count_by;
++
++			ptr = node->back_pointer;
++			if (!ptr)
++				break;
++			if (assoc_array_ptr_is_shortcut(ptr)) {
++				shortcut = assoc_array_ptr_to_shortcut(ptr);
++				ptr = shortcut->back_pointer;
++				if (!ptr)
++					break;
++			}
++			BUG_ON(!assoc_array_ptr_is_node(ptr));
++			node = assoc_array_ptr_to_node(ptr);
++		}
++
++		edit->array->nr_leaves_on_tree += edit->adjust_count_by;
++	}
++
++	call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
++}
++
++/**
++ * assoc_array_cancel_edit - Discard an edit script.
++ * @edit: The script to discard.
++ *
++ * Free an edit script and all the preallocated data it holds without making
++ * any changes to the associative array it was intended for.
++ *
++ * NOTE!  In the case of an insertion script, this does _not_ release the leaf
++ * that was to be inserted.  That is left to the caller.
++ */
++void assoc_array_cancel_edit(struct assoc_array_edit *edit)
++{
++	struct assoc_array_ptr *ptr;
++	int i;
++
++	pr_devel("-->%s()\n", __func__);
++
++	/* Clean up after an out of memory error */
++	for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
++		ptr = edit->new_meta[i];
++		if (ptr) {
++			if (assoc_array_ptr_is_node(ptr))
++				kfree(assoc_array_ptr_to_node(ptr));
++			else
++				kfree(assoc_array_ptr_to_shortcut(ptr));
++		}
++	}
++	kfree(edit);
++}
++
++/**
++ * assoc_array_gc - Garbage collect an associative array.
++ * @array: The array to clean.
++ * @ops: The operations to use.
++ * @iterator: A callback function to pass judgement on each object.
++ * @iterator_data: Private data for the callback function.
++ *
++ * Collect garbage from an associative array and pack down the internal tree to
++ * save memory.
++ *
++ * The iterator function is asked to pass judgement upon each object in the
++ * array.  If it returns false, the object is discarded and if it returns true,
++ * the object is kept.  If it returns true, it must increment the object's
++ * usage count (or whatever it needs to do to retain it) before returning.
++ *
++ * This function returns 0 if successful or -ENOMEM if out of memory.  In the
++ * latter case, the array is not changed.
++ *
++ * The caller should lock against other modifications and must continue to hold
++ * the lock until assoc_array_apply_edit() has been called.
++ *
++ * Accesses to the tree may take place concurrently with this function,
++ * provided they hold the RCU read lock.
++ */
++int assoc_array_gc(struct assoc_array *array,
++		   const struct assoc_array_ops *ops,
++		   bool (*iterator)(void *object, void *iterator_data),
++		   void *iterator_data)
++{
++	struct assoc_array_shortcut *shortcut, *new_s;
++	struct assoc_array_node *node, *new_n;
++	struct assoc_array_edit *edit;
++	struct assoc_array_ptr *cursor, *ptr;
++	struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
++	unsigned long nr_leaves_on_tree;
++	int keylen, slot, nr_free, next_slot, i;
++
++	pr_devel("-->%s()\n", __func__);
++
++	if (!array->root)
++		return 0;
++
++	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
++	if (!edit)
++		return -ENOMEM;
++	edit->array = array;
++	edit->ops = ops;
++	edit->ops_for_excised_subtree = ops;
++	edit->set[0].ptr = &array->root;
++	edit->excised_subtree = array->root;
++
++	new_root = new_parent = NULL;
++	new_ptr_pp = &new_root;
++	cursor = array->root;
++
++descend:
++	/* If this point is a shortcut, then we need to duplicate it and
++	 * advance the target cursor.
++	 */
++	if (assoc_array_ptr_is_shortcut(cursor)) {
++		shortcut = assoc_array_ptr_to_shortcut(cursor);
++		keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
++		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
++		new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
++				keylen * sizeof(unsigned long), GFP_KERNEL);
++		if (!new_s)
++			goto enomem;
++		pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
++		memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
++					 keylen * sizeof(unsigned long)));
++		new_s->back_pointer = new_parent;
++		new_s->parent_slot = shortcut->parent_slot;
++		*new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
++		new_ptr_pp = &new_s->next_node;
++		cursor = shortcut->next_node;
++	}
++
++	/* Duplicate the node at this position */
++	node = assoc_array_ptr_to_node(cursor);
++	new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++	if (!new_n)
++		goto enomem;
++	pr_devel("dup node %p -> %p\n", node, new_n);
++	new_n->back_pointer = new_parent;
++	new_n->parent_slot = node->parent_slot;
++	*new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
++	new_ptr_pp = NULL;
++	slot = 0;
++
++continue_node:
++	/* Filter across any leaves and gc any subtrees */
++	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++		ptr = node->slots[slot];
++		if (!ptr)
++			continue;
++
++		if (assoc_array_ptr_is_leaf(ptr)) {
++			if (iterator(assoc_array_ptr_to_leaf(ptr),
++				     iterator_data))
++				/* The iterator will have done any reference
++				 * counting on the object for us.
++				 */
++				new_n->slots[slot] = ptr;
++			continue;
++		}
++
++		new_ptr_pp = &new_n->slots[slot];
++		cursor = ptr;
++		goto descend;
++	}
++
++	pr_devel("-- compress node %p --\n", new_n);
++
++	/* Count up the number of empty slots in this node and work out the
++	 * subtree leaf count.
++	 */
++	new_n->nr_leaves_on_branch = 0;
++	nr_free = 0;
++	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++		ptr = new_n->slots[slot];
++		if (!ptr)
++			nr_free++;
++		else if (assoc_array_ptr_is_leaf(ptr))
++			new_n->nr_leaves_on_branch++;
++	}
++	pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
++
++	/* See what we can fold in */
++	next_slot = 0;
++	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++		struct assoc_array_shortcut *s;
++		struct assoc_array_node *child;
++
++		ptr = new_n->slots[slot];
++		if (!ptr || assoc_array_ptr_is_leaf(ptr))
++			continue;
++
++		s = NULL;
++		if (assoc_array_ptr_is_shortcut(ptr)) {
++			s = assoc_array_ptr_to_shortcut(ptr);
++			ptr = s->next_node;
++		}
++
++		child = assoc_array_ptr_to_node(ptr);
++		new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
++
++		if (child->nr_leaves_on_branch <= nr_free + 1) {
++			/* Fold the child node into this one */
++			pr_devel("[%d] fold node %lu/%d [nx %d]\n",
++				 slot, child->nr_leaves_on_branch, nr_free + 1,
++				 next_slot);
++
++			/* We would already have reaped an intervening shortcut
++			 * on the way back up the tree.
++			 */
++			BUG_ON(s);
++
++			new_n->slots[slot] = NULL;
++			nr_free++;
++			if (slot < next_slot)
++				next_slot = slot;
++			for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++				struct assoc_array_ptr *p = child->slots[i];
++				if (!p)
++					continue;
++				BUG_ON(assoc_array_ptr_is_meta(p));
++				while (new_n->slots[next_slot])
++					next_slot++;
++				BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
++				new_n->slots[next_slot++] = p;
++				nr_free--;
++			}
++			kfree(child);
++		} else {
++			pr_devel("[%d] retain node %lu/%d [nx %d]\n",
++				 slot, child->nr_leaves_on_branch, nr_free + 1,
++				 next_slot);
++		}
++	}
++
++	pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
++
++	nr_leaves_on_tree = new_n->nr_leaves_on_branch;
++
++	/* Excise this node if it is singly occupied by a shortcut */
++	if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
++		for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
++			if ((ptr = new_n->slots[slot]))
++				break;
++
++		if (assoc_array_ptr_is_meta(ptr) &&
++		    assoc_array_ptr_is_shortcut(ptr)) {
++			pr_devel("excise node %p with 1 shortcut\n", new_n);
++			new_s = assoc_array_ptr_to_shortcut(ptr);
++			new_parent = new_n->back_pointer;
++			slot = new_n->parent_slot;
++			kfree(new_n);
++			if (!new_parent) {
++				new_s->back_pointer = NULL;
++				new_s->parent_slot = 0;
++				new_root = ptr;
++				goto gc_complete;
++			}
++
++			if (assoc_array_ptr_is_shortcut(new_parent)) {
++				/* We can discard any preceding shortcut also */
++				struct assoc_array_shortcut *s =
++					assoc_array_ptr_to_shortcut(new_parent);
++
++				pr_devel("excise preceding shortcut\n");
++
++				new_parent = new_s->back_pointer = s->back_pointer;
++				slot = new_s->parent_slot = s->parent_slot;
++				kfree(s);
++				if (!new_parent) {
++					new_s->back_pointer = NULL;
++					new_s->parent_slot = 0;
++					new_root = ptr;
++					goto gc_complete;
++				}
++			}
++
++			new_s->back_pointer = new_parent;
++			new_s->parent_slot = slot;
++			new_n = assoc_array_ptr_to_node(new_parent);
++			new_n->slots[slot] = ptr;
++			goto ascend_old_tree;
++		}
++	}
++
++	/* Excise any shortcuts we might encounter that point to nodes that
++	 * only contain leaves.
++	 */
++	ptr = new_n->back_pointer;
++	if (!ptr)
++		goto gc_complete;
++
++	if (assoc_array_ptr_is_shortcut(ptr)) {
++		new_s = assoc_array_ptr_to_shortcut(ptr);
++		new_parent = new_s->back_pointer;
++		slot = new_s->parent_slot;
++
++		if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
++			struct assoc_array_node *n;
++
++			pr_devel("excise shortcut\n");
++			new_n->back_pointer = new_parent;
++			new_n->parent_slot = slot;
++			kfree(new_s);
++			if (!new_parent) {
++				new_root = assoc_array_node_to_ptr(new_n);
++				goto gc_complete;
++			}
++
++			n = assoc_array_ptr_to_node(new_parent);
++			n->slots[slot] = assoc_array_node_to_ptr(new_n);
++		}
++	} else {
++		new_parent = ptr;
++	}
++	new_n = assoc_array_ptr_to_node(new_parent);
++
++ascend_old_tree:
++	ptr = node->back_pointer;
++	if (assoc_array_ptr_is_shortcut(ptr)) {
++		shortcut = assoc_array_ptr_to_shortcut(ptr);
++		slot = shortcut->parent_slot;
++		cursor = shortcut->back_pointer;
++	} else {
++		slot = node->parent_slot;
++		cursor = ptr;
++	}
++	BUG_ON(!ptr);
++	node = assoc_array_ptr_to_node(cursor);
++	slot++;
++	goto continue_node;
++
++gc_complete:
++	edit->set[0].to = new_root;
++	assoc_array_apply_edit(edit);
++	edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
++	return 0;
++
++enomem:
++	pr_devel("enomem\n");
++	assoc_array_destroy_subtree(new_root, edit->ops);
++	kfree(edit);
++	return -ENOMEM;
++}
+-- 
+1.8.3.1
+
+
+From 03ac60b84587fa8e57e7ec5cd3d59b7fa8d97c79 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells at redhat.com>
+Date: Fri, 30 Aug 2013 15:37:54 +0100
+Subject: [PATCH 10/10] KEYS: Expand the capacity of a keyring
+
+Expand the capacity of a keyring to be able to hold a lot more keys by using
+the previously added associative array implementation.  Currently the maximum
+capacity is:
+
+	(PAGE_SIZE - sizeof(header)) / sizeof(struct key *)
+
+which, on a 64-bit system, is a little more than 500.  However, since this is being
+used for the NFS uid mapper, we need more than that.  The new implementation
+gives us effectively unlimited capacity.
+
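+(Illustratively: with 4096-byte pages, 8-byte key pointers and a list header
+of roughly 24 bytes, the formula above gives (4096 - 24) / 8 = 509 entries,
+hence "a little more than 500"; the exact figure depends on the header
+layout.)
+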
+With some alterations, the keyutils testsuite runs successfully to completion
+after this patch is applied.  The alterations are because (a) keyrings that
+are simply added to no longer keep their keys in insertion order and (b) some
+of the returned error codes have changed a bit.
+
+Signed-off-by: David Howells <dhowells at redhat.com>
+---
+ include/keys/keyring-type.h |   17 +-
+ include/linux/key.h         |   13 +-
+ lib/assoc_array.c           |    1 +
+ security/keys/Kconfig       |    1 +
+ security/keys/gc.c          |   33 +-
+ security/keys/internal.h    |   17 +-
+ security/keys/key.c         |   35 +-
+ security/keys/keyring.c     | 1436 ++++++++++++++++++++++---------------------
+ security/keys/request_key.c |   12 +-
+ 9 files changed, 803 insertions(+), 762 deletions(-)
+
+diff --git a/include/keys/keyring-type.h b/include/keys/keyring-type.h
+index cf49159..fca5c62 100644
+--- a/include/keys/keyring-type.h
++++ b/include/keys/keyring-type.h
+@@ -1,6 +1,6 @@
+ /* Keyring key type
+  *
+- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
++ * Copyright (C) 2008, 2013 Red Hat, Inc. All Rights Reserved.
+  * Written by David Howells (dhowells at redhat.com)
+  *
+  * This program is free software; you can redistribute it and/or
+@@ -13,19 +13,6 @@
+ #define _KEYS_KEYRING_TYPE_H
+ 
+ #include <linux/key.h>
+-#include <linux/rcupdate.h>
+-
+-/*
+- * the keyring payload contains a list of the keys to which the keyring is
+- * subscribed
+- */
+-struct keyring_list {
+-	struct rcu_head	rcu;		/* RCU deletion hook */
+-	unsigned short	maxkeys;	/* max keys this list can hold */
+-	unsigned short	nkeys;		/* number of keys currently held */
+-	unsigned short	delkey;		/* key to be unlinked by RCU */
+-	struct key __rcu *keys[0];
+-};
+-
++#include <linux/assoc_array.h>
+ 
+ #endif /* _KEYS_KEYRING_TYPE_H */
+diff --git a/include/linux/key.h b/include/linux/key.h
+index ef596c7..2417f78 100644
+--- a/include/linux/key.h
++++ b/include/linux/key.h
+@@ -22,6 +22,7 @@
+ #include <linux/sysctl.h>
+ #include <linux/rwsem.h>
+ #include <linux/atomic.h>
++#include <linux/assoc_array.h>
+ 
+ #ifdef __KERNEL__
+ #include <linux/uidgid.h>
+@@ -196,11 +197,13 @@ struct key {
+ 	 *   whatever
+ 	 */
+ 	union {
+-		unsigned long		value;
+-		void __rcu		*rcudata;
+-		void			*data;
+-		struct keyring_list __rcu *subscriptions;
+-	} payload;
++		union {
++			unsigned long		value;
++			void __rcu		*rcudata;
++			void			*data;
++		} payload;
++		struct assoc_array keys;
++	};
+ };
+ 
+ extern struct key *key_alloc(struct key_type *type,
+diff --git a/lib/assoc_array.c b/lib/assoc_array.c
+index a095281..17edeaf 100644
+--- a/lib/assoc_array.c
++++ b/lib/assoc_array.c
+@@ -12,6 +12,7 @@
+  */
+ //#define DEBUG
+ #include <linux/slab.h>
++#include <linux/err.h>
+ #include <linux/assoc_array_priv.h>
+ 
+ /*
+diff --git a/security/keys/Kconfig b/security/keys/Kconfig
+index a90d6d3..15e0dfe 100644
+--- a/security/keys/Kconfig
++++ b/security/keys/Kconfig
+@@ -4,6 +4,7 @@
+ 
+ config KEYS
+ 	bool "Enable access key retention support"
++	select ASSOCIATIVE_ARRAY
+ 	help
+ 	  This option provides support for retaining authentication tokens and
+ 	  access keys in the kernel.
+diff --git a/security/keys/gc.c b/security/keys/gc.c
+index d67c97b..cce621c 100644
+--- a/security/keys/gc.c
++++ b/security/keys/gc.c
+@@ -130,6 +130,13 @@ void key_gc_keytype(struct key_type *ktype)
+ 	kleave("");
+ }
+ 
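++/* Iterator callback for assoc_array_iterate(): a non-zero return (the key
++ * is dead) stops the iteration early, which is then taken as the cue to
++ * garbage collect this keyring.
++ */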
++static int key_gc_keyring_func(const void *object, void *iterator_data)
++{
++	const struct key *key = object;
++	time_t *limit = iterator_data;
++	return key_is_dead(key, *limit);
++}
++
+ /*
+  * Garbage collect pointers from a keyring.
+  *
+@@ -138,10 +145,9 @@ void key_gc_keytype(struct key_type *ktype)
+  */
+ static void key_gc_keyring(struct key *keyring, time_t limit)
+ {
+-	struct keyring_list *klist;
+-	int loop;
++	int result;
+ 
+-	kenter("%x", key_serial(keyring));
++	kenter("%x{%s}", keyring->serial, keyring->description ?: "");
+ 
+ 	if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ 			      (1 << KEY_FLAG_REVOKED)))
+@@ -149,27 +155,17 @@ static void key_gc_keyring(struct key *keyring, time_t limit)
+ 
+ 	/* scan the keyring looking for dead keys */
+ 	rcu_read_lock();
+-	klist = rcu_dereference(keyring->payload.subscriptions);
+-	if (!klist)
+-		goto unlock_dont_gc;
+-
+-	loop = klist->nkeys;
+-	smp_rmb();
+-	for (loop--; loop >= 0; loop--) {
+-		struct key *key = rcu_dereference(klist->keys[loop]);
+-		if (key_is_dead(key, limit))
+-			goto do_gc;
+-	}
+-
+-unlock_dont_gc:
++	result = assoc_array_iterate(&keyring->keys,
++				     key_gc_keyring_func, &limit);
+ 	rcu_read_unlock();
++	if (result == true)
++		goto do_gc;
++
+ dont_gc:
+ 	kleave(" [no gc]");
+ 	return;
+ 
+ do_gc:
+-	rcu_read_unlock();
+-
+ 	keyring_gc(keyring, limit);
+ 	kleave(" [gc]");
+ }
+@@ -392,7 +388,6 @@ found_unreferenced_key:
+ 	 */
+ found_keyring:
+ 	spin_unlock(&key_serial_lock);
+-	kdebug("scan keyring %d", key->serial);
+ 	key_gc_keyring(key, limit);
+ 	goto maybe_resched;
+ 
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index 73950bf..581c6f6 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -90,20 +90,23 @@ extern void key_type_put(struct key_type *ktype);
+ 
+ extern int __key_link_begin(struct key *keyring,
+ 			    const struct keyring_index_key *index_key,
+-			    unsigned long *_prealloc);
++			    struct assoc_array_edit **_edit);
+ extern int __key_link_check_live_key(struct key *keyring, struct key *key);
+-extern void __key_link(struct key *keyring, struct key *key,
+-		       unsigned long *_prealloc);
++extern void __key_link(struct key *key, struct assoc_array_edit **_edit);
+ extern void __key_link_end(struct key *keyring,
+ 			   const struct keyring_index_key *index_key,
+-			   unsigned long prealloc);
++			   struct assoc_array_edit *edit);
+ 
+-extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+-				      const struct keyring_index_key *index_key);
++extern key_ref_t find_key_to_update(key_ref_t keyring_ref,
++				    const struct keyring_index_key *index_key);
+ 
+ extern struct key *keyring_search_instkey(struct key *keyring,
+ 					  key_serial_t target_id);
+ 
++extern int iterate_over_keyring(const struct key *keyring,
++				int (*func)(const struct key *key, void *data),
++				void *data);
++
+ typedef int (*key_match_func_t)(const struct key *, const void *);
+ 
+ struct keyring_search_context {
+@@ -119,6 +122,8 @@ struct keyring_search_context {
+ #define KEYRING_SEARCH_NO_CHECK_PERM	0x0010	/* Don't check permissions */
+ #define KEYRING_SEARCH_DETECT_TOO_DEEP	0x0020	/* Give an error on excessive depth */
+ 
++	int (*iterator)(const void *object, void *iterator_data);
++
+ 	/* Internal stuff */
+ 	int			skipped_ret;
+ 	bool			possessed;
+diff --git a/security/keys/key.c b/security/keys/key.c
+index 7d716b8..a819b5c 100644
+--- a/security/keys/key.c
++++ b/security/keys/key.c
+@@ -409,7 +409,7 @@ static int __key_instantiate_and_link(struct key *key,
+ 				      struct key_preparsed_payload *prep,
+ 				      struct key *keyring,
+ 				      struct key *authkey,
+-				      unsigned long *_prealloc)
++				      struct assoc_array_edit **_edit)
+ {
+ 	int ret, awaken;
+ 
+@@ -436,7 +436,7 @@ static int __key_instantiate_and_link(struct key *key,
+ 
+ 			/* and link it into the destination keyring */
+ 			if (keyring)
+-				__key_link(keyring, key, _prealloc);
++				__key_link(key, _edit);
+ 
+ 			/* disable the authorisation key */
+ 			if (authkey)
+@@ -476,7 +476,7 @@ int key_instantiate_and_link(struct key *key,
+ 			     struct key *authkey)
+ {
+ 	struct key_preparsed_payload prep;
+-	unsigned long prealloc;
++	struct assoc_array_edit *edit;
+ 	int ret;
+ 
+ 	memset(&prep, 0, sizeof(prep));
+@@ -490,16 +490,15 @@ int key_instantiate_and_link(struct key *key,
+ 	}
+ 
+ 	if (keyring) {
+-		ret = __key_link_begin(keyring, &key->index_key, &prealloc);
++		ret = __key_link_begin(keyring, &key->index_key, &edit);
+ 		if (ret < 0)
+ 			goto error_free_preparse;
+ 	}
+ 
+-	ret = __key_instantiate_and_link(key, &prep, keyring, authkey,
+-					 &prealloc);
++	ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &edit);
+ 
+ 	if (keyring)
+-		__key_link_end(keyring, &key->index_key, prealloc);
++		__key_link_end(keyring, &key->index_key, edit);
+ 
+ error_free_preparse:
+ 	if (key->type->preparse)
+@@ -537,7 +536,7 @@ int key_reject_and_link(struct key *key,
+ 			struct key *keyring,
+ 			struct key *authkey)
+ {
+-	unsigned long prealloc;
++	struct assoc_array_edit *edit;
+ 	struct timespec now;
+ 	int ret, awaken, link_ret = 0;
+ 
+@@ -548,7 +547,7 @@ int key_reject_and_link(struct key *key,
+ 	ret = -EBUSY;
+ 
+ 	if (keyring)
+-		link_ret = __key_link_begin(keyring, &key->index_key, &prealloc);
++		link_ret = __key_link_begin(keyring, &key->index_key, &edit);
+ 
+ 	mutex_lock(&key_construction_mutex);
+ 
+@@ -570,7 +569,7 @@ int key_reject_and_link(struct key *key,
+ 
+ 		/* and link it into the destination keyring */
+ 		if (keyring && link_ret == 0)
+-			__key_link(keyring, key, &prealloc);
++			__key_link(key, &edit);
+ 
+ 		/* disable the authorisation key */
+ 		if (authkey)
+@@ -580,7 +579,7 @@ int key_reject_and_link(struct key *key,
+ 	mutex_unlock(&key_construction_mutex);
+ 
+ 	if (keyring)
+-		__key_link_end(keyring, &key->index_key, prealloc);
++		__key_link_end(keyring, &key->index_key, edit);
+ 
+ 	/* wake up anyone waiting for a key to be constructed */
+ 	if (awaken)
+@@ -783,8 +782,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ 		.description	= description,
+ 	};
+ 	struct key_preparsed_payload prep;
++	struct assoc_array_edit *edit;
+ 	const struct cred *cred = current_cred();
+-	unsigned long prealloc;
+ 	struct key *keyring, *key = NULL;
+ 	key_ref_t key_ref;
+ 	int ret;
+@@ -828,7 +827,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ 	}
+ 	index_key.desc_len = strlen(index_key.description);
+ 
+-	ret = __key_link_begin(keyring, &index_key, &prealloc);
++	ret = __key_link_begin(keyring, &index_key, &edit);
+ 	if (ret < 0) {
+ 		key_ref = ERR_PTR(ret);
+ 		goto error_free_prep;
+@@ -847,8 +846,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ 	 * update that instead if possible
+ 	 */
+ 	if (index_key.type->update) {
+-		key_ref = __keyring_search_one(keyring_ref, &index_key);
+-		if (!IS_ERR(key_ref))
++		key_ref = find_key_to_update(keyring_ref, &index_key);
++		if (key_ref)
+ 			goto found_matching_key;
+ 	}
+ 
+@@ -874,7 +873,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ 	}
+ 
+ 	/* instantiate it and link it into the target keyring */
+-	ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc);
++	ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &edit);
+ 	if (ret < 0) {
+ 		key_put(key);
+ 		key_ref = ERR_PTR(ret);
+@@ -884,7 +883,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
+ 	key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
+ 
+ error_link_end:
+-	__key_link_end(keyring, &index_key, prealloc);
++	__key_link_end(keyring, &index_key, edit);
+ error_free_prep:
+ 	if (index_key.type->preparse)
+ 		index_key.type->free_preparse(&prep);
+@@ -897,7 +896,7 @@ error:
+ 	/* we found a matching key, so we're going to try to update it
+ 	 * - we can drop the locks first as we have the key pinned
+ 	 */
+-	__key_link_end(keyring, &index_key, prealloc);
++	__key_link_end(keyring, &index_key, edit);
+ 
+ 	key_ref = __key_update(key_ref, &prep);
+ 	goto error_free_prep;
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+index eeef1a0..f7cdea2 100644
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -1,6 +1,6 @@
+ /* Keyring handling
+  *
+- * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved.
++ * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved.
+  * Written by David Howells (dhowells at redhat.com)
+  *
+  * This program is free software; you can redistribute it and/or
+@@ -17,25 +17,11 @@
+ #include <linux/seq_file.h>
+ #include <linux/err.h>
+ #include <keys/keyring-type.h>
++#include <keys/user-type.h>
++#include <linux/assoc_array_priv.h>
+ #include <linux/uaccess.h>
+ #include "internal.h"
+ 
+-#define rcu_dereference_locked_keyring(keyring)				\
+-	(rcu_dereference_protected(					\
+-		(keyring)->payload.subscriptions,			\
+-		rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
+-
+-#define rcu_deref_link_locked(klist, index, keyring)			\
+-	(rcu_dereference_protected(					\
+-		(klist)->keys[index],					\
+-		rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
+-
+-#define MAX_KEYRING_LINKS						\
+-	min_t(size_t, USHRT_MAX - 1,					\
+-	      ((PAGE_SIZE - sizeof(struct keyring_list)) / sizeof(struct key *)))
+-
+-#define KEY_LINK_FIXQUOTA 1UL
+-
+ /*
+  * When plumbing the depths of the key tree, this sets a hard limit
+  * on how deep we're willing to go.
+@@ -47,6 +33,28 @@
+  */
+ #define KEYRING_NAME_HASH_SIZE	(1 << 5)
+ 
++/*
++ * We mark pointers we pass to the associative array with bit 1 set if
++ * they're keyrings and clear otherwise.
++ */
++#define KEYRING_PTR_SUBTYPE	0x2UL
++
++static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x)
++{
++	return (unsigned long)x & KEYRING_PTR_SUBTYPE;
++}
++static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x)
++{
++	void *object = assoc_array_ptr_to_leaf(x);
++	return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE);
++}
++static inline void *keyring_key_to_ptr(struct key *key)
++{
++	if (key->type == &key_type_keyring)
++		return (void *)((unsigned long)key | KEYRING_PTR_SUBTYPE);
++	return key;
++}
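+
++/*
++ * Editor's note: a hedged, self-contained userspace sketch of the bit-1
++ * pointer tagging the three helpers above rely on (demo names invented;
++ * not part of the patch).  Word-aligned allocations always have their
++ * low pointer bits clear, so bit 1 is free to record "this leaf is a
++ * keyring" without any extra storage:
++ *
++ *	#include <assert.h>
++ *	#include <stdint.h>
++ *	#include <stdlib.h>
++ *
++ *	#define DEMO_KEYRING_TAG 0x2UL
++ *
++ *	int main(void)
++ *	{
++ *		long *obj = malloc(sizeof(*obj)); // malloc() is aligned
++ *		void *tagged = (void *)((uintptr_t)obj | DEMO_KEYRING_TAG);
++ *
++ *		assert(((uintptr_t)tagged & DEMO_KEYRING_TAG) != 0);
++ *		assert((void *)((uintptr_t)tagged & ~DEMO_KEYRING_TAG)
++ *		       == (void *)obj);	  // flag strips cleanly
++ *		free(obj);
++ *		return 0;
++ *	}
++ */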
++
+ static struct list_head	keyring_name_hash[KEYRING_NAME_HASH_SIZE];
+ static DEFINE_RWLOCK(keyring_name_lock);
+ 
+@@ -67,7 +75,6 @@ static inline unsigned keyring_hash(const char *desc)
+  */
+ static int keyring_instantiate(struct key *keyring,
+ 			       struct key_preparsed_payload *prep);
+-static int keyring_match(const struct key *keyring, const void *criterion);
+ static void keyring_revoke(struct key *keyring);
+ static void keyring_destroy(struct key *keyring);
+ static void keyring_describe(const struct key *keyring, struct seq_file *m);
+@@ -76,9 +83,9 @@ static long keyring_read(const struct key *keyring,
+ 
+ struct key_type key_type_keyring = {
+ 	.name		= "keyring",
+-	.def_datalen	= sizeof(struct keyring_list),
++	.def_datalen	= 0,
+ 	.instantiate	= keyring_instantiate,
+-	.match		= keyring_match,
++	.match		= user_match,
+ 	.revoke		= keyring_revoke,
+ 	.destroy	= keyring_destroy,
+ 	.describe	= keyring_describe,
+@@ -127,6 +134,7 @@ static int keyring_instantiate(struct key *keyring,
+ 
+ 	ret = -EINVAL;
+ 	if (prep->datalen == 0) {
++		assoc_array_init(&keyring->keys);
+ 		/* make the keyring available by name if it has one */
+ 		keyring_publish_name(keyring);
+ 		ret = 0;
+@@ -136,15 +144,226 @@ static int keyring_instantiate(struct key *keyring,
+ }
+ 
+ /*
+- * Match keyrings on their name
++ * Multiply 64 bits by 32 bits to 96 bits and fold back to 64 bits.  Ideally we'd
++ * fold the carry back too, but that requires inline asm.
++ */
++static u64 mult_64x32_and_fold(u64 x, u32 y)
++{
++	u64 hi = (u64)(u32)(x >> 32) * y;
++	u64 lo = (u64)(u32)(x) * y;
++	return lo + ((u64)(u32)hi << 32) + (u32)(hi >> 32);
++}
++
++/*
++ * Hash a key type and description.
++ */
++static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key)
++{
++	const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP;
++	const unsigned long level_mask = ASSOC_ARRAY_LEVEL_STEP_MASK;
++	const char *description = index_key->description;
++	unsigned long hash, type;
++	u32 piece;
++	u64 acc;
++	int n, desc_len = index_key->desc_len;
++
++	type = (unsigned long)index_key->type;
++
++	acc = mult_64x32_and_fold(type, desc_len + 13);
++	acc = mult_64x32_and_fold(acc, 9207);
++	for (;;) {
++		n = desc_len;
++		if (n <= 0)
++			break;
++		if (n > 4)
++			n = 4;
++		piece = 0;
++		memcpy(&piece, description, n);
++		description += n;
++		desc_len -= n;
++		acc = mult_64x32_and_fold(acc, piece);
++		acc = mult_64x32_and_fold(acc, 9207);
++	}
++
++	/* Fold the hash down to 32 bits if need be. */
++	hash = acc;
++	if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32)
++		hash ^= acc >> 32;
++
++	/* Squidge all the keyrings into a separate part of the tree from
++	 * ordinary keys by making sure the lowest level segment in the hash is
++	 * zero for keyrings and non-zero otherwise.
++	 */
++	if (index_key->type != &key_type_keyring && (hash & level_mask) == 0)
++		return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1;
++	if (index_key->type == &key_type_keyring && (hash & level_mask) != 0)
++		return (hash + (hash << level_shift)) & ~level_mask;
++	return hash;
++}
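+
++/*
++ * Editor's note: the fold in isolation, as a hedged userspace sketch
++ * (values invented for illustration).  The 96-bit product of a 64-bit
++ * accumulator and a 32-bit multiplier is squeezed back into 64 bits by
++ * adding the overflowed high 32 bits into the low end:
++ *
++ *	#include <stdint.h>
++ *	#include <stdio.h>
++ *
++ *	static uint64_t demo_fold(uint64_t x, uint32_t y)
++ *	{
++ *		uint64_t hi = (uint64_t)(uint32_t)(x >> 32) * y;
++ *		uint64_t lo = (uint64_t)(uint32_t)x * y;
++ *		return lo + ((uint64_t)(uint32_t)hi << 32) + (uint32_t)(hi >> 32);
++ *	}
++ *
++ *	int main(void)
++ *	{
++ *		// a made-up "type pointer" hashed with desc_len 4, as the
++ *		// routine above would start out
++ *		uint64_t acc = demo_fold(0xffffffff81234567ULL, 4 + 13);
++ *		acc = demo_fold(acc, 9207);
++ *		printf("acc = %#llx\n", (unsigned long long)acc);
++ *		return 0;
++ *	}
++ */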
++
++/*
++ * Build the next index key chunk.
++ *
++ * On 32-bit systems the index key is laid out as:
++ *
++ *	0	4	5	9...
++ *	hash	desclen	typeptr	desc[]
++ *
++ * On 64-bit systems:
++ *
++ *	0	8	9	17...
++ *	hash	desclen	typeptr	desc[]
++ *
++ * We return it one word-sized chunk at a time.
+  */
+-static int keyring_match(const struct key *keyring, const void *description)
++static unsigned long keyring_get_key_chunk(const void *data, int level)
++{
++	const struct keyring_index_key *index_key = data;
++	unsigned long chunk = 0;
++	long offset = 0;
++	int desc_len = index_key->desc_len, n = sizeof(chunk);
++
++	level /= ASSOC_ARRAY_KEY_CHUNK_SIZE;
++	switch (level) {
++	case 0:
++		return hash_key_type_and_desc(index_key);
++	case 1:
++		return ((unsigned long)index_key->type << 8) | desc_len;
++	case 2:
++		if (desc_len == 0)
++			return (u8)((unsigned long)index_key->type >>
++				    (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
++		n--;
++		offset = 1;
++	default:
++		offset += sizeof(chunk) - 1;
++		offset += (level - 3) * sizeof(chunk);
++		if (offset >= desc_len)
++			return 0;
++		desc_len -= offset;
++		if (desc_len > n)
++			desc_len = n;
++		offset += desc_len;
++		do {
++			chunk <<= 8;
++			chunk |= ((u8*)index_key->description)[--offset];
++		} while (--desc_len > 0);
++
++		if (level == 2) {
++			chunk <<= 8;
++			chunk |= (u8)((unsigned long)index_key->type >>
++				      (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
++		}
++		return chunk;
++	}
++}
++
++static unsigned long keyring_get_object_key_chunk(const void *object, int level)
++{
++	const struct key *key = keyring_ptr_to_key(object);
++	return keyring_get_key_chunk(&key->index_key, level);
++}
++
++static bool keyring_compare_object(const void *object, const void *data)
+ {
+-	return keyring->description &&
+-		strcmp(keyring->description, description) == 0;
++	const struct keyring_index_key *index_key = data;
++	const struct key *key = keyring_ptr_to_key(object);
++
++	return key->index_key.type == index_key->type &&
++		key->index_key.desc_len == index_key->desc_len &&
++		memcmp(key->index_key.description, index_key->description,
++		       index_key->desc_len) == 0;
+ }
+ 
+ /*
++ * Compare the index keys of a pair of objects and determine the bit position
++ * at which they differ - if they differ.
++ */
++static int keyring_diff_objects(const void *_a, const void *_b)
++{
++	const struct key *key_a = keyring_ptr_to_key(_a);
++	const struct key *key_b = keyring_ptr_to_key(_b);
++	const struct keyring_index_key *a = &key_a->index_key;
++	const struct keyring_index_key *b = &key_b->index_key;
++	unsigned long seg_a, seg_b;
++	int level, i;
++
++	level = 0;
++	seg_a = hash_key_type_and_desc(a);
++	seg_b = hash_key_type_and_desc(b);
++	if ((seg_a ^ seg_b) != 0)
++		goto differ;
++
++	/* The number of bits contributed by the hash is controlled by a
++	 * constant in the assoc_array headers.  Everything else thereafter we
++	 * can deal with as being machine word-size dependent.
++	 */
++	level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8;
++	seg_a = a->desc_len;
++	seg_b = b->desc_len;
++	if ((seg_a ^ seg_b) != 0)
++		goto differ;
++
++	/* The next bit may not work on big endian */
++	level++;
++	seg_a = (unsigned long)a->type;
++	seg_b = (unsigned long)b->type;
++	if ((seg_a ^ seg_b) != 0)
++		goto differ;
++
++	level += sizeof(unsigned long);
++	if (a->desc_len == 0)
++		goto same;
++
++	i = 0;
++	if ((((unsigned long)a->description | (unsigned long)b->description) &
++	     (sizeof(unsigned long) - 1)) == 0) {
++		/* Both descriptions are word-aligned: compare whole words
++		 * first; the byte loop below handles the tail. */
++		while (i < (a->desc_len & ~(sizeof(unsigned long) - 1))) {
++			seg_a = *(unsigned long *)(a->description + i);
++			seg_b = *(unsigned long *)(b->description + i);
++			if ((seg_a ^ seg_b) != 0)
++				goto differ_plus_i;
++			i += sizeof(unsigned long);
++		}
++	}
++
++	for (; i < a->desc_len; i++) {
++		seg_a = *(unsigned char *)(a->description + i);
++		seg_b = *(unsigned char *)(b->description + i);
++		if ((seg_a ^ seg_b) != 0)
++			goto differ_plus_i;
++	}
++
++same:
++	return -1;
++
++differ_plus_i:
++	level += i;
++differ:
++	i = level * 8 + __ffs(seg_a ^ seg_b);
++	return i;
++}
++
++/*
++ * Free an object after stripping the keyring flag off of the pointer.
++ */
++static void keyring_free_object(void *object)
++{
++	key_put(keyring_ptr_to_key(object));
++}
++
++/*
++ * Operations for keyring management by the index-tree routines.
++ */
++static const struct assoc_array_ops keyring_assoc_array_ops = {
++	.get_key_chunk		= keyring_get_key_chunk,
++	.get_object_key_chunk	= keyring_get_object_key_chunk,
++	.compare_object		= keyring_compare_object,
++	.diff_objects		= keyring_diff_objects,
++	.free_object		= keyring_free_object,
++};
++
++/*
+  * Clean up a keyring when it is destroyed.  Unpublish its name if it had one
+  * and dispose of its data.
+  *
+@@ -155,9 +374,6 @@ static int keyring_match(const struct key *keyring, const void *description)
+  */
+ static void keyring_destroy(struct key *keyring)
+ {
+-	struct keyring_list *klist;
+-	int loop;
+-
+ 	if (keyring->description) {
+ 		write_lock(&keyring_name_lock);
+ 
+@@ -168,12 +384,7 @@ static void keyring_destroy(struct key *keyring)
+ 		write_unlock(&keyring_name_lock);
+ 	}
+ 
+-	klist = rcu_access_pointer(keyring->payload.subscriptions);
+-	if (klist) {
+-		for (loop = klist->nkeys - 1; loop >= 0; loop--)
+-			key_put(rcu_access_pointer(klist->keys[loop]));
+-		kfree(klist);
+-	}
++	assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops);
+ }
+ 
+ /*
+@@ -181,76 +392,88 @@ static void keyring_destroy(struct key *keyring)
+  */
+ static void keyring_describe(const struct key *keyring, struct seq_file *m)
+ {
+-	struct keyring_list *klist;
+-
+ 	if (keyring->description)
+ 		seq_puts(m, keyring->description);
+ 	else
+ 		seq_puts(m, "[anon]");
+ 
+ 	if (key_is_instantiated(keyring)) {
+-		rcu_read_lock();
+-		klist = rcu_dereference(keyring->payload.subscriptions);
+-		if (klist)
+-			seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys);
++		if (keyring->keys.nr_leaves_on_tree != 0)
++			seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree);
+ 		else
+ 			seq_puts(m, ": empty");
+-		rcu_read_unlock();
+ 	}
+ }
+ 
++struct keyring_read_iterator_context {
++	size_t			qty;
++	size_t			count;
++	key_serial_t __user	*buffer;
++};
++
++static int keyring_read_iterator(const void *object, void *data)
++{
++	struct keyring_read_iterator_context *ctx = data;
++	const struct key *key = keyring_ptr_to_key(object);
++	int ret;
++
++	kenter("{%s,%d},,{%zu/%zu}",
++	       key->type->name, key->serial, ctx->count, ctx->qty);
++
++	if (ctx->count >= ctx->qty)
++		return 1;
++
++	ret = put_user(key->serial, ctx->buffer);
++	if (ret < 0)
++		return ret;
++	ctx->buffer++;
++	ctx->count += sizeof(key->serial);
++	return 0;
++}
++
+ /*
+  * Read a list of key IDs from the keyring's contents in binary form
+  *
+- * The keyring's semaphore is read-locked by the caller.
++ * The keyring's semaphore is read-locked by the caller.  This prevents someone
++ * from modifying it under us - which could cause us to read key IDs multiple
++ * times.
+  */
+ static long keyring_read(const struct key *keyring,
+ 			 char __user *buffer, size_t buflen)
+ {
+-	struct keyring_list *klist;
+-	struct key *key;
+-	size_t qty, tmp;
+-	int loop, ret;
++	struct keyring_read_iterator_context ctx;
++	unsigned long nr_keys;
++	int ret;
+ 
+-	ret = 0;
+-	klist = rcu_dereference_locked_keyring(keyring);
+-	if (klist) {
+-		/* calculate how much data we could return */
+-		qty = klist->nkeys * sizeof(key_serial_t);
+-
+-		if (buffer && buflen > 0) {
+-			if (buflen > qty)
+-				buflen = qty;
+-
+-			/* copy the IDs of the subscribed keys into the
+-			 * buffer */
+-			ret = -EFAULT;
+-
+-			for (loop = 0; loop < klist->nkeys; loop++) {
+-				key = rcu_deref_link_locked(klist, loop,
+-							    keyring);
+-
+-				tmp = sizeof(key_serial_t);
+-				if (tmp > buflen)
+-					tmp = buflen;
+-
+-				if (copy_to_user(buffer,
+-						 &key->serial,
+-						 tmp) != 0)
+-					goto error;
+-
+-				buflen -= tmp;
+-				if (buflen == 0)
+-					break;
+-				buffer += tmp;
+-			}
+-		}
++	kenter("{%d},,%zu", key_serial(keyring), buflen);
++
++	if (buflen & (sizeof(key_serial_t) - 1))
++		return -EINVAL;
++
++	nr_keys = keyring->keys.nr_leaves_on_tree;
++	if (nr_keys == 0)
++		return 0;
+ 
+-		ret = qty;
++	/* Calculate how much data we could return */
++	ctx.qty = nr_keys * sizeof(key_serial_t);
++
++	if (!buffer || !buflen)
++		return ctx.qty;
++
++	/* Clamp to the buffer size so the iterator cannot write past it */
++	if (ctx.qty > buflen)
++		ctx.qty = buflen;
++
++	/* Copy the IDs of the subscribed keys into the buffer */
++	ctx.buffer = (key_serial_t __user *)buffer;
++	ctx.count = 0;
++	ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx);
++	if (ret < 0) {
++		kleave(" = %d [iterate]", ret);
++		return ret;
+ 	}
+ 
+-error:
+-	return ret;
++	kleave(" = %zu [ok]", ctx.count);
++	return ctx.count;
+ }
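+
++/*
++ * Editor's note: a hedged userspace sketch of what the read routine
++ * above services - KEYCTL_READ on a keyring yields the serials of the
++ * linked keys.  Assumes the libkeyutils wrappers; error handling kept
++ * minimal:
++ *
++ *	#include <stdio.h>
++ *	#include <keyutils.h>
++ *
++ *	int main(void)
++ *	{
++ *		key_serial_t ids[64];
++ *		long n = keyctl_read(KEY_SPEC_SESSION_KEYRING,
++ *				     (char *)ids, sizeof(ids));
++ *		if (n < 0) {
++ *			perror("keyctl_read");
++ *			return 1;
++ *		}
++ *		// n is the total size available; it may exceed the buffer
++ *		for (long i = 0; i < n / (long)sizeof(key_serial_t) && i < 64; i++)
++ *			printf("linked key: %d\n", ids[i]);
++ *		return 0;
++ *	}
++ */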
+ 
+ /*
+@@ -277,219 +500,360 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
+ }
+ EXPORT_SYMBOL(keyring_alloc);
+ 
+-/**
+- * keyring_search_aux - Search a keyring tree for a key matching some criteria
+- * @keyring_ref: A pointer to the keyring with possession indicator.
+- * @ctx: The keyring search context.
+- *
+- * Search the supplied keyring tree for a key that matches the criteria given.
+- * The root keyring and any linked keyrings must grant Search permission to the
+- * caller to be searchable and keys can only be found if they too grant Search
+- * to the caller. The possession flag on the root keyring pointer controls use
+- * of the possessor bits in permissions checking of the entire tree.  In
+- * addition, the LSM gets to forbid keyring searches and key matches.
+- *
+- * The search is performed as a breadth-then-depth search up to the prescribed
+- * limit (KEYRING_SEARCH_MAX_DEPTH).
+- *
+- * Keys are matched to the type provided and are then filtered by the match
+- * function, which is given the description to use in any way it sees fit.  The
+- * match function may use any attributes of a key that it wishes to to
+- * determine the match.  Normally the match function from the key type would be
+- * used.
+- *
+- * RCU is used to prevent the keyring key lists from disappearing without the
+- * need to take lots of locks.
+- *
+- * Returns a pointer to the found key and increments the key usage count if
+- * successful; -EAGAIN if no matching keys were found, or if expired or revoked
+- * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
+- * specified keyring wasn't a keyring.
+- *
+- * In the case of a successful return, the possession attribute from
+- * @keyring_ref is propagated to the returned key reference.
++/*
++ * Iteration function to consider each key found.
+  */
+-key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+-			     struct keyring_search_context *ctx)
++static int keyring_search_iterator(const void *object, void *iterator_data)
+ {
+-	struct {
+-		/* Need a separate keylist pointer for RCU purposes */
+-		struct key *keyring;
+-		struct keyring_list *keylist;
+-		int kix;
+-	} stack[KEYRING_SEARCH_MAX_DEPTH];
+-
+-	struct keyring_list *keylist;
+-	unsigned long kflags;
+-	struct key *keyring, *key;
+-	key_ref_t key_ref;
+-	long err;
+-	int sp, nkeys, kix;
++	struct keyring_search_context *ctx = iterator_data;
++	const struct key *key = keyring_ptr_to_key(object);
++	unsigned long kflags = key->flags;
+ 
+-	keyring = key_ref_to_ptr(keyring_ref);
+-	ctx->possessed = is_key_possessed(keyring_ref);
+-	key_check(keyring);
++	kenter("{%d}", key->serial);
+ 
+-	/* top keyring must have search permission to begin the search */
+-	err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
+-	if (err < 0) {
+-		key_ref = ERR_PTR(err);
+-		goto error;
++	/* ignore keys not of this type */
++	if (key->type != ctx->index_key.type) {
++		kleave(" = 0 [!type]");
++		return 0;
+ 	}
+ 
+-	key_ref = ERR_PTR(-ENOTDIR);
+-	if (keyring->type != &key_type_keyring)
+-		goto error;
++	/* skip invalidated, revoked and expired keys */
++	if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
++		if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
++			      (1 << KEY_FLAG_REVOKED))) {
++			ctx->result = ERR_PTR(-EKEYREVOKED);
++			kleave(" = %d [invrev]", ctx->skipped_ret);
++			goto skipped;
++		}
+ 
+-	rcu_read_lock();
++		if (key->expiry && ctx->now.tv_sec >= key->expiry) {
++			ctx->result = ERR_PTR(-EKEYEXPIRED);
++			kleave(" = %d [expire]", ctx->skipped_ret);
++			goto skipped;
++		}
++	}
+ 
+-	ctx->now = current_kernel_time();
+-	err = -EAGAIN;
+-	sp = 0;
+-
+-	/* firstly we should check to see if this top-level keyring is what we
+-	 * are looking for */
+-	key_ref = ERR_PTR(-EAGAIN);
+-	kflags = keyring->flags;
+-	if (keyring->type == ctx->index_key.type &&
+-	    ctx->match(keyring, ctx->match_data)) {
+-		key = keyring;
+-		if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
+-			goto found;
++	/* keys that don't match */
++	if (!ctx->match(key, ctx->match_data)) {
++		kleave(" = 0 [!match]");
++		return 0;
++	}
+ 
+-		/* check it isn't negative and hasn't expired or been
+-		 * revoked */
+-		if (kflags & (1 << KEY_FLAG_REVOKED))
+-			goto error_2;
+-		if (key->expiry && ctx->now.tv_sec >= key->expiry)
+-			goto error_2;
+-		key_ref = ERR_PTR(key->type_data.reject_error);
+-		if (kflags & (1 << KEY_FLAG_NEGATIVE))
+-			goto error_2;
+-		goto found;
++	/* key must have search permissions */
++	if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
++	    key_task_permission(make_key_ref(key, ctx->possessed),
++				ctx->cred, KEY_SEARCH) < 0) {
++		ctx->result = ERR_PTR(-EACCES);
++		kleave(" = %d [!perm]", ctx->skipped_ret);
++		goto skipped;
+ 	}
+ 
+-	/* otherwise, the top keyring must not be revoked, expired, or
+-	 * negatively instantiated if we are to search it */
+-	key_ref = ERR_PTR(-EAGAIN);
+-	if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+-		      (1 << KEY_FLAG_REVOKED) |
+-		      (1 << KEY_FLAG_NEGATIVE)) ||
+-	    (keyring->expiry && ctx->now.tv_sec >= keyring->expiry))
+-		goto error_2;
+-
+-	/* start processing a new keyring */
+-descend:
+-	kflags = keyring->flags;
+-	if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+-		      (1 << KEY_FLAG_REVOKED)))
+-		goto not_this_keyring;
++	if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
++		/* we set a different error code if we pass a negative key */
++		if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
++			ctx->result = ERR_PTR(key->type_data.reject_error);
++			kleave(" = %d [neg]", ctx->skipped_ret);
++			goto skipped;
++		}
++	}
+ 
+-	keylist = rcu_dereference(keyring->payload.subscriptions);
+-	if (!keylist)
+-		goto not_this_keyring;
++	/* Found */
++	ctx->result = make_key_ref(key, ctx->possessed);
++	kleave(" = 1 [found]");
++	return 1;
+ 
+-	/* iterate through the keys in this keyring first */
+-	nkeys = keylist->nkeys;
+-	smp_rmb();
+-	for (kix = 0; kix < nkeys; kix++) {
+-		key = rcu_dereference(keylist->keys[kix]);
+-		kflags = key->flags;
++skipped:
++	return ctx->skipped_ret;
++}
+ 
+-		/* ignore keys not of this type */
+-		if (key->type != ctx->index_key.type)
+-			continue;
++/*
++ * Search inside a keyring for a key.  We can search by walking to it
++ * directly based on its index-key or we can iterate over the entire
++ * tree looking for it, based on the match function.
++ */
++static int search_keyring(struct key *keyring, struct keyring_search_context *ctx)
++{
++	if ((ctx->flags & KEYRING_SEARCH_LOOKUP_TYPE) ==
++	    KEYRING_SEARCH_LOOKUP_DIRECT) {
++		const void *object;
++
++		object = assoc_array_find(&keyring->keys,
++					  &keyring_assoc_array_ops,
++					  &ctx->index_key);
++		return object ? ctx->iterator(object, ctx) : 0;
++	}
++	return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx);
++}
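+
++/*
++ * Editor's note (annotation, not in the patch): the two paths above
++ * trade generality for speed - a LOOKUP_DIRECT search walks the
++ * assoc_array straight to the index key's slot, while LOOKUP_ITERATE
++ * visits every leaf and lets ctx->iterator decide, which is what key
++ * types with non-default ->match functions need.
++ */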
+ 
+-		/* skip invalidated, revoked and expired keys */
+-		if (!(ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)) {
+-			if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+-				      (1 << KEY_FLAG_REVOKED)))
+-				continue;
++/*
++ * Search a tree of keyrings that point to other keyrings up to the maximum
++ * depth.
++ */
++static bool search_nested_keyrings(struct key *keyring,
++				   struct keyring_search_context *ctx)
++{
++	struct {
++		struct key *keyring;
++		struct assoc_array_node *node;
++		int slot;
++	} stack[KEYRING_SEARCH_MAX_DEPTH];
+ 
+-			if (key->expiry && ctx->now.tv_sec >= key->expiry)
+-				continue;
+-		}
++	struct assoc_array_shortcut *shortcut;
++	struct assoc_array_node *node;
++	struct assoc_array_ptr *ptr;
++	struct key *key;
++	int sp = 0, slot;
+ 
+-		/* keys that don't match */
+-		if (!ctx->match(key, ctx->match_data))
+-			continue;
++	kenter("{%d},{%s,%s}",
++	       keyring->serial,
++	       ctx->index_key.type->name,
++	       ctx->index_key.description);
+ 
+-		/* key must have search permissions */
+-		if (key_task_permission(make_key_ref(key, ctx->possessed),
+-					ctx->cred, KEY_SEARCH) < 0)
+-			continue;
++	if (ctx->index_key.description)
++		ctx->index_key.desc_len = strlen(ctx->index_key.description);
+ 
+-		if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
++	/* Check to see if this top-level keyring is what we are looking for
++	 * and whether it is valid or not.
++	 */
++	if (ctx->flags & KEYRING_SEARCH_LOOKUP_ITERATE ||
++	    keyring_compare_object(keyring, &ctx->index_key)) {
++		ctx->skipped_ret = 2;
++		ctx->flags |= KEYRING_SEARCH_DO_STATE_CHECK;
++		switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) {
++		case 1:
+ 			goto found;
+-
+-		/* we set a different error code if we pass a negative key */
+-		if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
+-			err = key->type_data.reject_error;
+-			continue;
++		case 2:
++			return false;
++		default:
++			break;
+ 		}
++	}
+ 
++	ctx->skipped_ret = 0;
++	if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
++		ctx->flags &= ~KEYRING_SEARCH_DO_STATE_CHECK;
++
++	/* Start processing a new keyring */
++descend_to_keyring:
++	kdebug("descend to %d", keyring->serial);
++	if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
++			      (1 << KEY_FLAG_REVOKED)))
++		goto not_this_keyring;
++
++	/* Search through the keys in this keyring before searching its
++	 * subtrees.
++	 */
++	if (search_keyring(keyring, ctx))
+ 		goto found;
+-	}
+ 
+-	/* search through the keyrings nested in this one */
+-	kix = 0;
+-ascend:
+-	nkeys = keylist->nkeys;
+-	smp_rmb();
+-	for (; kix < nkeys; kix++) {
+-		key = rcu_dereference(keylist->keys[kix]);
+-		if (key->type != &key_type_keyring)
+-			continue;
++	/* Then manually iterate through the keyrings nested in this one.
++	 *
++	 * Start from the root node of the index tree.  Because of the way the
++	 * hash function has been set up, keyrings cluster on the leftmost
++	 * branch of the root node (root slot 0) or in the root node itself.
++	 * Non-keyrings avoid the leftmost branch of the root entirely (root
++	 * slots 1-15).
++	 */
++	ptr = ACCESS_ONCE(keyring->keys.root);
++	if (!ptr)
++		goto not_this_keyring;
+ 
+-		/* recursively search nested keyrings
+-		 * - only search keyrings for which we have search permission
++	if (assoc_array_ptr_is_shortcut(ptr)) {
++		/* If the root is a shortcut, either the keyring only contains
++		 * keyring pointers (everything clusters behind root slot 0) or
++		 * doesn't contain any keyring pointers.
+ 		 */
+-		if (sp >= KEYRING_SEARCH_MAX_DEPTH)
++		shortcut = assoc_array_ptr_to_shortcut(ptr);
++		smp_read_barrier_depends();
++		if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0)
++			goto not_this_keyring;
++
++		ptr = ACCESS_ONCE(shortcut->next_node);
++		node = assoc_array_ptr_to_node(ptr);
++		goto begin_node;
++	}
++
++	node = assoc_array_ptr_to_node(ptr);
++	smp_read_barrier_depends();
++
++	ptr = node->slots[0];
++	if (!assoc_array_ptr_is_meta(ptr))
++		goto begin_node;
++
++descend_to_node:
++	/* Descend to a more distal node in this keyring's content tree and go
++	 * through that.
++	 */
++	kdebug("descend");
++	if (assoc_array_ptr_is_shortcut(ptr)) {
++		shortcut = assoc_array_ptr_to_shortcut(ptr);
++		smp_read_barrier_depends();
++		ptr = ACCESS_ONCE(shortcut->next_node);
++		BUG_ON(!assoc_array_ptr_is_node(ptr));
++		node = assoc_array_ptr_to_node(ptr);
++	}
++
++begin_node:
++	kdebug("begin_node");
++	smp_read_barrier_depends();
++	slot = 0;
++ascend_to_node:
++	/* Go through the slots in a node */
++	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++		ptr = ACCESS_ONCE(node->slots[slot]);
++
++		if (assoc_array_ptr_is_meta(ptr) && node->back_pointer)
++			goto descend_to_node;
++
++		if (!keyring_ptr_is_keyring(ptr))
+ 			continue;
+ 
+-		if (key_task_permission(make_key_ref(key, ctx->possessed),
++		key = keyring_ptr_to_key(ptr);
++
++		if (sp >= KEYRING_SEARCH_MAX_DEPTH) {
++			if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) {
++				ctx->result = ERR_PTR(-ELOOP);
++				return false;
++			}
++			goto not_this_keyring;
++		}
++
++		/* Search a nested keyring */
++		if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
++		    key_task_permission(make_key_ref(key, ctx->possessed),
+ 					ctx->cred, KEY_SEARCH) < 0)
+ 			continue;
+ 
+ 		/* stack the current position */
+ 		stack[sp].keyring = keyring;
+-		stack[sp].keylist = keylist;
+-		stack[sp].kix = kix;
++		stack[sp].node = node;
++		stack[sp].slot = slot;
+ 		sp++;
+ 
+ 		/* begin again with the new keyring */
+ 		keyring = key;
+-		goto descend;
++		goto descend_to_keyring;
++	}
++
++	/* We've dealt with all the slots in the current node, so now we need
++	 * to ascend to the parent and continue processing there.
++	 */
++	ptr = ACCESS_ONCE(node->back_pointer);
++	slot = node->parent_slot;
++
++	if (ptr && assoc_array_ptr_is_shortcut(ptr)) {
++		shortcut = assoc_array_ptr_to_shortcut(ptr);
++		smp_read_barrier_depends();
++		ptr = ACCESS_ONCE(shortcut->back_pointer);
++		slot = shortcut->parent_slot;
++	}
++	if (!ptr)
++		goto not_this_keyring;
++	node = assoc_array_ptr_to_node(ptr);
++	smp_read_barrier_depends();
++	slot++;
++
++	/* If we've ascended to the root (zero backpointer), we must have just
++	 * finished processing the leftmost branch rather than the root slots -
++	 * so there can't be any more keyrings for us to find.
++	 */
++	if (node->back_pointer) {
++		kdebug("ascend %d", slot);
++		goto ascend_to_node;
+ 	}
+ 
+-	/* the keyring we're looking at was disqualified or didn't contain a
+-	 * matching key */
++	/* The keyring we're looking at was disqualified or didn't contain a
++	 * matching key.
++	 */
+ not_this_keyring:
+-	if (sp > 0) {
+-		/* resume the processing of a keyring higher up in the tree */
+-		sp--;
+-		keyring = stack[sp].keyring;
+-		keylist = stack[sp].keylist;
+-		kix = stack[sp].kix + 1;
+-		goto ascend;
++	kdebug("not_this_keyring %d", sp);
++	if (sp <= 0) {
++		kleave(" = false");
++		return false;
+ 	}
+ 
+-	key_ref = ERR_PTR(err);
+-	goto error_2;
++	/* Resume the processing of a keyring higher up in the tree */
++	sp--;
++	keyring = stack[sp].keyring;
++	node = stack[sp].node;
++	slot = stack[sp].slot + 1;
++	kdebug("ascend to %d [%d]", keyring->serial, slot);
++	goto ascend_to_node;
+ 
+-	/* we found a viable match */
++	/* We found a viable match */
+ found:
+-	__key_get(key);
+-	key->last_used_at = ctx->now.tv_sec;
+-	keyring->last_used_at = ctx->now.tv_sec;
+-	while (sp > 0)
+-		stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
++	key = key_ref_to_ptr(ctx->result);
+ 	key_check(key);
+-	key_ref = make_key_ref(key, ctx->possessed);
+-error_2:
++	if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) {
++		key->last_used_at = ctx->now.tv_sec;
++		keyring->last_used_at = ctx->now.tv_sec;
++		while (sp > 0)
++			stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
++	}
++	kleave(" = true");
++	return true;
++}
++
++/**
++ * keyring_search_aux - Search a keyring tree for a key matching some criteria
++ * @keyring_ref: A pointer to the keyring with possession indicator.
++ * @ctx: The keyring search context.
++ *
++ * Search the supplied keyring tree for a key that matches the criteria given.
++ * The root keyring and any linked keyrings must grant Search permission to the
++ * caller to be searchable and keys can only be found if they too grant Search
++ * to the caller. The possession flag on the root keyring pointer controls use
++ * of the possessor bits in permissions checking of the entire tree.  In
++ * addition, the LSM gets to forbid keyring searches and key matches.
++ *
++ * The search is performed as a breadth-then-depth search up to the prescribed
++ * limit (KEYRING_SEARCH_MAX_DEPTH).
++ *
++ * Keys are matched to the type provided and are then filtered by the match
++ * function, which is given the description to use in any way it sees fit.  The
++ * match function may use any attributes of a key that it wishes to
++ * determine the match.  Normally the match function from the key type would be
++ * used.
++ *
++ * RCU can be used to prevent the keyring key lists from disappearing without
++ * the need to take lots of locks.
++ *
++ * Returns a pointer to the found key and increments the key usage count if
++ * successful; -EAGAIN if no matching keys were found, or if expired or revoked
++ * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
++ * specified keyring wasn't a keyring.
++ *
++ * In the case of a successful return, the possession attribute from
++ * @keyring_ref is propagated to the returned key reference.
++ */
++key_ref_t keyring_search_aux(key_ref_t keyring_ref,
++			     struct keyring_search_context *ctx)
++{
++	struct key *keyring;
++	long err;
++
++	ctx->iterator = keyring_search_iterator;
++	ctx->possessed = is_key_possessed(keyring_ref);
++	ctx->result = ERR_PTR(-EAGAIN);
++
++	keyring = key_ref_to_ptr(keyring_ref);
++	key_check(keyring);
++
++	if (keyring->type != &key_type_keyring)
++		return ERR_PTR(-ENOTDIR);
++
++	if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) {
++		err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
++		if (err < 0)
++			return ERR_PTR(err);
++	}
++
++	rcu_read_lock();
++	ctx->now = current_kernel_time();
++	if (search_nested_keyrings(keyring, ctx))
++		__key_get(key_ref_to_ptr(ctx->result));
+ 	rcu_read_unlock();
+-error:
+-	return key_ref;
++	return ctx->result;
+ }
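+
++/*
++ * Editor's note: the userspace-visible counterpart of the tree search
++ * above, as a hedged sketch assuming libkeyutils (the description
++ * string is invented).  keyctl_search() recursively searches a keyring
++ * and its links for a type+description match:
++ *
++ *	#include <stdio.h>
++ *	#include <keyutils.h>
++ *
++ *	int main(void)
++ *	{
++ *		long id = keyctl_search(KEY_SPEC_SESSION_KEYRING,
++ *					"user", "demo:token", 0);
++ *		if (id < 0) {
++ *			perror("keyctl_search"); // ENOKEY if nothing matched
++ *			return 1;
++ *		}
++ *		printf("found key %ld\n", id);
++ *		return 0;
++ *	}
++ */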
+ 
+ /**
+@@ -499,7 +863,7 @@ error:
+  * @description: The name of the keyring we want to find.
+  *
+  * As keyring_search_aux() above, but using the current task's credentials and
+- * type's default matching function.
++ * type's default matching function and preferred search method.
+  */
+ key_ref_t keyring_search(key_ref_t keyring,
+ 			 struct key_type *type,
+@@ -523,58 +887,49 @@ key_ref_t keyring_search(key_ref_t keyring,
+ EXPORT_SYMBOL(keyring_search);
+ 
+ /*
+- * Search the given keyring only (no recursion).
++ * Search the given keyring for a key that might be updated.
+  *
+  * The caller must guarantee that the keyring is a keyring and that the
+- * permission is granted to search the keyring as no check is made here.
+- *
+- * RCU is used to make it unnecessary to lock the keyring key list here.
++ * permission is granted to modify the keyring as no check is made here.  The
++ * caller must also hold a lock on the keyring semaphore.
+  *
+  * Returns a pointer to the found key with usage count incremented if
+- * successful and returns -ENOKEY if not found.  Revoked and invalidated keys
+- * are skipped over.
++ * successful and returns NULL if not found.  Revoked and invalidated keys are
++ * skipped over.
+  *
+  * If successful, the possession indicator is propagated from the keyring ref
+  * to the returned key reference.
+  */
+-key_ref_t __keyring_search_one(key_ref_t keyring_ref,
+-			       const struct keyring_index_key *index_key)
++key_ref_t find_key_to_update(key_ref_t keyring_ref,
++			     const struct keyring_index_key *index_key)
+ {
+-	struct keyring_list *klist;
+ 	struct key *keyring, *key;
+-	bool possessed;
+-	int nkeys, loop;
++	const void *object;
+ 
+ 	keyring = key_ref_to_ptr(keyring_ref);
+-	possessed = is_key_possessed(keyring_ref);
+ 
+-	rcu_read_lock();
++	kenter("{%d},{%s,%s}",
++	       keyring->serial, index_key->type->name, index_key->description);
+ 
+-	klist = rcu_dereference(keyring->payload.subscriptions);
+-	if (klist) {
+-		nkeys = klist->nkeys;
+-		smp_rmb();
+-		for (loop = 0; loop < nkeys ; loop++) {
+-			key = rcu_dereference(klist->keys[loop]);
+-			if (key->type == index_key->type &&
+-			    (!key->type->match ||
+-			     key->type->match(key, index_key->description)) &&
+-			    !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+-					    (1 << KEY_FLAG_REVOKED)))
+-			    )
+-				goto found;
+-		}
+-	}
++	object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops,
++				  index_key);
+ 
+-	rcu_read_unlock();
+-	return ERR_PTR(-ENOKEY);
++	if (object)
++		goto found;
++
++	kleave(" = NULL");
++	return NULL;
+ 
+ found:
++	key = keyring_ptr_to_key(object);
++	if (key->flags & ((1 << KEY_FLAG_INVALIDATED) |
++			  (1 << KEY_FLAG_REVOKED))) {
++		kleave(" = NULL [x]");
++		return NULL;
++	}
+ 	__key_get(key);
+-	keyring->last_used_at = key->last_used_at =
+-		current_kernel_time().tv_sec;
+-	rcu_read_unlock();
+-	return make_key_ref(key, possessed);
++	kleave(" = {%d}", key->serial);
++	return make_key_ref(key, is_key_possessed(keyring_ref));
+ }
+ 
+ /*
+@@ -637,6 +992,19 @@ out:
+ 	return keyring;
+ }
+ 
++static int keyring_detect_cycle_iterator(const void *object,
++					 void *iterator_data)
++{
++	struct keyring_search_context *ctx = iterator_data;
++	const struct key *key = keyring_ptr_to_key(object);
++
++	kenter("{%d}", key->serial);
++
++	BUG_ON(key != ctx->match_data);
++	ctx->result = ERR_PTR(-EDEADLK);
++	return 1;
++}
++
+ /*
+  * See if a cycle will be created by inserting acyclic tree B into acyclic
+  * tree A at the topmost level (ie: as a direct child of A).
+@@ -646,117 +1014,39 @@ out:
+  */
+ static int keyring_detect_cycle(struct key *A, struct key *B)
+ {
+-	struct {
+-		struct keyring_list *keylist;
+-		int kix;
+-	} stack[KEYRING_SEARCH_MAX_DEPTH];
+-
+-	struct keyring_list *keylist;
+-	struct key *subtree, *key;
+-	int sp, nkeys, kix, ret;
++	struct keyring_search_context ctx = {
++		.index_key	= A->index_key,
++		.match_data	= A,
++		.iterator	= keyring_detect_cycle_iterator,
++		.flags		= (KEYRING_SEARCH_LOOKUP_DIRECT |
++				   KEYRING_SEARCH_NO_STATE_CHECK |
++				   KEYRING_SEARCH_NO_UPDATE_TIME |
++				   KEYRING_SEARCH_NO_CHECK_PERM |
++				   KEYRING_SEARCH_DETECT_TOO_DEEP),
++	};
+ 
+ 	rcu_read_lock();
+-
+-	ret = -EDEADLK;
+-	if (A == B)
+-		goto cycle_detected;
+-
+-	subtree = B;
+-	sp = 0;
+-
+-	/* start processing a new keyring */
+-descend:
+-	if (test_bit(KEY_FLAG_REVOKED, &subtree->flags))
+-		goto not_this_keyring;
+-
+-	keylist = rcu_dereference(subtree->payload.subscriptions);
+-	if (!keylist)
+-		goto not_this_keyring;
+-	kix = 0;
+-
+-ascend:
+-	/* iterate through the remaining keys in this keyring */
+-	nkeys = keylist->nkeys;
+-	smp_rmb();
+-	for (; kix < nkeys; kix++) {
+-		key = rcu_dereference(keylist->keys[kix]);
+-
+-		if (key == A)
+-			goto cycle_detected;
+-
+-		/* recursively check nested keyrings */
+-		if (key->type == &key_type_keyring) {
+-			if (sp >= KEYRING_SEARCH_MAX_DEPTH)
+-				goto too_deep;
+-
+-			/* stack the current position */
+-			stack[sp].keylist = keylist;
+-			stack[sp].kix = kix;
+-			sp++;
+-
+-			/* begin again with the new keyring */
+-			subtree = key;
+-			goto descend;
+-		}
+-	}
+-
+-	/* the keyring we're looking at was disqualified or didn't contain a
+-	 * matching key */
+-not_this_keyring:
+-	if (sp > 0) {
+-		/* resume the checking of a keyring higher up in the tree */
+-		sp--;
+-		keylist = stack[sp].keylist;
+-		kix = stack[sp].kix + 1;
+-		goto ascend;
+-	}
+-
+-	ret = 0; /* no cycles detected */
+-
+-error:
++	search_nested_keyrings(B, &ctx);
+ 	rcu_read_unlock();
+-	return ret;
+-
+-too_deep:
+-	ret = -ELOOP;
+-	goto error;
+-
+-cycle_detected:
+-	ret = -EDEADLK;
+-	goto error;
+-}
+-
+-/*
+- * Dispose of a keyring list after the RCU grace period, freeing the unlinked
+- * key
+- */
+-static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
+-{
+-	struct keyring_list *klist =
+-		container_of(rcu, struct keyring_list, rcu);
+-
+-	if (klist->delkey != USHRT_MAX)
+-		key_put(rcu_access_pointer(klist->keys[klist->delkey]));
+-	kfree(klist);
++	return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result);
+ }
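+
++/*
++ * Editor's note: ctx.result starts out NULL here (the designated
++ * initialiser zeroes the unnamed fields), and PTR_ERR(NULL) is 0, so
++ * the expression above yields 0 when no cycle was found; -EDEADLK and
++ * -ELOOP travel back encoded as ERR_PTRs.  A reminder of the
++ * <linux/err.h> idiom this leans on:
++ *
++ *	void *p = ERR_PTR(-EDEADLK);	// error value hidden in a pointer
++ *	if (IS_ERR(p))
++ *		return PTR_ERR(p);	// recovers -EDEADLK
++ */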
+ 
+ /*
+  * Preallocate memory so that a key can be linked into a keyring.
+  */
+-int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key,
+-		     unsigned long *_prealloc)
++int __key_link_begin(struct key *keyring,
++		     const struct keyring_index_key *index_key,
++		     struct assoc_array_edit **_edit)
+ 	__acquires(&keyring->sem)
+ 	__acquires(&keyring_serialise_link_sem)
+ {
+-	struct keyring_list *klist, *nklist;
+-	unsigned long prealloc;
+-	unsigned max;
+-	time_t lowest_lru;
+-	size_t size;
+-	int loop, lru, ret;
++	struct assoc_array_edit *edit;
++	int ret;
+ 
+ 	kenter("%d,%s,%s,",
+-	       key_serial(keyring), index_key->type->name, index_key->description);
++	       keyring->serial, index_key->type->name, index_key->description);
++
++	BUG_ON(index_key->desc_len == 0);
+ 
+ 	if (keyring->type != &key_type_keyring)
+ 		return -ENOTDIR;
+@@ -772,88 +1062,25 @@ int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_
+ 	if (index_key->type == &key_type_keyring)
+ 		down_write(&keyring_serialise_link_sem);
+ 
+-	klist = rcu_dereference_locked_keyring(keyring);
+-
+-	/* see if there's a matching key we can displace */
+-	lru = -1;
+-	if (klist && klist->nkeys > 0) {
+-		lowest_lru = TIME_T_MAX;
+-		for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+-			struct key *key = rcu_deref_link_locked(klist, loop,
+-								keyring);
+-			if (key->type == index_key->type &&
+-			    strcmp(key->description, index_key->description) == 0) {
+-				/* Found a match - we'll replace the link with
+-				 * one to the new key.  We record the slot
+-				 * position.
+-				 */
+-				klist->delkey = loop;
+-				prealloc = 0;
+-				goto done;
+-			}
+-			if (key->last_used_at < lowest_lru) {
+-				lowest_lru = key->last_used_at;
+-				lru = loop;
+-			}
+-		}
+-	}
+-
+-	/* If the keyring is full then do an LRU discard */
+-	if (klist &&
+-	    klist->nkeys == klist->maxkeys &&
+-	    klist->maxkeys >= MAX_KEYRING_LINKS) {
+-		kdebug("LRU discard %d\n", lru);
+-		klist->delkey = lru;
+-		prealloc = 0;
+-		goto done;
+-	}
+-
+ 	/* check that we aren't going to overrun the user's quota */
+ 	ret = key_payload_reserve(keyring,
+ 				  keyring->datalen + KEYQUOTA_LINK_BYTES);
+ 	if (ret < 0)
+ 		goto error_sem;
+ 
+-	if (klist && klist->nkeys < klist->maxkeys) {
+-		/* there's sufficient slack space to append directly */
+-		klist->delkey = klist->nkeys;
+-		prealloc = KEY_LINK_FIXQUOTA;
+-	} else {
+-		/* grow the key list */
+-		max = 4;
+-		if (klist) {
+-			max += klist->maxkeys;
+-			if (max > MAX_KEYRING_LINKS)
+-				max = MAX_KEYRING_LINKS;
+-			BUG_ON(max <= klist->maxkeys);
+-		}
+-
+-		size = sizeof(*klist) + sizeof(struct key *) * max;
+-
+-		ret = -ENOMEM;
+-		nklist = kmalloc(size, GFP_KERNEL);
+-		if (!nklist)
+-			goto error_quota;
+-
+-		nklist->maxkeys = max;
+-		if (klist) {
+-			memcpy(nklist->keys, klist->keys,
+-			       sizeof(struct key *) * klist->nkeys);
+-			nklist->delkey = klist->nkeys;
+-			nklist->nkeys = klist->nkeys + 1;
+-			klist->delkey = USHRT_MAX;
+-		} else {
+-			nklist->nkeys = 1;
+-			nklist->delkey = 0;
+-		}
+-
+-		/* add the key into the new space */
+-		RCU_INIT_POINTER(nklist->keys[nklist->delkey], NULL);
+-		prealloc = (unsigned long)nklist | KEY_LINK_FIXQUOTA;
++	/* Create an edit script that will insert/replace the key in the
++	 * keyring tree.
++	 */
++	edit = assoc_array_insert(&keyring->keys,
++				  &keyring_assoc_array_ops,
++				  index_key,
++				  NULL);
++	if (IS_ERR(edit)) {
++		ret = PTR_ERR(edit);
++		goto error_quota;
+ 	}
+ 
+-done:
+-	*_prealloc = prealloc;
++	*_edit = edit;
+ 	kleave(" = 0");
+ 	return 0;
+ 
+@@ -893,60 +1120,12 @@ int __key_link_check_live_key(struct key *keyring, struct key *key)
+  * holds at most one link to any given key of a particular type+description
+  * combination.
+  */
+-void __key_link(struct key *keyring, struct key *key,
+-		unsigned long *_prealloc)
++void __key_link(struct key *key, struct assoc_array_edit **_edit)
+ {
+-	struct keyring_list *klist, *nklist;
+-	struct key *discard;
+-
+-	nklist = (struct keyring_list *)(*_prealloc & ~KEY_LINK_FIXQUOTA);
+-	*_prealloc = 0;
+-
+-	kenter("%d,%d,%p", keyring->serial, key->serial, nklist);
+-
+-	klist = rcu_dereference_locked_keyring(keyring);
+-
+ 	__key_get(key);
+-	keyring->last_used_at = key->last_used_at =
+-		current_kernel_time().tv_sec;
+-
+-	/* there's a matching key we can displace or an empty slot in a newly
+-	 * allocated list we can fill */
+-	if (nklist) {
+-		kdebug("reissue %hu/%hu/%hu",
+-		       nklist->delkey, nklist->nkeys, nklist->maxkeys);
+-
+-		RCU_INIT_POINTER(nklist->keys[nklist->delkey], key);
+-
+-		rcu_assign_pointer(keyring->payload.subscriptions, nklist);
+-
+-		/* dispose of the old keyring list and, if there was one, the
+-		 * displaced key */
+-		if (klist) {
+-			kdebug("dispose %hu/%hu/%hu",
+-			       klist->delkey, klist->nkeys, klist->maxkeys);
+-			call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
+-		}
+-	} else if (klist->delkey < klist->nkeys) {
+-		kdebug("replace %hu/%hu/%hu",
+-		       klist->delkey, klist->nkeys, klist->maxkeys);
+-
+-		discard = rcu_dereference_protected(
+-			klist->keys[klist->delkey],
+-			rwsem_is_locked(&keyring->sem));
+-		rcu_assign_pointer(klist->keys[klist->delkey], key);
+-		/* The garbage collector will take care of RCU
+-		 * synchronisation */
+-		key_put(discard);
+-	} else {
+-		/* there's sufficient slack space to append directly */
+-		kdebug("append %hu/%hu/%hu",
+-		       klist->delkey, klist->nkeys, klist->maxkeys);
+-
+-		RCU_INIT_POINTER(klist->keys[klist->delkey], key);
+-		smp_wmb();
+-		klist->nkeys++;
+-	}
++	assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key));
++	assoc_array_apply_edit(*_edit);
++	*_edit = NULL;
+ }
+ 
+ /*
+@@ -956,23 +1135,20 @@ void __key_link(struct key *keyring, struct key *key,
+  */
+ void __key_link_end(struct key *keyring,
+ 		    const struct keyring_index_key *index_key,
+-		    unsigned long prealloc)
++		    struct assoc_array_edit *edit)
+ 	__releases(&keyring->sem)
+ 	__releases(&keyring_serialise_link_sem)
+ {
+ 	BUG_ON(index_key->type == NULL);
+-	BUG_ON(index_key->type->name == NULL);
+-	kenter("%d,%s,%lx", keyring->serial, index_key->type->name, prealloc);
++	kenter("%d,%s,", keyring->serial, index_key->type->name);
+ 
+ 	if (index_key->type == &key_type_keyring)
+ 		up_write(&keyring_serialise_link_sem);
+ 
+-	if (prealloc) {
+-		if (prealloc & KEY_LINK_FIXQUOTA)
+-			key_payload_reserve(keyring,
+-					    keyring->datalen -
+-					    KEYQUOTA_LINK_BYTES);
+-		kfree((struct keyring_list *)(prealloc & ~KEY_LINK_FIXQUOTA));
++	if (edit) {
++		key_payload_reserve(keyring,
++				    keyring->datalen - KEYQUOTA_LINK_BYTES);
++		assoc_array_cancel_edit(edit);
+ 	}
+ 	up_write(&keyring->sem);
+ }
+@@ -999,20 +1175,24 @@ void __key_link_end(struct key *keyring,
+  */
+ int key_link(struct key *keyring, struct key *key)
+ {
+-	unsigned long prealloc;
++	struct assoc_array_edit *edit;
+ 	int ret;
+ 
++	kenter("{%d,%d}", keyring->serial, atomic_read(&keyring->usage));
++
+ 	key_check(keyring);
+ 	key_check(key);
+ 
+-	ret = __key_link_begin(keyring, &key->index_key, &prealloc);
++	ret = __key_link_begin(keyring, &key->index_key, &edit);
+ 	if (ret == 0) {
++		kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage));
+ 		ret = __key_link_check_live_key(keyring, key);
+ 		if (ret == 0)
+-			__key_link(keyring, key, &prealloc);
+-		__key_link_end(keyring, &key->index_key, prealloc);
++			__key_link(key, &edit);
++		__key_link_end(keyring, &key->index_key, edit);
+ 	}
+ 
++	kleave(" = %d {%d,%d}", ret, keyring->serial, atomic_read(&keyring->usage));
+ 	return ret;
+ }
+ EXPORT_SYMBOL(key_link);
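+
++/*
++ * Editor's note: the new edit-script protocol that key_link() above
++ * demonstrates, condensed into one hedged sketch (locking and error
++ * handling elided):
++ *
++ *	struct assoc_array_edit *edit;
++ *
++ *	__key_link_begin(keyring, &key->index_key, &edit);
++ *				// takes keyring->sem, reserves quota and
++ *				// builds an insert/replace edit script
++ *	__key_link(key, &edit);	// applies the edit; sets edit to NULL
++ *	__key_link_end(keyring, &key->index_key, edit);
++ *				// edit is NULL if it was applied; if not,
++ *				// the edit is cancelled and quota returned
++ */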
+@@ -1036,90 +1216,36 @@ EXPORT_SYMBOL(key_link);
+  */
+ int key_unlink(struct key *keyring, struct key *key)
+ {
+-	struct keyring_list *klist, *nklist;
+-	int loop, ret;
++	struct assoc_array_edit *edit;
++	int ret;
+ 
+ 	key_check(keyring);
+ 	key_check(key);
+ 
+-	ret = -ENOTDIR;
+ 	if (keyring->type != &key_type_keyring)
+-		goto error;
++		return -ENOTDIR;
+ 
+ 	down_write(&keyring->sem);
+ 
+-	klist = rcu_dereference_locked_keyring(keyring);
+-	if (klist) {
+-		/* search the keyring for the key */
+-		for (loop = 0; loop < klist->nkeys; loop++)
+-			if (rcu_access_pointer(klist->keys[loop]) == key)
+-				goto key_is_present;
++	edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops,
++				  &key->index_key);
++	if (IS_ERR(edit)) {
++		ret = PTR_ERR(edit);
++		goto error;
+ 	}
+-
+-	up_write(&keyring->sem);
+ 	ret = -ENOENT;
+-	goto error;
+-
+-key_is_present:
+-	/* we need to copy the key list for RCU purposes */
+-	nklist = kmalloc(sizeof(*klist) +
+-			 sizeof(struct key *) * klist->maxkeys,
+-			 GFP_KERNEL);
+-	if (!nklist)
+-		goto nomem;
+-	nklist->maxkeys = klist->maxkeys;
+-	nklist->nkeys = klist->nkeys - 1;
+-
+-	if (loop > 0)
+-		memcpy(&nklist->keys[0],
+-		       &klist->keys[0],
+-		       loop * sizeof(struct key *));
+-
+-	if (loop < nklist->nkeys)
+-		memcpy(&nklist->keys[loop],
+-		       &klist->keys[loop + 1],
+-		       (nklist->nkeys - loop) * sizeof(struct key *));
+-
+-	/* adjust the user's quota */
+-	key_payload_reserve(keyring,
+-			    keyring->datalen - KEYQUOTA_LINK_BYTES);
+-
+-	rcu_assign_pointer(keyring->payload.subscriptions, nklist);
+-
+-	up_write(&keyring->sem);
+-
+-	/* schedule for later cleanup */
+-	klist->delkey = loop;
+-	call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
++	if (edit == NULL)
++		goto error;
+ 
++	assoc_array_apply_edit(edit);
+ 	ret = 0;
+ 
+ error:
+-	return ret;
+-nomem:
+-	ret = -ENOMEM;
+ 	up_write(&keyring->sem);
+-	goto error;
++	return ret;
+ }
+ EXPORT_SYMBOL(key_unlink);
+ 
+-/*
+- * Dispose of a keyring list after the RCU grace period, releasing the keys it
+- * links to.
+- */
+-static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
+-{
+-	struct keyring_list *klist;
+-	int loop;
+-
+-	klist = container_of(rcu, struct keyring_list, rcu);
+-
+-	for (loop = klist->nkeys - 1; loop >= 0; loop--)
+-		key_put(rcu_access_pointer(klist->keys[loop]));
+-
+-	kfree(klist);
+-}
+-
+ /**
+  * keyring_clear - Clear a keyring
+  * @keyring: The keyring to clear.
+@@ -1130,33 +1256,25 @@ static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
+  */
+ int keyring_clear(struct key *keyring)
+ {
+-	struct keyring_list *klist;
++	struct assoc_array_edit *edit;
+ 	int ret;
+ 
+-	ret = -ENOTDIR;
+-	if (keyring->type == &key_type_keyring) {
+-		/* detach the pointer block with the locks held */
+-		down_write(&keyring->sem);
+-
+-		klist = rcu_dereference_locked_keyring(keyring);
+-		if (klist) {
+-			/* adjust the quota */
+-			key_payload_reserve(keyring,
+-					    sizeof(struct keyring_list));
+-
+-			rcu_assign_pointer(keyring->payload.subscriptions,
+-					   NULL);
+-		}
+-
+-		up_write(&keyring->sem);
++	if (keyring->type != &key_type_keyring)
++		return -ENOTDIR;
+ 
+-		/* free the keys after the locks have been dropped */
+-		if (klist)
+-			call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
++	down_write(&keyring->sem);
+ 
++	edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
++	if (IS_ERR(edit)) {
++		ret = PTR_ERR(edit);
++	} else {
++		if (edit)
++			assoc_array_apply_edit(edit);
++		key_payload_reserve(keyring, 0);
+ 		ret = 0;
+ 	}
+ 
++	up_write(&keyring->sem);
+ 	return ret;
+ }
+ EXPORT_SYMBOL(keyring_clear);
+@@ -1168,17 +1286,25 @@ EXPORT_SYMBOL(keyring_clear);
+  */
+ static void keyring_revoke(struct key *keyring)
+ {
+-	struct keyring_list *klist;
++	struct assoc_array_edit *edit;
+ 
+-	klist = rcu_dereference_locked_keyring(keyring);
++	edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
++	if (!IS_ERR(edit)) {
++		if (edit)
++			assoc_array_apply_edit(edit);
++		key_payload_reserve(keyring, 0);
++	}
++}
+ 
+-	/* adjust the quota */
+-	key_payload_reserve(keyring, 0);
++static bool gc_iterator(void *object, void *iterator_data)
++{
++	struct key *key = keyring_ptr_to_key(object);
++	time_t *limit = iterator_data;
+ 
+-	if (klist) {
+-		rcu_assign_pointer(keyring->payload.subscriptions, NULL);
+-		call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
+-	}
++	if (key_is_dead(key, *limit))
++		return false;
++	key_get(key);
++	return true;
+ }
+ 
+ /*
+@@ -1191,88 +1317,12 @@ static void keyring_revoke(struct key *keyring)
+  */
+ void keyring_gc(struct key *keyring, time_t limit)
+ {
+-	struct keyring_list *klist, *new;
+-	struct key *key;
+-	int loop, keep, max;
+-
+ 	kenter("{%x,%s}", key_serial(keyring), keyring->description);
+ 
+ 	down_write(&keyring->sem);
+-
+-	klist = rcu_dereference_locked_keyring(keyring);
+-	if (!klist)
+-		goto no_klist;
+-
+-	/* work out how many subscriptions we're keeping */
+-	keep = 0;
+-	for (loop = klist->nkeys - 1; loop >= 0; loop--)
+-		if (!key_is_dead(rcu_deref_link_locked(klist, loop, keyring),
+-				 limit))
+-			keep++;
+-
+-	if (keep == klist->nkeys)
+-		goto just_return;
+-
+-	/* allocate a new keyring payload */
+-	max = roundup(keep, 4);
+-	new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *),
+-		      GFP_KERNEL);
+-	if (!new)
+-		goto nomem;
+-	new->maxkeys = max;
+-	new->nkeys = 0;
+-	new->delkey = 0;
+-
+-	/* install the live keys
+-	 * - must take care as expired keys may be updated back to life
+-	 */
+-	keep = 0;
+-	for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+-		key = rcu_deref_link_locked(klist, loop, keyring);
+-		if (!key_is_dead(key, limit)) {
+-			if (keep >= max)
+-				goto discard_new;
+-			RCU_INIT_POINTER(new->keys[keep++], key_get(key));
+-		}
+-	}
+-	new->nkeys = keep;
+-
+-	/* adjust the quota */
+-	key_payload_reserve(keyring,
+-			    sizeof(struct keyring_list) +
+-			    KEYQUOTA_LINK_BYTES * keep);
+-
+-	if (keep == 0) {
+-		rcu_assign_pointer(keyring->payload.subscriptions, NULL);
+-		kfree(new);
+-	} else {
+-		rcu_assign_pointer(keyring->payload.subscriptions, new);
+-	}
+-
+-	up_write(&keyring->sem);
+-
+-	call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
+-	kleave(" [yes]");
+-	return;
+-
+-discard_new:
+-	new->nkeys = keep;
+-	keyring_clear_rcu_disposal(&new->rcu);
++	assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops,
++		       gc_iterator, &limit);
+ 	up_write(&keyring->sem);
+-	kleave(" [discard]");
+-	return;
+ 
+-just_return:
+-	up_write(&keyring->sem);
+-	kleave(" [no dead]");
+-	return;
+-
+-no_klist:
+-	up_write(&keyring->sem);
+-	kleave(" [no_klist]");
+-	return;
+-
+-nomem:
+-	up_write(&keyring->sem);
+-	kleave(" [oom]");
++	kleave("");
+ }
+diff --git a/security/keys/request_key.c b/security/keys/request_key.c
+index ab75df4..df94827 100644
+--- a/security/keys/request_key.c
++++ b/security/keys/request_key.c
+@@ -351,7 +351,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
+ 			       struct key_user *user,
+ 			       struct key **_key)
+ {
+-	unsigned long prealloc;
++	struct assoc_array_edit *edit;
+ 	struct key *key;
+ 	key_perm_t perm;
+ 	key_ref_t key_ref;
+@@ -380,7 +380,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
+ 	set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
+ 
+ 	if (dest_keyring) {
+-		ret = __key_link_begin(dest_keyring, &ctx->index_key, &prealloc);
++		ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit);
+ 		if (ret < 0)
+ 			goto link_prealloc_failed;
+ 	}
+@@ -395,11 +395,11 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
+ 		goto key_already_present;
+ 
+ 	if (dest_keyring)
+-		__key_link(dest_keyring, key, &prealloc);
++		__key_link(key, &edit);
+ 
+ 	mutex_unlock(&key_construction_mutex);
+ 	if (dest_keyring)
+-		__key_link_end(dest_keyring, &ctx->index_key, prealloc);
++		__key_link_end(dest_keyring, &ctx->index_key, edit);
+ 	mutex_unlock(&user->cons_lock);
+ 	*_key = key;
+ 	kleave(" = 0 [%d]", key_serial(key));
+@@ -414,8 +414,8 @@ key_already_present:
+ 	if (dest_keyring) {
+ 		ret = __key_link_check_live_key(dest_keyring, key);
+ 		if (ret == 0)
+-			__key_link(dest_keyring, key, &prealloc);
+-		__key_link_end(dest_keyring, &ctx->index_key, prealloc);
++			__key_link(key, &edit);
++		__key_link_end(dest_keyring, &ctx->index_key, edit);
+ 		if (ret < 0)
+ 			goto link_check_failed;
+ 	}
+-- 
+1.8.3.1
+
diff --git a/keys-krb-support.patch b/keys-krb-support.patch
new file mode 100644
index 0000000..07a909d
--- /dev/null
+++ b/keys-krb-support.patch
@@ -0,0 +1,747 @@
+From 64160c504842a359801cff17464931fa028ff164 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells at redhat.com>
+Date: Fri, 30 Aug 2013 15:37:54 +0100
+Subject: [PATCH 1/2] KEYS: Implement a big key type that can save to tmpfs
+
+Implement a big key type that can save its contents to tmpfs and thus
+swapspace when memory is tight.  This is useful for Kerberos ticket caches.
+
+Signed-off-by: David Howells <dhowells at redhat.com>
+Tested-by: Simo Sorce <simo at redhat.com>
+---
+ include/keys/big_key-type.h |  25 ++++++
+ include/linux/key.h         |   1 +
+ security/keys/Kconfig       |  11 +++
+ security/keys/Makefile      |   1 +
+ security/keys/big_key.c     | 204 ++++++++++++++++++++++++++++++++++++++++++++
+ 5 files changed, 242 insertions(+)
+ create mode 100644 include/keys/big_key-type.h
+ create mode 100644 security/keys/big_key.c
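+
+[Editor's note: a hedged userspace sketch of exercising the new type once
+this patch is applied - storing a large blob (say, a serialised Kerberos
+ticket cache) as a "big_key" via the libkeyutils add_key() wrapper.  The
+description string is invented for illustration.]
+
+	#include <stdio.h>
+	#include <string.h>
+	#include <keyutils.h>
+
+	int main(void)
+	{
+		char payload[8192];		/* big enough to hit tmpfs */
+		key_serial_t id;
+
+		memset(payload, 'K', sizeof(payload));
+		id = add_key("big_key", "krb_ccache:demo", payload,
+			     sizeof(payload), KEY_SPEC_SESSION_KEYRING);
+		if (id < 0) {
+			perror("add_key");  /* EINVAL until the type exists */
+			return 1;
+		}
+		printf("big_key id: %d\n", id);
+		return 0;
+	}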
+
+diff --git a/include/keys/big_key-type.h b/include/keys/big_key-type.h
+new file mode 100644
+index 0000000..d69bc8a
+--- /dev/null
++++ b/include/keys/big_key-type.h
+@@ -0,0 +1,25 @@
++/* Big capacity key type.
++ *
++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
++ * Written by David Howells (dhowells at redhat.com)
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++
++#ifndef _KEYS_BIG_KEY_TYPE_H
++#define _KEYS_BIG_KEY_TYPE_H
++
++#include <linux/key-type.h>
++
++extern struct key_type key_type_big_key;
++
++extern int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep);
++extern void big_key_revoke(struct key *key);
++extern void big_key_destroy(struct key *key);
++extern void big_key_describe(const struct key *big_key, struct seq_file *m);
++extern long big_key_read(const struct key *key, char __user *buffer, size_t buflen);
++
++#endif /* _KEYS_BIG_KEY_TYPE_H */
+diff --git a/include/linux/key.h b/include/linux/key.h
+index 2417f78..010dbb6 100644
+--- a/include/linux/key.h
++++ b/include/linux/key.h
+@@ -201,6 +201,7 @@ struct key {
+ 			unsigned long		value;
+ 			void __rcu		*rcudata;
+ 			void			*data;
++			void			*data2[2];
+ 		} payload;
+ 		struct assoc_array keys;
+ 	};
+diff --git a/security/keys/Kconfig b/security/keys/Kconfig
+index 15e0dfe..b563622 100644
+--- a/security/keys/Kconfig
++++ b/security/keys/Kconfig
+@@ -20,6 +20,17 @@ config KEYS
+ 
+ 	  If you are unsure as to whether this is required, answer N.
+ 
++config BIG_KEYS
++	tristate "Large payload keys"
++	depends on KEYS
++	depends on TMPFS
++	help
++	  This option provides support for holding large keys within the kernel
++	  (for example Kerberos ticket caches).  The data may be stored out to
++	  swapspace by tmpfs.
++
++	  If you are unsure as to whether this is required, answer N.
++
+ config TRUSTED_KEYS
+ 	tristate "TRUSTED KEYS"
+ 	depends on KEYS && TCG_TPM
+diff --git a/security/keys/Makefile b/security/keys/Makefile
+index 504aaa0..c487c77 100644
+--- a/security/keys/Makefile
++++ b/security/keys/Makefile
+@@ -22,5 +22,6 @@ obj-$(CONFIG_SYSCTL) += sysctl.o
+ #
+ # Key types
+ #
++obj-$(CONFIG_BIG_KEYS) += big_key.o
+ obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
+ obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted-keys/
+diff --git a/security/keys/big_key.c b/security/keys/big_key.c
+new file mode 100644
+index 0000000..5f9defc
+--- /dev/null
++++ b/security/keys/big_key.c
+@@ -0,0 +1,204 @@
++/* Large capacity key type
++ *
++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
++ * Written by David Howells (dhowells at redhat.com)
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public Licence
++ * as published by the Free Software Foundation; either version
++ * 2 of the Licence, or (at your option) any later version.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/seq_file.h>
++#include <linux/file.h>
++#include <linux/shmem_fs.h>
++#include <linux/err.h>
++#include <keys/user-type.h>
++#include <keys/big_key-type.h>
++
++MODULE_LICENSE("GPL");
++
++/*
++ * If the data is under this limit, there's no point creating a shm file to
++ * hold it as the permanently resident metadata for the shmem fs will be at
++ * least as large as the data.
++ */
++#define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry))
++
++/*
++ * big_key defined keys take an arbitrary string as the description and an
++ * arbitrary blob of data as the payload
++ */
++struct key_type key_type_big_key = {
++	.name			= "big_key",
++	.def_lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
++	.instantiate		= big_key_instantiate,
++	.match			= user_match,
++	.revoke			= big_key_revoke,
++	.destroy		= big_key_destroy,
++	.describe		= big_key_describe,
++	.read			= big_key_read,
++};
++
++/*
++ * Instantiate a big key
++ */
++int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
++{
++	struct path *path = (struct path *)&key->payload.data2;
++	struct file *file;
++	ssize_t written;
++	size_t datalen = prep->datalen;
++	int ret;
++
++	ret = -EINVAL;
++	if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data)
++		goto error;
++
++	/* Set an arbitrary quota */
++	ret = key_payload_reserve(key, 16);
++	if (ret < 0)
++		goto error;
++
++	key->type_data.x[1] = datalen;
++
++	if (datalen > BIG_KEY_FILE_THRESHOLD) {
++		/* Create a shmem file to store the data in.  This will permit the data
++		 * to be swapped out if needed.
++		 *
++		 * TODO: Encrypt the stored data with a temporary key.
++		 */
++		file = shmem_file_setup("", datalen, 0);
++		if (IS_ERR(file)) {
++			ret = PTR_ERR(file);
++			goto err_quota;
++		}
++
++		written = kernel_write(file, prep->data, prep->datalen, 0);
++		if (written != datalen) {
++			ret = written;
++			if (written >= 0)
++				ret = -ENOMEM;
++			goto err_fput;
++		}
++
++		/* Pin the mount and dentry to the key so that we can open it again
++		 * later
++		 */
++		*path = file->f_path;
++		path_get(path);
++		fput(file);
++	} else {
++		/* Just store the data in a buffer */
++		void *data = kmalloc(datalen, GFP_KERNEL);
++		if (!data) {
++			ret = -ENOMEM;
++			goto err_quota;
++		}
++
++		key->payload.data = memcpy(data, prep->data, prep->datalen);
++	}
++	return 0;
++
++err_fput:
++	fput(file);
++err_quota:
++	key_payload_reserve(key, 0);
++error:
++	return ret;
++}
++
++/*
++ * dispose of the quota and truncate the backing file when a big_key is revoked
++ * - called with the key sem write-locked
++ */
++void big_key_revoke(struct key *key)
++{
++	struct path *path = (struct path *)&key->payload.data2;
++
++	/* clear the quota */
++	key_payload_reserve(key, 0);
++	if (key_is_instantiated(key) && key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD)
++		vfs_truncate(path, 0);
++}
++
++/*
++ * dispose of the data dangling from the corpse of a big_key key
++ */
++void big_key_destroy(struct key *key)
++{
++	if (key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) {
++		struct path *path = (struct path *)&key->payload.data2;
++		path_put(path);
++		path->mnt = NULL;
++		path->dentry = NULL;
++	} else {
++		kfree(key->payload.data);
++		key->payload.data = NULL;
++	}
++}
++
++/*
++ * describe the big_key key
++ */
++void big_key_describe(const struct key *key, struct seq_file *m)
++{
++	unsigned long datalen = key->type_data.x[1];
++
++	seq_puts(m, key->description);
++
++	if (key_is_instantiated(key))
++		seq_printf(m, ": %lu [%s]",
++			   datalen,
++			   datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff");
++}
++
++/*
++ * read the key data
++ * - the key's semaphore is read-locked
++ */
++long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
++{
++	unsigned long datalen = key->type_data.x[1];
++	long ret;
++
++	if (!buffer || buflen < datalen)
++		return datalen;
++
++	if (datalen > BIG_KEY_FILE_THRESHOLD) {
++		struct path *path = (struct path *)&key->payload.data2;
++		struct file *file;
++		loff_t pos;
++
++		file = dentry_open(path, O_RDONLY, current_cred());
++		if (IS_ERR(file))
++			return PTR_ERR(file);
++
++		pos = 0;
++		ret = vfs_read(file, buffer, datalen, &pos);
++		fput(file);
++		if (ret >= 0 && ret != datalen)
++			ret = -EIO;
++	} else {
++		ret = datalen;
++		if (copy_to_user(buffer, key->payload.data, datalen) != 0)
++			ret = -EFAULT;
++	}
++
++	return ret;
++}
++
++/*
++ * Module stuff
++ */
++static int __init big_key_init(void)
++{
++	return register_key_type(&key_type_big_key);
++}
++
++static void __exit big_key_cleanup(void)
++{
++	unregister_key_type(&key_type_big_key);
++}
++
++module_init(big_key_init);
++module_exit(big_key_cleanup);
+-- 
+1.8.3.1
+
+
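+Once CONFIG_BIG_KEYS is enabled, the new type can be exercised from
+userspace through the ordinary add_key(2) interface.  A minimal
+illustrative sketch (the description "tkt785" merely echoes the ccache
+naming in the next patch; big_key_instantiate() caps payloads at 1MB,
+and anything over BIG_KEY_FILE_THRESHOLD lands in a tmpfs-backed file):
+
+	#include <keyutils.h>
+	#include <stdio.h>
+	#include <string.h>
+
+	int main(void)
+	{
+		static char payload[64 * 1024];	/* stand-in for a ccache blob */
+		key_serial_t id;
+
+		memset(payload, 0xaa, sizeof(payload));
+		id = add_key("big_key", "tkt785", payload, sizeof(payload),
+			     KEY_SPEC_SESSION_KEYRING);
+		if (id < 0) {
+			perror("add_key");
+			return 1;
+		}
+		printf("big_key serial: %d\n", id);
+		return 0;
+	}
+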
+From b1e5b74e060add16de8d6005802644fa1700167f Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells at redhat.com>
+Date: Fri, 30 Aug 2013 15:37:54 +0100
+Subject: [PATCH 2/2] KEYS: Add per-user_namespace registers for persistent
+ per-UID kerberos caches
+
+Add support for per-user_namespace registers of persistent per-UID kerberos
+caches held within the kernel.
+
+This allows the kerberos cache to be retained beyond the life of all of a user's
+processes so that the user's cron jobs can work.
+
+The kerberos cache is envisioned as a keyring/key tree looking something like:
+
+	struct user_namespace
+	  \___ .krb_cache keyring		- The register
+		\___ _krb.0 keyring		- Root's Kerberos cache
+		\___ _krb.5000 keyring		- User 5000's Kerberos cache
+		\___ _krb.5001 keyring		- User 5001's Kerberos cache
+			\___ tkt785 big_key	- A ccache blob
+			\___ tkt12345 big_key	- Another ccache blob
+
+Or possibly:
+
+	struct user_namespace
+	  \___ .krb_cache keyring		- The register
+		\___ _krb.0 keyring		- Root's Kerberos cache
+		\___ _krb.5000 keyring		- User 5000's Kerberos cache
+		\___ _krb.5001 keyring		- User 5001's Kerberos cache
+			\___ tkt785 keyring	- A ccache
+				\___ krbtgt/REDHAT.COM at REDHAT.COM big_key
+				\___ http/REDHAT.COM at REDHAT.COM user
+				\___ afs/REDHAT.COM at REDHAT.COM user
+				\___ nfs/REDHAT.COM at REDHAT.COM user
+				\___ krbtgt/KERNEL.ORG at KERNEL.ORG big_key
+				\___ http/KERNEL.ORG at KERNEL.ORG big_key
+
+What goes into a particular Kerberos cache is entirely up to userspace.  Kernel
+support is limited to giving you the Kerberos cache keyring that you want.
+
+The user asks for their Kerberos cache by:
+
+	krb_cache = keyctl_get_krbcache(uid, dest_keyring);
+
+The uid is either -1 (meaning the caller's own UID) for the caller's own
+cache, or the UID of some other user (which requires CAP_SETUID) for that
+user's cache.  This permits a daemon such as rpc.gssd to manage the cache
+on a user's behalf.
+
+The cache returned is a keyring named "_persistent.<uid>" that the possessor
+can read, search, clear, invalidate, unlink from and add links to.  Active
+LSMs get a chance to rule on whether the caller is permitted to make a link.
+
+Each uid's cache keyring is created when it is first accessed and is given a
+timeout that is extended each time this function is called so that the keyring
+goes away after a while.  The timeout is configurable by sysctl but defaults to
+three days.
+
+Each user_namespace struct gets a lazily-created keyring that serves as the
+register.  The cache keyrings are added to it.  This means that standard key
+search and garbage collection facilities are available.
+
+The user_namespace struct's register goes away when the namespace does, and
+anything left in it is then automatically gc'd.
+
+Signed-off-by: David Howells <dhowells at redhat.com>
+Tested-by: Simo Sorce <simo at redhat.com>
+cc: Serge E. Hallyn <serge.hallyn at ubuntu.com>
+cc: Eric W. Biederman <ebiederm at xmission.com>
+---
+ include/linux/user_namespace.h |   6 ++
+ include/uapi/linux/keyctl.h    |   1 +
+ kernel/user.c                  |   4 +
+ kernel/user_namespace.c        |   6 ++
+ security/keys/Kconfig          |  17 +++++
+ security/keys/Makefile         |   1 +
+ security/keys/compat.c         |   3 +
+ security/keys/internal.h       |   9 +++
+ security/keys/keyctl.c         |   3 +
+ security/keys/persistent.c     | 169 +++++++++++++++++++++++++++++++++++++++++
+ security/keys/sysctl.c         |  11 +++
+ 11 files changed, 230 insertions(+)
+ create mode 100644 security/keys/persistent.c
+
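+Pending a libkeyutils wrapper, the new operation can be driven with the
+raw syscall.  This sketch assumes only the KEYCTL_GET_PERSISTENT value
+added by the keyctl.h hunk below plus the standard special keyring IDs:
+
+	#include <sys/syscall.h>
+	#include <unistd.h>
+	#include <stdio.h>
+
+	#define KEYCTL_GET_PERSISTENT		22	/* from this patch */
+	#define KEY_SPEC_SESSION_KEYRING	-3	/* standard special ID */
+
+	int main(void)
+	{
+		/* uid -1 means the calling user; other UIDs need CAP_SETUID */
+		long ring = syscall(SYS_keyctl, KEYCTL_GET_PERSISTENT,
+				    (unsigned long)-1,
+				    KEY_SPEC_SESSION_KEYRING);
+		if (ring < 0) {
+			perror("keyctl");
+			return 1;
+		}
+		printf("persistent keyring serial: %ld\n", ring);
+		return 0;
+	}
+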
+diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
+index b6b215f..cf21958 100644
+--- a/include/linux/user_namespace.h
++++ b/include/linux/user_namespace.h
+@@ -28,6 +28,12 @@ struct user_namespace {
+ 	unsigned int		proc_inum;
+ 	bool			may_mount_sysfs;
+ 	bool			may_mount_proc;
++
++	/* Register of per-UID persistent keyrings for this namespace */
++#ifdef CONFIG_PERSISTENT_KEYRINGS
++	struct key		*persistent_keyring_register;
++	struct rw_semaphore	persistent_keyring_register_sem;
++#endif
+ };
+ 
+ extern struct user_namespace init_user_ns;
+diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
+index c9b7f4fa..840cb99 100644
+--- a/include/uapi/linux/keyctl.h
++++ b/include/uapi/linux/keyctl.h
+@@ -56,5 +56,6 @@
+ #define KEYCTL_REJECT			19	/* reject a partially constructed key */
+ #define KEYCTL_INSTANTIATE_IOV		20	/* instantiate a partially constructed key */
+ #define KEYCTL_INVALIDATE		21	/* invalidate a key */
++#define KEYCTL_GET_PERSISTENT		22	/* get a user's persistent keyring */
+ 
+ #endif /*  _LINUX_KEYCTL_H */
+diff --git a/kernel/user.c b/kernel/user.c
+index 69b4c3d..6c9e1b9 100644
+--- a/kernel/user.c
++++ b/kernel/user.c
+@@ -53,6 +53,10 @@ struct user_namespace init_user_ns = {
+ 	.proc_inum = PROC_USER_INIT_INO,
+ 	.may_mount_sysfs = true,
+ 	.may_mount_proc = true,
++#ifdef CONFIG_PERSISTENT_KEYRINGS
++	.persistent_keyring_register_sem =
++	__RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),
++#endif
+ };
+ EXPORT_SYMBOL_GPL(init_user_ns);
+ 
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index d8c30db..ef7985e 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -99,6 +99,9 @@ int create_user_ns(struct cred *new)
+ 
+ 	update_mnt_policy(ns);
+ 
++#ifdef CONFIG_PERSISTENT_KEYRINGS
++	init_rwsem(&ns->persistent_keyring_register_sem);
++#endif
+ 	return 0;
+ }
+ 
+@@ -123,6 +126,9 @@ void free_user_ns(struct user_namespace *ns)
+ 
+ 	do {
+ 		parent = ns->parent;
++#ifdef CONFIG_PERSISTENT_KEYRINGS
++		key_put(ns->persistent_keyring_register);
++#endif
+ 		proc_free_inum(ns->proc_inum);
+ 		kmem_cache_free(user_ns_cachep, ns);
+ 		ns = parent;
+diff --git a/security/keys/Kconfig b/security/keys/Kconfig
+index b563622..53d8748 100644
+--- a/security/keys/Kconfig
++++ b/security/keys/Kconfig
+@@ -20,6 +20,23 @@ config KEYS
+ 
+ 	  If you are unsure as to whether this is required, answer N.
+ 
++config PERSISTENT_KEYRINGS
++	bool "Enable register of persistent per-UID keyrings"
++	depends on KEYS
++	help
++	  This option provides a register of persistent per-UID keyrings,
++	  primarily aimed at Kerberos key storage.  The keyrings are persistent
++	  in the sense that they stay around after all processes of that UID
++	  have exited, not that they survive the machine being rebooted.
++
++	  A particular keyring may be accessed by either the user whose keyring
++	  it is or by a process with administrative privileges.  The active
++	  LSMs get to rule on which admin-level processes get to access the
++	  cache.
++
++	  Keyrings are created and added into the register upon demand and get
++	  removed if they expire (a default timeout is set upon creation).
++
+ config BIG_KEYS
+ 	tristate "Large payload keys"
+ 	depends on KEYS
+diff --git a/security/keys/Makefile b/security/keys/Makefile
+index c487c77..dfb3a7b 100644
+--- a/security/keys/Makefile
++++ b/security/keys/Makefile
+@@ -18,6 +18,7 @@ obj-y := \
+ obj-$(CONFIG_KEYS_COMPAT) += compat.o
+ obj-$(CONFIG_PROC_FS) += proc.o
+ obj-$(CONFIG_SYSCTL) += sysctl.o
++obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o
+ 
+ #
+ # Key types
+diff --git a/security/keys/compat.c b/security/keys/compat.c
+index d65fa7f..bbd32c7 100644
+--- a/security/keys/compat.c
++++ b/security/keys/compat.c
+@@ -138,6 +138,9 @@ asmlinkage long compat_sys_keyctl(u32 option,
+ 	case KEYCTL_INVALIDATE:
+ 		return keyctl_invalidate_key(arg2);
+ 
++	case KEYCTL_GET_PERSISTENT:
++		return keyctl_get_persistent(arg2, arg3);
++
+ 	default:
+ 		return -EOPNOTSUPP;
+ 	}
+diff --git a/security/keys/internal.h b/security/keys/internal.h
+index 581c6f6..80b2aac 100644
+--- a/security/keys/internal.h
++++ b/security/keys/internal.h
+@@ -255,6 +255,15 @@ extern long keyctl_invalidate_key(key_serial_t);
+ extern long keyctl_instantiate_key_common(key_serial_t,
+ 					  const struct iovec *,
+ 					  unsigned, size_t, key_serial_t);
++#ifdef CONFIG_PERSISTENT_KEYRINGS
++extern long keyctl_get_persistent(uid_t, key_serial_t);
++extern unsigned persistent_keyring_expiry;
++#else
++static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring)
++{
++	return -EOPNOTSUPP;
++}
++#endif
+ 
+ /*
+  * Debugging key validation
+diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
+index 33cfd27..cee72ce 100644
+--- a/security/keys/keyctl.c
++++ b/security/keys/keyctl.c
+@@ -1667,6 +1667,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
+ 	case KEYCTL_INVALIDATE:
+ 		return keyctl_invalidate_key((key_serial_t) arg2);
+ 
++	case KEYCTL_GET_PERSISTENT:
++		return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3);
++
+ 	default:
+ 		return -EOPNOTSUPP;
+ 	}
+diff --git a/security/keys/persistent.c b/security/keys/persistent.c
+new file mode 100644
+index 0000000..631a022
+--- /dev/null
++++ b/security/keys/persistent.c
+@@ -0,0 +1,169 @@
++/* General persistent per-UID keyrings register
++ *
++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
++ * Written by David Howells (dhowells at redhat.com)
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public Licence
++ * as published by the Free Software Foundation; either version
++ * 2 of the Licence, or (at your option) any later version.
++ */
++
++#include <linux/user_namespace.h>
++#include "internal.h"
++
++unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */
++
++/*
++ * Create the persistent keyring register for the current user namespace.
++ *
++ * Called with the namespace's sem locked for writing.
++ */
++static int key_create_persistent_register(struct user_namespace *ns)
++{
++	struct key *reg = keyring_alloc(".persistent_register",
++					KUIDT_INIT(0), KGIDT_INIT(0),
++					current_cred(),
++					((KEY_POS_ALL & ~KEY_POS_SETATTR) |
++					 KEY_USR_VIEW | KEY_USR_READ),
++					KEY_ALLOC_NOT_IN_QUOTA, NULL);
++	if (IS_ERR(reg))
++		return PTR_ERR(reg);
++
++	ns->persistent_keyring_register = reg;
++	return 0;
++}
++
++/*
++ * Create the persistent keyring for the specified user.
++ *
++ * Called with the namespace's sem locked for writing.
++ */
++static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid,
++				       struct keyring_index_key *index_key)
++{
++	struct key *persistent;
++	key_ref_t reg_ref, persistent_ref;
++
++	if (!ns->persistent_keyring_register) {
++		long err = key_create_persistent_register(ns);
++		if (err < 0)
++			return ERR_PTR(err);
++	} else {
++		reg_ref = make_key_ref(ns->persistent_keyring_register, true);
++		persistent_ref = find_key_to_update(reg_ref, index_key);
++		if (persistent_ref)
++			return persistent_ref;
++	}
++
++	persistent = keyring_alloc(index_key->description,
++				   uid, INVALID_GID, current_cred(),
++				   ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
++				    KEY_USR_VIEW | KEY_USR_READ),
++				   KEY_ALLOC_NOT_IN_QUOTA,
++				   ns->persistent_keyring_register);
++	if (IS_ERR(persistent))
++		return ERR_CAST(persistent);
++
++	return make_key_ref(persistent, true);
++}
++
++/*
++ * Get the persistent keyring for a specific UID and link it to the nominated
++ * keyring.
++ */
++static long key_get_persistent(struct user_namespace *ns, kuid_t uid,
++			       key_ref_t dest_ref)
++{
++	struct keyring_index_key index_key;
++	struct key *persistent;
++	key_ref_t reg_ref, persistent_ref;
++	char buf[32];
++	long ret;
++
++	/* Look in the register if it exists */
++	index_key.type = &key_type_keyring;
++	index_key.description = buf;
++	index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid));
++
++	if (ns->persistent_keyring_register) {
++		reg_ref = make_key_ref(ns->persistent_keyring_register, true);
++		down_read(&ns->persistent_keyring_register_sem);
++		persistent_ref = find_key_to_update(reg_ref, &index_key);
++		up_read(&ns->persistent_keyring_register_sem);
++
++		if (persistent_ref)
++			goto found;
++	}
++
++	/* It wasn't in the register, so we'll need to create it.  We might
++	 * also need to create the register.
++	 */
++	down_write(&ns->persistent_keyring_register_sem);
++	persistent_ref = key_create_persistent(ns, uid, &index_key);
++	up_write(&ns->persistent_keyring_register_sem);
++	if (!IS_ERR(persistent_ref))
++		goto found;
++
++	return PTR_ERR(persistent_ref);
++
++found:
++	ret = key_task_permission(persistent_ref, current_cred(), KEY_LINK);
++	if (ret == 0) {
++		persistent = key_ref_to_ptr(persistent_ref);
++		ret = key_link(key_ref_to_ptr(dest_ref), persistent);
++		if (ret == 0) {
++			key_set_timeout(persistent, persistent_keyring_expiry);
++			ret = persistent->serial;
++		}
++	}
++
++	key_ref_put(persistent_ref);
++	return ret;
++}
++
++/*
++ * Get the persistent keyring for a specific UID and link it to the nominated
++ * keyring.
++ */
++long keyctl_get_persistent(uid_t _uid, key_serial_t destid)
++{
++	struct user_namespace *ns = current_user_ns();
++	key_ref_t dest_ref;
++	kuid_t uid;
++	long ret;
++
++	/* -1 indicates the current user */
++	if (_uid == (uid_t)-1) {
++		uid = current_uid();
++	} else {
++		uid = make_kuid(ns, _uid);
++		if (!uid_valid(uid))
++			return -EINVAL;
++
++		/* You can only see your own persistent cache if you're not
++		 * sufficiently privileged.
++		 */
++		if (uid != current_uid() &&
++		    uid != current_suid() &&
++		    uid != current_euid() &&
++		    uid != current_fsuid() &&
++		    !ns_capable(ns, CAP_SETUID))
++			return -EPERM;
++	}
++
++	/* There must be a destination keyring */
++	dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_WRITE);
++	if (IS_ERR(dest_ref))
++		return PTR_ERR(dest_ref);
++	if (key_ref_to_ptr(dest_ref)->type != &key_type_keyring) {
++		ret = -ENOTDIR;
++		goto out_put_dest;
++	}
++
++	ret = key_get_persistent(ns, uid, dest_ref);
++
++out_put_dest:
++	key_ref_put(dest_ref);
++	return ret;
++}
+diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c
+index ee32d18..8c0af08 100644
+--- a/security/keys/sysctl.c
++++ b/security/keys/sysctl.c
+@@ -61,5 +61,16 @@ ctl_table key_sysctls[] = {
+ 		.extra1 = (void *) &zero,
+ 		.extra2 = (void *) &max,
+ 	},
++#ifdef CONFIG_PERSISTENT_KEYRINGS
++	{
++		.procname = "persistent_keyring_expiry",
++		.data = &persistent_keyring_expiry,
++		.maxlen = sizeof(unsigned),
++		.mode = 0644,
++		.proc_handler = proc_dointvec_minmax,
++		.extra1 = (void *) &zero,
++		.extra2 = (void *) &max,
++	},
++#endif
+ 	{ }
+ };
+-- 
+1.8.3.1
+
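The expiry knob added by the sysctl.c hunk should appear alongside the
existing key sysctls, normally under /proc/sys/kernel/keys/.  A small
sketch for inspecting it; the path is an assumption, not part of the
patch:

	#include <stdio.h>

	int main(void)
	{
		unsigned expiry;	/* default: 3 * 24 * 3600 = 259200 */
		FILE *f = fopen("/proc/sys/kernel/keys/persistent_keyring_expiry", "r");

		if (!f)
			return 1;
		if (fscanf(f, "%u", &expiry) == 1)
			printf("persistent keyrings expire after %u seconds of non-use\n",
			       expiry);
		fclose(f);
		return 0;
	}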

