[systemtap] fix systemtap PR14348: race condition in task_work_add() vs. buildid.exp test case

fche fche at fedoraproject.org
Wed Jul 11 21:38:04 UTC 2012


commit 806f00ccc34c0811da4c65ffbfde781a893ad1af
Author: Frank Ch. Eigler <fche at redhat.com>
Date:   Wed Jul 11 17:37:25 2012 -0400

    fix systemtap PR14348: race condition in task_work_add() vs. buildid.exp test case

 PR14348.patch  |  412 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 systemtap.spec |    7 +-
 2 files changed, 418 insertions(+), 1 deletions(-)
---
diff --git a/PR14348.patch b/PR14348.patch
new file mode 100644
index 0000000..e1015a9
--- /dev/null
+++ b/PR14348.patch
@@ -0,0 +1,412 @@
+commit 50bd2c22a62981ae8b7b379d48574671228f1446
+Author: David Smith <dsmith at redhat.com>
+Date:   Mon Jul 9 16:07:22 2012 -0500
+
+    Add code to keep track of task_work callbacks to avoid a possible crash.
+    
+    * runtime/stp_task_work.c: New file.
+    * runtime/stp_utrace.c (utrace_init): Move STAPCONF_TASK_WORK_ADD_EXPORTED
+      code to stp_task_work_init().
+      (utrace_exit): Call stp_task_work_exit().
+      (utrace_cleanup): Call task_work_cancel() wrapper function.
+      (utrace_free): Ditto.
+      (utrace_do_stop): Call task_work_add() wrapper function.
+      (utrace_control): Ditto.
+      (finish_report): Ditto.
+      (utrace_resume): Call stp_task_work_func_done().
+    * runtime/task_finder2.c (__stp_tf_cancel_task_work): Call
+      task_work_cancel() wrapper function.
+      (__stp_tf_quiesce_worker): Call stp_task_work_func_done().
+      (__stp_utrace_task_finder_target_quiesce): Call task_work_add() wrapper
+      function.
+      (__stp_tf_mmap_worker): Call stp_task_work_func_done().
+      (__stp_utrace_task_finder_target_syscall_exit): Call task_work_add() wrapper
+      function.
+
+diff --git a/runtime/task_finder2.c b/runtime/task_finder2.c
+index 19cb99e..81cb04e 100644
+--- a/runtime/task_finder2.c
++++ b/runtime/task_finder2.c
+@@ -9,7 +9,6 @@
+ #ifndef STAPCONF_TASK_UID
+ #include <linux/cred.h>
+ #endif
+-#include <linux/task_work.h>
+ #include "syscall.h"
+ #include "task_finder_map.c"
+ 
+@@ -170,7 +169,7 @@ static void __stp_tf_cancel_task_work(void)
+ 	list_for_each_entry(node, &__stp_tf_task_work_list, list) {
+ 	    // Remove the item from the list, cancel it, then free it.
+ 	    list_del(&node->list);
+-	    task_work_cancel(node->task, node->work.func);
++	    stp_task_work_cancel(node->task, node->work.func);
+ 	    _stp_kfree(node);
+ 	}
+ 	spin_unlock_irqrestore(&__stp_tf_task_work_list_lock, flags);
+@@ -1210,8 +1209,11 @@ __stp_tf_quiesce_worker(struct task_work *work)
+ 
+ 	might_sleep();
+ 	__stp_tf_free_task_work(work);
+-	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING)
++	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
++		/* Remember that this task_work_func is finished. */
++		stp_task_work_func_done();
+ 		return;
++	}
+ 
+ 	__stp_tf_handler_start();
+ 
+@@ -1227,6 +1229,9 @@ __stp_tf_quiesce_worker(struct task_work *work)
+ 	}
+ 
+ 	__stp_tf_handler_end();
++
++	/* Remember that this task_work_func is finished. */
++	stp_task_work_func_done();
+ 	return;
+ }
+ 
+@@ -1288,12 +1293,13 @@ __stp_utrace_task_finder_target_quiesce(u32 action,
+ 			return UTRACE_RESUME;
+ 		}
+ 		init_task_work(work, &__stp_tf_quiesce_worker, tgt);
+-		rc = task_work_add(tsk, work, true);
+-		/* task_work_add() returns -ESRCH if the task has
++
++		rc = stp_task_work_add(tsk, work);
++		/* stp_task_work_add() returns -ESRCH if the task has
+ 		 * already passed exit_task_work(). Just ignore this
+ 		 * error. */
+ 		if (rc != 0 && rc != -ESRCH) {
+-			printk(KERN_ERR "%s:%d - task_work_add() returned %d\n",
++			printk(KERN_ERR "%s:%d - stp_task_work_add() returned %d\n",
+ 			       __FUNCTION__, __LINE__, rc);
+ 		}
+ 	}
+@@ -1397,13 +1403,19 @@ __stp_tf_mmap_worker(struct task_work *work)
+ 
+ 	might_sleep();
+ 	__stp_tf_free_task_work(work);
+-	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING)
++	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
++		/* Remember that this task_work_func is finished. */
++		stp_task_work_func_done();
+ 		return;
++	}
+ 
+ 	// See if we can find saved syscall info.
+ 	entry = __stp_tf_get_map_entry(current);
+-	if (entry == NULL)
++	if (entry == NULL) {
++		/* Remember that this task_work_func is finished. */
++		stp_task_work_func_done();
+ 		return;
++	}
+ 
+ 	__stp_tf_handler_start();
+ 
+@@ -1426,6 +1438,9 @@ __stp_tf_mmap_worker(struct task_work *work)
+ 	__stp_tf_remove_map_entry(entry);
+ 
+ 	__stp_tf_handler_end();
++
++	/* Remember that this task_work_func is finished. */
++	stp_task_work_func_done();
+ 	return;
+ }
+ 
+@@ -1492,12 +1507,12 @@ __stp_utrace_task_finder_target_syscall_exit(u32 action,
+ 			return UTRACE_RESUME;
+ 		}
+ 		init_task_work(work, &__stp_tf_mmap_worker, tgt);
+-		rc = task_work_add(tsk, work, true);
+-		/* task_work_add() returns -ESRCH if the task has
++		rc = stp_task_work_add(tsk, work);
++		/* stp_task_work_add() returns -ESRCH if the task has
+ 		 * already passed exit_task_work(). Just ignore this
+ 		 * error. */
+ 		if (rc != 0 && rc != -ESRCH) {
+-			printk(KERN_ERR "%s:%d - task_work_add() returned %d\n",
++			printk(KERN_ERR "%s:%d - stp_task_work_add() returned %d\n",
+ 			       __FUNCTION__, __LINE__, rc);
+ 		}
+ 	}
+diff --git a/runtime/stp_task_work.c b/runtime/stp_task_work.c
+new file mode 100644
+index 0000000..b99593e
+--- /dev/null
++++ b/runtime/stp_task_work.c
+@@ -0,0 +1,102 @@
++#ifndef _STP_TASK_WORK_C
++#define _STP_TASK_WORK_C
++
++#include <linux/task_work.h>
++
++#if !defined(STAPCONF_TASK_WORK_ADD_EXPORTED)
++typedef int (*task_work_add_fn)(struct task_struct *task,
++				  struct task_work *twork, bool notify);
++#define task_work_add (* (task_work_add_fn)kallsyms_task_work_add)
++typedef struct task_work *(*task_work_cancel_fn)(struct task_struct *,
++						 task_work_func_t);
++#define task_work_cancel (* (task_work_cancel_fn)kallsyms_task_work_cancel)
++#endif
++
++/* To avoid a crash when a task_work callback gets called after the
++ * module is unloaded, keep track of the number of current callbacks. */
++static atomic_t stp_task_work_callbacks = ATOMIC_INIT(0);
++
++/*
++ * stp_task_work_init() should be called before any other
++ * stp_task_work_* functions are called to do setup.
++ */
++int
++stp_task_work_init(void)
++{
++#if !defined(STAPCONF_TASK_WORK_ADD_EXPORTED)
++	/* The task_work_add()/task_work_cancel() functions aren't
++	 * exported. Look up those function addresses. */
++        kallsyms_task_work_add = (void *)kallsyms_lookup_name("task_work_add");
++        if (kallsyms_task_work_add == NULL) {
++		_stp_error("Can't resolve task_work_add!");
++		return -ENOENT;
++        }
++        kallsyms_task_work_cancel = (void *)kallsyms_lookup_name("task_work_cancel");
++        if (kallsyms_task_work_cancel == NULL) {
++		_stp_error("Can't resolve task_work_cancel!");
++		return -ENOENT;
++        }
++#endif
++	return 0;
++}
++
++/*
+ * stp_task_work_exit() should be called when no more
++ * stp_task_work_* functions will be called (before module exit).
++ *
++ * This function makes sure that all the callbacks are finished before
++ * letting the module unload.  If the module unloads before a callback
++ * is called, the kernel will try to make a function call to an
++ * invalid address.
++ */
++void
++stp_task_work_exit(void)
++{
++	while (atomic_read(&stp_task_work_callbacks))
++		schedule_timeout_uninterruptible(1);
++	return;
++}
++
++/*
++ * Our task_work_add() wrapper that remembers that we've got a pending
++ * callback.
++ */
++int
++stp_task_work_add(struct task_struct *task, struct task_work *twork)
++{
++	int rc;
++
++	rc = task_work_add(task, twork, true);
++	if (rc == 0)
++		atomic_inc(&stp_task_work_callbacks);	
++	return rc;
++}
++
++/*
++ * Our task_work_cancel() wrapper that remembers that a callback has
++ * been cancelled.
++ */
++struct task_work *
++stp_task_work_cancel(struct task_struct *task, task_work_func_t func)
++{
++	struct task_work *twork;
++
++	twork = task_work_cancel(task, func);
++	if (twork != NULL)
++		atomic_dec(&stp_task_work_callbacks);
++	return twork;
++}
++
++/*
++ * stp_task_work_func_done() should be called at the very end of a
++ * task_work callback function so that we can keep up with callback
++ * accounting.
++ */
++void
++stp_task_work_func_done(void)
++{
++	atomic_dec(&stp_task_work_callbacks);
++}
++
++
++#endif /* _STP_TASK_WORK_C */
+diff --git a/runtime/stp_utrace.c b/runtime/stp_utrace.c
+index d4538d9..face3f9 100644
+--- a/runtime/stp_utrace.c
++++ b/runtime/stp_utrace.c
+@@ -25,7 +25,7 @@
+ #include <linux/spinlock.h>
+ #include <trace/events/sched.h>
+ #include <trace/events/syscalls.h>
+-#include <linux/task_work.h>
++#include "stp_task_work.c"
+ 
+ /*
+  * Per-thread structure private to utrace implementation.
+@@ -105,38 +105,13 @@ static void utrace_report_exec(void *cb_data __attribute__ ((unused)),
+ #define __UTRACE_REGISTERED	1
+ static atomic_t utrace_state = ATOMIC_INIT(__UTRACE_UNREGISTERED);
+ 
+-#if !defined(STAPCONF_TASK_WORK_ADD_EXPORTED)
+-typedef int (*task_work_add_fn)(struct task_struct *task,
+-				  struct task_work *twork, bool notify);
+-#define task_work_add (* (task_work_add_fn)kallsyms_task_work_add)
+-typedef struct task_work *(*task_work_cancel_fn)(struct task_struct *,
+-						 task_work_func_t);
+-#define task_work_cancel (* (task_work_cancel_fn)kallsyms_task_work_cancel)
+-#endif
+-
+ int utrace_init(void)
+ {
+ 	int i;
+ 	int rc = -1;
+ 
+-#if !defined(STAPCONF_TASK_WORK_ADD_EXPORTED)
+-	/* The task_work_add()/task_work_cancel() functions aren't
+-	 * exported. Look up those function addresses. */
+-        kallsyms_task_work_add = (void *)kallsyms_lookup_name ("task_work_add");
+-        if (kallsyms_task_work_add == NULL) {
+-		printk(KERN_ERR "%s can't resolve task_work_add!",
+-		       THIS_MODULE->name);
+-		rc = -ENOENT;
++	if (unlikely(stp_task_work_init() != 0))
+ 		goto error;
+-        }
+-        kallsyms_task_work_cancel = (void *)kallsyms_lookup_name ("task_work_cancel");
+-        if (kallsyms_task_work_cancel == NULL) {
+-		printk(KERN_ERR "%s can't resolve task_work_cancel!",
+-		       THIS_MODULE->name);
+-		rc = -ENOENT;
+-		goto error;
+-        }
+-#endif
+ 
+ 	/* initialize the list heads */
+ 	for (i = 0; i < TASK_UTRACE_TABLE_SIZE; i++) {
+@@ -205,6 +180,8 @@ int utrace_exit(void)
+ 		kmem_cache_destroy(utrace_cachep);
+ 	if (utrace_engine_cachep)
+ 		kmem_cache_destroy(utrace_engine_cachep);
++
++	stp_task_work_exit();
+ 	return 0;
+ }
+ 
+@@ -239,7 +216,7 @@ static void utrace_cleanup(struct utrace *utrace)
+ 	}
+ 
+ 	if (utrace->task_work_added) {
+-		if (task_work_cancel(utrace->task, &utrace_resume) == NULL)
++		if (stp_task_work_cancel(utrace->task, &utrace_resume) == NULL)
+ 			printk(KERN_ERR "%s:%d - task_work_cancel() failed? task %p, %d, %s\n",
+ 			       __FUNCTION__, __LINE__, utrace->task,
+ 			       utrace->task->tgid,
+@@ -379,7 +356,7 @@ static void utrace_free(struct utrace *utrace)
+ #endif
+ 
+ 	if (utrace->task_work_added) {
+-		if (task_work_cancel(utrace->task, &utrace_resume) == NULL)
++		if (stp_task_work_cancel(utrace->task, &utrace_resume) == NULL)
+ 			printk(KERN_ERR "%s:%d - task_work_cancel() failed? task %p, %d, %s\n",
+ 			       __FUNCTION__, __LINE__, utrace->task,
+ 			       utrace->task->tgid,
+@@ -898,11 +875,11 @@ static bool utrace_do_stop(struct task_struct *target, struct utrace *utrace)
+ 	} else if (utrace->resume > UTRACE_REPORT) {
+ 		utrace->resume = UTRACE_REPORT;
+ 		if (! utrace->task_work_added) {
+-			int rc = task_work_add(target, &utrace->work, true);
++			int rc = stp_task_work_add(target, &utrace->work);
+ 			if (rc == 0) {
+ 				utrace->task_work_added = 1;
+ 			}
+-			/* task_work_add() returns -ESRCH if the task
++			/* stp_task_work_add() returns -ESRCH if the task
+ 			 * has already passed exit_task_work(). Just
+ 			 * ignore this error. */
+ 			else if (rc != -ESRCH) {
+@@ -1041,16 +1018,16 @@ static void utrace_stop(struct task_struct *task, struct utrace *utrace,
+ 		 */
+ 		utrace->resume = action;
+ 		if (! utrace->task_work_added) {
+-			int rc = task_work_add(task, &utrace->work, true);
++			int rc = stp_task_work_add(task, &utrace->work);
+ 			if (rc == 0) {
+ 				utrace->task_work_added = 1;
+ 			}
+-			/* task_work_add() returns -ESRCH if the task
++			/* stp_task_work_add() returns -ESRCH if the task
+ 			 * has already passed exit_task_work(). Just
+ 			 * ignore this error. */
+ 			else if (rc != -ESRCH) {
+ 				printk(KERN_ERR
+-				       "%s:%d - task_work_add() returned %d\n",
++				       "%s:%d - stp_task_work_add() returned %d\n",
+ 				       __FUNCTION__, __LINE__, rc);
+ 			}
+ 		}
+@@ -1397,18 +1374,17 @@ int utrace_control(struct task_struct *target,
+ 		if (action < utrace->resume) {
+ 			utrace->resume = action;
+ 			if (! utrace->task_work_added) {
+-				ret = task_work_add(target, &utrace->work,
+-						    true);
++				ret = stp_task_work_add(target, &utrace->work);
+ 				if (ret == 0) {
+ 					utrace->task_work_added = 1;
+ 				}
+-				/* task_work_add() returns -ESRCH if
++				/* stp_task_work_add() returns -ESRCH if
+ 				 * the task has already passed
+ 				 * exit_task_work(). Just ignore this
+ 				 * error. */
+ 				else if (ret != -ESRCH) {
+ 					printk(KERN_ERR
+-					       "%s:%d - task_work_add() returned %d\n",
++					       "%s:%d - stp_task_work_add() returned %d\n",
+ 					       __FUNCTION__, __LINE__, ret);
+ 				}
+ 			}
+@@ -1560,11 +1536,11 @@ static void finish_report(struct task_struct *task, struct utrace *utrace,
+ 		spin_lock(&utrace->lock);
+ 		utrace->resume = resume;
+ 		if (! utrace->task_work_added) {
+-			int rc = task_work_add(task, &utrace->work, true);
++			int rc = stp_task_work_add(task, &utrace->work);
+ 			if (rc == 0) {
+ 				utrace->task_work_added = 1;
+ 			}
+-			/* task_work_add() returns -ESRCH if the task
++			/* stp_task_work_add() returns -ESRCH if the task
+ 			 * has already passed exit_task_work(). Just
+ 			 * ignore this error. */
+ 			else if (rc != -ESRCH) {
+@@ -2132,6 +2108,9 @@ void utrace_resume(struct task_work *work)
+ 		 * call.  (The arch might use TIF_NOTIFY_RESUME for other
+ 		 * purposes as well as calling us.)
+ 		 */
++
++		/* Remember that this task_work_func is finished. */
++		stp_task_work_func_done();
+ 		return;
+ 	case UTRACE_REPORT:
+ 		if (unlikely(!(utrace->utrace_flags & UTRACE_EVENT(QUIESCE))))
+@@ -2160,6 +2139,9 @@ void utrace_resume(struct task_work *work)
+ 	 * effect now (i.e. step or interrupt).
+ 	 */
+ 	finish_resume_report(task, utrace, &report);
++	
++	/* Remember that this task_work_func is finished. */
++	stp_task_work_func_done();
+ }
+ 
+ #endif	/* _STP_UTRACE_C */
diff --git a/systemtap.spec b/systemtap.spec
index 48c7e45..5d8ec44 100644
--- a/systemtap.spec
+++ b/systemtap.spec
@@ -20,7 +20,7 @@
 
 Name: systemtap
 Version: 1.8
-Release: 1%{?dist}
+Release: 3%{?dist}
 # for version, see also configure.ac
 
 
@@ -91,6 +91,7 @@ BuildRequires: /usr/share/publican/Common_Content/%{publican_brand}/defaults.cfg
 %endif
 
 Patch2: bz837641-staprun-no-linux-types.patch
+Patch3: PR14348.patch
 
 # Install requirements
 Requires: systemtap-client = %{version}-%{release}
@@ -253,6 +254,7 @@ cd ..
 
 # bz837641-staprun-no-linux-types.patch
 %patch2 -p1
+%patch3 -p1
 
 %build
 
@@ -584,6 +586,9 @@ exit 0
 # ------------------------------------------------------------------------
 
 %changelog
+* Wed Jul 11 2012 Frank Ch. Eigler <fche at redhat.com> - 1.8-3
+- PR14348 task_work_add race condition fix
+
 * Mon Jul 09 2012 Josh Stone <jistone at redhat.com>
 - bz837641 build fix
 

