system can't finish boot with systemd-28-4.fc16 and kernel-3.0-0.rc2.git0.2.fc16

Jerry James loganjerry at gmail.com
Wed Jun 15 19:22:07 UTC 2011


I updated the machine to get the rc3 kernel, and rebooted.  I got a
hang in the same place.  I'm starting to think the "hung task"
messages that result from waiting long enough deserve a closer look.
Here's the first one:

[   75.420108] INFO: rcu_sched_state detected stalls on CPUs/tasks: {
1} (detected by 0, t=65002 jiffies)
[   75.421092] sending NMI to all CPUs:
[   75.421092] NMI backtrace for cpu 0
[   75.421092] Modules linked in: uinput virtio_blk
[   75.421092]
[   75.421092] Pid: 0, comm: swapper Tainted: G        W
3.0-0.rc3.git0.3.fc16.i686.PAE #1 Bochs Bochs
[   75.421092] EIP: 0060:[<c0420067>] EFLAGS: 00010046 CPU: 0
[   75.421092] EIP is at native_apic_mem_write+0x17/0x19
[   75.421092] EAX: fffb300 EBX: 00000002 ECX: fffff000 EDX: 00000c00
[   75.421092] ESI: 00000c00 EDI: 00000003 EBP: c0a7fdf4 ESP: c0a7fdf4
[   75.421092]  DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
[   75.421092] Process swapper (pid: 0, ti=c0a7e000 task=c0b14fe0
task.ti=c0a7e000)
[   75.421092] Stack:
[   75.421092]  c0a7fe08 c042010c 00000003 00000006 00000002 c0a7fe28
c04203e0 80da80ee
[   75.421092]  00002710 c0b28980 00002710 c0b28980 c0b28c00 c0a7fe30
c0420488 c0a7fe40
[   75.421092]  c042059f c09b9b5a f5743984 c0a7fe70 c049d809 c09ccd44
00000000 0000fdea
[   75.421092] Call Trace:
[   75.421092]  [<c042010c>] __default_send_IPI_dest_field+0x51/0x56
[   75.421092]  [<c04203e0>] default_send_IPI_mask_logical+0x81/0xab
[   75.421092]  [<c0420488>] default_send_IPI_all+0x26/0x3c
[   75.421092]  [<c042059f>] arch_trigger_all_cpu_backtrace+0x4d/0x7a
[   75.421092]  [<c049d809>] __rcu_pending+0x1c1/0x282
[   75.421092]  [<c049dc58>] rcu_check_callbacks+0x72/0x98
[   75.421092]  [<c044fe0d>] update_process_times+0x32/0x5d
[   75.421092]  [<c046b861>] tick_sched_timer+0x72/0xa7
[   75.421092]  [<c045fa22>] __run_hrtimer+0xa7/0x13d
[   75.421092]  [<c085d67b>] ? _raw_spin_lock+0x61/0x68
[   75.421092]  [<c046b7ef>] ? tick_nohz_handler+0xe8/0xe8
[   75.421092]  [<c04603eb>] hrtimer_interrupt+0xd6/0x1b3
[   75.421092]  [<c046223f>] ? sched_clock_idle_wakeup_event+0x16/0x1b
[   75.421092]  [<c041f877>] smp_apic_timer_interrupt+0x64/0x76
[   75.421092]  [<c06013b4>] ? trace_hardirqs_off_thunk+0xc/0x10
[   75.421092]  [<c085eb32>] apic_timer_interrupt+0x36/0x3c
[   75.421092]  [<c04700e0>] ? __lock_acquire+0xb1a/0xb63
[   75.421092]  [<c0427149>] ? native_safe_halt+0xa/0xc
[   75.421092]  [<c040eeb0>] default_idle+0x63/0xa5
[   75.421092]  [<c0408261>] cpu_idle+0x93/0xad
[   75.421092]  [<c083f28e>] rest_init+0xa6/0xac
[   75.421092]  [<c083f1e8>] ? reciprocal_value+0x38/0x38
[   75.421092]  [<c0b7f7a6>] start_kernel+0x36b/0x371
[   75.421092]  [<c0b7f1c4>] ? loglevel+0x18/0x18
[   75.421092]  [<c0b7f0ba>] i386_start_kernel+0xa9/0xaf
[   75.421092] Code: 89 e5 9c 58 8d 74 26 00 89 c1 fa 90 8d 74 26 00
89 c8 5d c3 55 89 e5 3e 8d 74 26 00 8b 0d 6c b6 b1 c0 8d 84 08 00 c0
ff ff 89 10 <5d> c3 55 89 e5 3e 8d 74 26 00 eb 02 f3 90 a1 6c b6 b1 c0
8b 80
[   75.421092] Call Trace:
[   75.421092]  [<c042010c>] __default_send_IPI_dest_field+0x51/0x56
[   75.421092]  [<c04203e0>] default_send_IPI_mask_logical+0x81/0xab
[   75.421092]  [<c0420488>] default_send_IPI_all+0x26/0x3c
[   75.421092]  [<c042059f>] arch_trigger_all_cpu_backtrace+0x4d/0x7a
[   75.421092]  [<c049d809>] __rcu_pending+0x1c1/0x282
[   75.421092]  [<c049dc58>] rcu_check_callbacks+0x72/0x98
[   75.421092]  [<c044fe0d>] update_process_times+0x32/0x5d
[   75.421092]  [<c046b861>] tick_sched_timer+0x72/0xa7
[   75.421092]  [<c045fa22>] __run_hrtimer+0xa7/0x13d
[   75.421092]  [<c085d67b>] ? _raw_spin_lock+0x61/0x68
[   75.421092]  [<c046b7ef>] ? tick_nohz_handler+0xe8/0xe8
[   75.421092]  [<c04603eb>] hrtimer_interrupt+0xd6/0x1b3
[   75.421092]  [<c046223f>] ? sched_clock_idle_wakeup_event+0x16/0x1b
[   75.421092]  [<c041f877>] smp_apic_timer_interrupt+0x64/0x76
[   75.421092]  [<c06013b4>] ? trace_hardirqs_off_thunk+0xc/0x10
[   75.421092]  [<c085eb32>] apic_timer_interrupt+0x36/0x3c
[   75.421092]  [<c04700e0>] ? __lock_acquire+0xb1a/0xb63
[   75.421092]  [<c0427149>] ? native_safe_halt+0xa/0xc
[   75.421092]  [<c040eeb0>] default_idle+0x63/0xa5
[   75.421092]  [<c0408261>] cpu_idle+0x93/0xad
[   75.421092]  [<c083f28e>] rest_init+0xa6/0xac
[   75.421092]  [<c083f1e8>] ? reciprocal_value+0x38/0x38
[   75.421092]  [<c0b7f7a6>] start_kernel+0x36b/0x371
[   75.421092]  [<c0b7f1c4>] ? loglevel+0x18/0x18
[   75.421092]  [<c0b7f0ba>] i386_start_kernel+0xa9/0xaf
[   10.017070] NMI backtrace for cpu 1
[   10.017070] Modules linked in: uinput virtio_blk
[   10.017070]
[   10.017070] Pid: 13, comm: rcuc1 Tainted: G        W
3.0-0.rc3.git0.3.fc16.i686.PAE #1 Bochs Bochs
[   10.017070] EIP: 0060:[<c042f3ea>] EFLAGS: 00000086 CPU: 1
[   10.017070] EIP is at task_waking_fair+0x1a/0x30
[   10.017070] EAX: f0e254c0 EBX: ffffffff ECX: fff00000 EDX: f43d41e0
[   10.017070] ESI: 00000000 EDI: 00000000 EBP: f4ccbe70 ESP: f4ccbe64
[   10.017070]  DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
[   10.017070] Process rcuc1 (pid: 13, ti=f4cca000 task=f4cd0000
task.ti=f4cca000)
[   10.017070] Stack:
[   10.017070]  f0e254c0 00000003 00000000 f4ccbe94 c043c5e5 00000001
00000082 f0e258c4
[   10.017070]  00000000 f0f1de8c f0f1dee4 00000003 f4ccbe9c c043c6b1
f4ccbec0 c042f719
[   10.017070]  00000000 f0f1def0 00000001 00000001 f0f1dec8 00000282
f0f1dec4 f4ccbedc
[   10.017070] Call Trace:
[   10.017070]  [<c043c5e5>] try_to_wake_up+0xb5/0x171
[   10.017070]  [<c043c6b1>] default_wake_function+0x10/0x12
[   10.017070]  [<c042f719>] __wake_up_common+0x3b/0x62
[   10.017070]  [<c0430b30>] complete+0x3e/0x4e
[   10.017070]  [<c045a80a>] wakeme_after_rcu+0x10/0x12
[   10.017070]  [<c049d16c>] __rcu_process_callbacks+0x19c/0x276
[   10.017070]  [<c049e015>] rcu_cpu_kthread+0x1e3/0x2f6
[   10.017070]  [<c049c72a>] ? synchronize_sched_expedited_cpu_stop+0x11/0x11
[   10.017070]  [<c049de32>] ? rcu_needs_cpu+0x167/0x167
[   10.017070]  [<c045c801>] kthread+0x72/0x77
[   10.017070]  [<c045c78f>] ? __init_kthread_worker+0x4a/0x4a
[   10.017070]  [<c0864582>] kernel_thread_helper+0x6/0x10
[   10.017070] Code: 8b 58 18 8b 48 14 89 58 20 89 48 1c 83 c4 18 5b
5e 5f 5d c3 55 89 e5 57 56 53 3e 8d 74 26 00 8b 90 58 01 00 00 8b 7a
1c 8b 72 20 <8b> 5a 18 8b 4a 14 39 f3 75 f0 39 f9 75 ec 29 48 64 19 58
68 5b
[   10.017070] Call Trace:
[   10.017070]  [<c043c5e5>] try_to_wake_up+0xb5/0x171
[   10.017070]  [<c043c6b1>] default_wake_function+0x10/0x12
[   10.017070]  [<c042f719>] __wake_up_common+0x3b/0x62
[   10.017070]  [<c0430b30>] complete+0x3e/0x4e
[   10.017070]  [<c045a80a>] wakeme_after_rcu+0x10/0x12
[   10.017070]  [<c049d16c>] __rcu_process_callbacks+0x19c/0x276
[   10.017070]  [<c049e015>] rcu_cpu_kthread+0x1e3/0x2f6
[   10.017070]  [<c049c72a>] ? synchronize_sched_expedited_cpu_stop+0x11/0x11
[   10.017070]  [<c049de32>] ? rcu_needs_cpu+0x167/0x167
[   10.017070]  [<c045c801>] kthread+0x72/0x77
[   10.017070]  [<c045c78f>] ? __init_kthread_worker+0x4a/0x4a
[   10.017070]  [<c0864582>] kernel_thread_helper+0x6/0x10

Hmmm, try_to_wake_up has a big block of CONFIG_SMP code, and I'm
giving my virtual machines 2 CPUs.  Let's try to boot with 1 CPU...

It's still hanging, but not as often.  That suggests some kind of race
condition in the kernel.  On the first run where it did not hang, I
finally got the SELinux relabel I've been trying to get for awhile,
but only a couple of files were relabeled.  A few attempts later, I've
got a second successful boot.  I see some log messages that may or may
not be relevant:
...
[    5.798255] type=1404 audit(1308164633.296:2): enforcing=1
old_enforcing=0 auid=4294967295 ses=4294967295
[    7.757284] type=1403 audit(1308164635.255:3): policy loaded
auid=4294967295 ses=4294967295
...
[    9.351525] systemd[1]: Cannot add dependency job for unit
hwclock-load.service, ignoring: Unit hwclock-load.service failed to
load: No such file or directory. See system logs and 'systemctl
status' for details.
...
Starting Recreate Volatile Files and Directories...
[   14.246628] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.264083] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.267537] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.270785] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.280595] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_run_t:s0 for /run: Permission denied
[   14.284929] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_run_t:s0 for /run: Permission denied
[   14.288056] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_run_t:s0 for /run: Permission denied
[   14.291151] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.294575] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.297865] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.301255] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.304592] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.307561] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.310581] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.313827] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.327361] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.330307] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.333585] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.337475] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.342734] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.346828] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.350382] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_run_t:s0 for /run: Permission denied
[   14.365093] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.368169] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_t:s0 for /var: Permission denied
[   14.375580] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:var_run_t:s0 for /run: Permission denied
[   14.381680] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:tmp_t:s0 for /tmp: Permission denied
[   14.385635] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:tmp_t:s0 for /tmp: Permission denied
[   14.397205] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:tmp_t:s0 for /tmp: Permission denied
[   14.406395] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:tmp_t:s0 for /tmp: Permission denied
[   14.410923] systemd-tmpfiles[612]: Failed to set security context
system_u:object_r:tmp_t:s0 for /tmp: Permission denied
Started Recreate Volatile Files and Directories.
...

When I check the labeling immediately after the boot process finishes,
/run, /tmp/, and /var all have the correct labels.
-- 
Jerry James
http://www.jamezone.org/


More information about the devel mailing list