cancel
Showing results for 
Search instead for 
Did you mean: 

HPUX kernel memory corruption

Eytanaim
Occasional Visitor

HPUX kernel memory corruption

Hi,

We are experiencing recurring kernel crashes on HP-UX systems (across different versions).
These are probably caused by our driver, which is installed on these machines; the crashes occur during high-throughput scenarios.
The crash types and thread contexts are diverse, and the crashes usually occur in a global context (not in our code). For example:

bad_kern_reference: 0x1f1bfc00.0x40000173746f7359, fault = 0xf

#0  0x8cefa4 in panic+0xb4 ()
#1  0xa1288c in bad_kern_reference+0x64 ()
#2  0xadc5c in vfault+0x254 ()
#3  0x91295c in trap+0x2cc ()
#4  0x909724 in $call_trap+0x28 ()
#5  0xb206c4 in vx_iupdat_cluster+0x10c ()
#6  0xb202a8 in vx_async_iupdat+0xc0 ()
#7  0xb1fa04 in vx_iupdat_local+0x13c ()
#8  0xb1fffc in vx_iupdat+0x54 ()
#9  0xb0e430 in vx_iflush_list+0x1d8 ()
#10 0xb0d7e0 in vx_iflush+0xe0 ()
#11 0xb0d170 in vx_worklist_process+0x150 ()
#12 0xb0cf9c in vx_worklist_thread+0x44 ()
#13 0x7df134 in kthread_daemon_startup+0x24 ()
#14 0x7df110 in kthread_daemon_startup+0 ()
bad_kern_reference: 0x28137400.0x4000006f72793a20, fault = 0xf

#0  0x931fd4 in panic+0xb4 ()
#1  0xaa0b1c in bad_kern_reference+0x64 ()
#2  0xb7238 in vfault+0x210 ()
#3  0x975aec in trap+0x2cc ()
#4  0x96c894 in $call_trap+0x28 ()
#5  0xa47d08 in kmem_arena_fpl_alloc+0x60 ()
#6  0xfb428 in kmem_arena_refill+0x3e8 ()
#7  0xbcfd4 in kmem_arena_varalloc+0x244 ()
#8  0xbdf3b0 in vx_zalloc+0x38 ()
#9  0xbe4c20 in vx_inode_miscalloc+0x28 ()
#10 0xba6c6c in vx_inode_mem_init+0x24 ()
#11 0xba8cc8 in vx_ilist_chunkinit+0x58 ()
#12 0xbaa83c in vx_ilist_chunkalloc+0x2c ()
#13 0xbaace8 in vx_ireuse+0x2f8 ()
#14 0xba7770 in vx_iget+0x1b0 ()
#15 0xb65b2c in vx_dirlook+0x1ac ()
#16 0xbe9e8c in vx_lookup+0x264 ()
#17 0xa299c in lookuppnvp+0x554 ()
#18 0x9f7c8 in lookuppn+0x58 ()
#19 0x89400 in vn_remove+0x88 ()
#20 0x744a8 in unlink+0x28 ()
#21 0xbfbc0 in syscall+0x2d8 ()
#22 0x37510 in $syscallrtn+0 ()
panic: post_hndlr(): Unresolved kernel interruption


#0  0xe000000001f82f80:0 in panic_save_regs_switchstack+0x110
    (0x692, 0xe000000001f7e180, 0x144000206c61019b, 0xe000000100db63e0,
       0xe000000100db64e0, 0x0, 0xe000000100db64a0, 0xe000000100db6420)
#1  0xe000000001f7e1c0:0 in panic
    (0xe000000000516c70, 0x144000206c61015b, 0xc9f, 0xe0000000007e3d80,
       0x144000206c61015b, 0xe00000010205e1f0, 0x0, 0xe00000010205e1e8) at /ux/core/kern/em/svc/shutdown/panic.c:376
#2  0xe0000000007e3d80:0 in $cold_vm_hndlr+0x940
    (0x9fffffff5fea7400, 0x9fffffff5fea75d0, 0x288, 0xe000000157a6e868,
       0xe000000001dd3780, 0x144000206c61001d, 0xe0000000f0000998,
       0x8)
#3  0xe000000001dd3780:0 in bubbleup+0x880
    (0x9fffffff5fea7400, 0x9fffffff5fea73f0, 0x9fffffff5fea9a80,
       0x9fffffff5fea8001, 0x144000206c61001d, 0x9fffffff5fea7400,
       0x9fffffff5fea75d0, 0x288)
#4  0xe000000000c34fe0:0 in kmalloc
    (0xcc0, 0xe00000015616fc80, 0x0, 0x91a, 0xe00000016ae439b0, 0x400000000000203,
       0xe00000015616fcc8, 0xe00000015616fcb0) at /ux/core/kern/common/vm/kmem/vm_arena_iface.c:2609
#5  0xe00000016ae439b0:0 in memory_manager_alloc
    (0xc90, 0x1, 0x0, 0xe00000016ac809b0, 0xdc, 0xf0f0f0f0, 0x817, 0xe00000016ae51810) at memory_manager.c:410
#6  0xe00000016ae51810:0 in _mem_pool_init_chunk
    (0xe000000157a6e830, 0x2, 0xd22, 0xe00000016ae504e0, 0x400000000000255,
       0xe000000157a6e884, 0x0, 0xe000000157a6e880) at mem_pool.c:220
#7  0xe00000016ae504e0:0 in _mem_pool_init_chunk_size
    (0xe00000016ac80dc0, 0xe000000157a6e868, 0x2000, 0xe00000016ac531e0,
       0xe000000157a6e898, 0x50f, 0xe00000016ae51620, 0x40000000000021f) at mem_pool.c:408
#8  0xe00000016ae51620:0 in mem_pool_init_reserved
    (0x618, 0x1f00, 0x100, 0x1, 0xe00000016ac531e0, 0x0, 0x512, 0xe00000016ad05c40) at mem_pool.c:270
#9  0xe00000016ad05c40:0 in ku_list_init
    (0xe0000001975a6eb0, 0x1, 0x4b1e, 0xe00000016ad0d7a0, 0x40000000000025b,
       0x9fffffff5fea7810, 0x1f00, 0xe0000001975a6ef8) at ku_list_transport.c:120
#10 0xe00000016ad0d7a0:0 in message_builder_init
    (0x1, 0x1, 0xc, 0x10, 0xc, 0x10, 0xe00000016ac53960, 0x9fffffff5fea7838) at message_builder.c:196
#11 0xe00000016ae11660:0 in _init_modules
    (0x0, 0x512, 0xe00000016ae0fe60, 0x400000000008645, 0x1, 0x9fffffff5fea78b8,
       0x9fffffff5fea7858, 0x9fffffff5fea78b0) at syscall_driver.c:781
#12 0xe00000016ae0fe60:0 in driver_init_modules
    (0xe0000001a679a100, 0x40f, 0xe00000016ae09e70, 0x400000000008607,
       0x0, 0xe00000016af10ca0, 0xe00000016af10ca0, 0xe00000016ae09e50) at syscall_driver.c:1138
#13 0xe00000016ae09e70:0 in _rnt_init_modules_ioctl
    (0xe0000001a679a100, 0x716, 0xe00000016ae01480, 0x400000000008605,
       0xe00000016af090c0, 0x20000000777cc5d0, 0x200000007fffef30,
       0xc00000000000060f) at krapi_ioctl.c:142
#14 0xe00000016ae01480:0 in krapi_ioctl
    (0x8004, 0xe0000001a679a100, 0x0, 0x3, 0x28a, 0xe00000016ae283b0,
       0x400000000008601, 0x0) at krapi_ioctl.c:1923
#15 0xe00000016ae283b0:0 in kragent_ioctl
    (0x612, 0xffffffffc4b0c304, 0xe0000001a679a100, 0x3, 0xe000000000ac7af0,
       0x8004, 0xe0000001a679a100, 0x0) at syscall_device_hpux.c:58
#16 0xe000000000ac7af0:0 in spec_ioctl
    (0xe000000160951608, 0xffffffffc4b0c304, 0xe0000001a679a100,
       0x3, 0x40d, 0xe000000000b568f0, 0x400000000008641, 0xe0000001006abf00) at /ux/core/kern/common/fs/misc/spec_vnops.c:770
#17 0xe000000000b568f0:0 in vno_ioctl
    (0xe0000001640ec980, 0xffffffffc4b0c304, 0xe0000001a679a100,
       0x792, 0x9fffffff5fea7950, 0xe000000000b97330, 0x400000000008743,
       0xe000000160951608) at /ux/core/kern/common/fs/vfs/vfs_io.c:223
#18 0xe000000000b97330:0 in ioctl
    (0xd9f, 0x4b0, 0xe000000000c74090, 0x400000000008787, 0x447eae24,
       0x1, 0xe000000000c741e0, 0x400000000008743) at /ux/core/kern/common/fs/vfs/sys_gen.c:701
#19 0xe000000000c741e0:0 in syscall
    (0x36, 0x9fffffff5fea7c00, 0x0, 0xe0000001205c4480, 0x400000000008707,
       0x86, 0x288, 0x9fffffff5fea8ca2) at /ux/core/kern/common/svc/scall/syscall.c:1739
#20 0xe0000001205c4480 in <unknown_procedure>
    (0x9fffffff5fea7c00, 0x9fffffff5fea7bf0, 0x9fffffff5fea9298,
       0x9fffffff5fea8001, 0x400000000008707, 0x36, 0x9fffffff5fea7c00,
       0x0)

We assume that the only way to explain such crashes (and more) is some memory corruption in the kernel heap space caused by our driver.

Can you point us to kernel memory debugging tools? Are there any available?

Kernel heap corruption detector?

Some kernel module verifier/sandbox?

Can Kwdb help us somehow?

We couldn't find any useful tips on the net.

 

Thanks.

1 REPLY
Dave Olker
HPE Pro

Re: HPUX kernel memory corruption

Have you opened a case with HPE support?  They should be able to provide tools and dump reading assistance.

Dave