Kernel panic with dgrp daemon

Hello,

The dgrp daemon causes a kernel panic when data is read through dgrp_net_read (we have had 3 kernel panics in two months).

Neither the changelog of the Red Hat 2.6.18-x kernels nor that of the dgrp driver seems to mention a fix for this problem.

Can you help us?

Best regards

------ HARDWARE ------

[user@oxxxxxo_app dgrp]$ cat config

Avail

ID Major State Ports

c 250 READY 4
b 251 READY 2
a 252 READY 8
[user@oxxxxxo_app dgrp]$ cat info
version: 1.9-20
register_with_sysfs: 1

rawreadok: 0x00000001 (1)
pollrate: 0x00000014 (20)

mon_debug: 0x0000000000000000 (0)
net_debug: 0x0000000000000000 (0)
tty_debug: 0x0000000000000000 (0)
comm_debug: 0x0000000000000000 (0)
ports_debug: 0x0000000000000000 (0)
[user@oxxxxxo_app dgrp]$ cat nodeinfo

HW HW SW

ID State Version ID Version Description

c READY 0.0 2 3.1 PortServer TS 4 Version 82000747_W
b READY 0.0 2 3.1 PortServer TS 2 Version 82000747_W1
a READY 0.0 2 1.0 ConnectPort TS 8

------ CRASH ------

  KERNEL: /usr/lib/debug/lib/modules/2.6.18-348.el5/vmlinux
DUMPFILE: /root/vmcore_xxx-xxx_02.10.2017  [PARTIAL DUMP]
    CPUS: 4
    DATE: Mon Oct  2 10:08:55 2017
  UPTIME: 25 days, 19:05:33

LOAD AVERAGE: 1.67, 1.35, 1.28
TASKS: 976
NODENAME: oxxxxxo_app
RELEASE: 2.6.18-348.el5
VERSION: #1 SMP Wed Nov 28 21:22:00 EST 2012
MACHINE: x86_64 (2297 Mhz)
MEMORY: 23.7 GB
PANIC: "Oops: 0000 [1] SMP " (check log for details)
PID: 5831
COMMAND: "drpd"
TASK: ffff810558c110c0 [THREAD_INFO: ffff810554778000]
CPU: 0
STATE: TASK_RUNNING (PANIC)

------ LOG ------

Unable to handle kernel NULL pointer dereference at 0000000000000000 RIP:
[] __wake_up_common+0x24/0x68
PGD 5540dc067 PUD 5540dd067 PMD 0
Oops: 0000 [1] SMP
last sysfs file: /devices/pci0000:00/0000:00:00.0/irq
CPU 0
Modules linked in: ppdev dgrp(U) autofs4 sunrpc be2iscsi ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp bnx2i cnic ipv6 xfrm_nalgo crypto_api uio cxgb3i libcxgbi cxgb3 8021q libiscsi_tcp libiscsi2 scsi_transport_iscsi2 scsi_transport_iscsi vmsync(U) vsock(U) vmmemctl(U) acpiphp dm_mirror dm_multipath scsi_dh video backlight sbs power_meter hwmon i2c_ec dell_wmi wmi button battery asus_acpi acpi_memhotplug ac lp floppy sg parport_pc parport i2c_piix4 i2c_core pcspkr tpm_tis tpm vmci(U) serio_raw tpm_bios ide_cd cdrom vmxnet(U) pvscsi(U) vmxnet3(U) dm_raid45 dm_message dm_region_hash dm_log dm_mod dm_mem_cache ata_piix libata shpchp mptsas mptscsih mptbase scsi_transport_sas sd_mod scsi_mod ext3 jbd uhci_hcd ohci_hcd ehci_hcd
Pid: 5831, comm: drpd Tainted: G ---- 2.6.18-348.el5 #1
RIP: 0010:[] [] __wake_up_common+0x24/0x68
RSP: 0018:ffff810554779c88 EFLAGS: 00010096
RAX: 0000000000000292 RBX: ffff81040c79e9f0 RCX: 0000000000000000
RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffff81040c79e9f0
RBP: ffff810554779cb8 R08: 0000000000000000 R09: 000000000000003c
R10: ffff810554779b18 R11: 0000000000000300 R12: 0000000000000001
R13: 0000000000000001 R14: ffff81040c79e9f0 R15: 0000000000000000
FS: 00002b86ceb690e0(0000) GS:ffffffff80435000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000000 CR3: 0000000554107000 CR4: 00000000000006a0
Process drpd (pid: 5831, threadinfo ffff810554778000, task ffff810558c110c0)
Stack: 0000000100000000 ffff81040c79e9f0 0000000000000000 0000000000000001
0000000000000292 0000000000000001 ffff810554779cf8 ffffffff8002e4bc
0000000000000008 ffff81040c79e800 ffff8105547b2598 0000000000000000
Call Trace:
[] __wake_up+0x38/0x4f
[] :dgrp:dgrp_net_read+0x155f/0x1b89
[] proc_reg_read+0x7e/0x99
[] vfs_read+0xcb/0x171
[] sys_read+0x45/0x6e
[] tracesys+0xd5/0xdf

Code: 49 8b 18 eb 2a 49 8d 78 e8 45 8b 68 e8 4c 89 f9 8b 55 d0 8b
RIP [] __wake_up_common+0x24/0x68
RSP

------ BACK TRACE ------

PID: 5831 TASK: ffff810558c110c0 CPU: 0 COMMAND: "drpd"
#0 [ffff8105547799e0] crash_kexec at ffffffff800b116d
#1 [ffff810554779aa0] __die at ffffffff80065137
#2 [ffff810554779ae0] do_page_fault at ffffffff8006741e
#3 [ffff810554779bd0] error_exit at ffffffff8005ddf9
[exception RIP: __wake_up_common+36]
RIP: ffffffff8008d755 RSP: ffff810554779c88 RFLAGS: 00010096
RAX: 0000000000000292 RBX: ffff81040c79e9f0 RCX: 0000000000000000
RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffff81040c79e9f0
RBP: ffff810554779cb8 R8: 0000000000000000 R9: 000000000000003c
R10: ffff810554779b18 R11: 0000000000000300 R12: 0000000000000001
R13: 0000000000000001 R14: ffff81040c79e9f0 R15: 0000000000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#4 [ffff810554779cc0] __wake_up at ffffffff8002e4bc
#5 [ffff810554779d00] dgrp_net_read at ffffffff8867c4de [dgrp]
#6 [ffff810554779ed0] proc_reg_read at ffffffff8010c05e
#7 [ffff810554779f10] vfs_read at ffffffff8000b72f
#8 [ffff810554779f40] sys_read at ffffffff80011da1
#9 [ffff810554779f80] tracesys at ffffffff8005d29e (via system_call)
RIP: 0000003ce6ac7220 RSP: 00007fffca93c008 RFLAGS: 00000246
RAX: ffffffffffffffda RBX: ffffffff8005d29e RCX: ffffffffffffffff
RDX: 0000000000001f40 RSI: 00007fffca93c010 RDI: 0000000000000003
RBP: 00007fffca93e020 R8: 0000000000000000 R9: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 0000000000000031 R14: 00000000ffffffff R15: 00000000ca93e0d0
ORIG_RAX: 0000000000000000 CS: 0033 SS: 002b

------ DISASSEMBLE dgrp_net_read ------

crash> dis -l ffffffff8867c4de
0xffffffff8867c4de : mov %rbx,%rdi

------ DISASSEMBLE __wake_up ------

crash> dis -l ffffffff8002e4bc
/usr/src/debug/kernel-2.6.18/linux-2.6.18-348.el5.x86_64/kernel/sched.c: 3814
0xffffffff8002e4bc <__wake_up+56>: mov %r14,%rsi

------ DISASSEMBLE __wake_up_common ------

crash> dis -l __wake_up_common+36
/usr/src/debug/kernel-2.6.18/linux-2.6.18-348.el5.x86_64/kernel/sched.c: 3790
0xffffffff8008d755 <__wake_up_common+36>: mov (%r8),%rbx <---- loads the value at the address held in R8 (0000000000000000) into RBX -> NULL pointer dereference

------ SOURCE CODE sched.c ------

3807 void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
3808 int nr_exclusive, void *key)
3809 {
3810 unsigned long flags;
3811
3812 spin_lock_irqsave(&q->lock, flags);
3813 __wake_up_common(q, mode, nr_exclusive, 0, key);
3814 spin_unlock_irqrestore(&q->lock, flags);
3815 }
3816 EXPORT_SYMBOL(__wake_up);

3785 static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
3786 int nr_exclusive, int sync, void *key)
3787 {
3788 struct list_head *tmp, *next;
3789
3790 list_for_each_safe(tmp, next, &q->task_list) {
3791 wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
3792 unsigned flags = curr->flags;
3793
3794 if (curr->func(curr, mode, sync, key) &&
3795 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
3796 break;
3797 }
3798 }