Description
Etn40ff on #ci20 reported that the CPU stalls after a while.
It has possibly been traced to WiFi.
A long file download triggers it reproducibly on kernels 3.16 and 3.18.
iperf triggers it as well.
The rootfs is on the SD card.
3.18 stalls completely without any error messages.
3.16, however, does print error messages on ttyS0 (shown below).
Perhaps a race condition or locking issue.
root@ci20:~#
[ 138.344000] INFO: rcu_sched self-detected stall on CPU { 0} (t=5250 jiffies g=5697 c=5696 q=21)
[ 138.344000] CPU: 0 PID: 46 Comm: kworker/u4:3 Not tainted 3.16.0-00104-g2f7dfd4 #1
[ 138.344000] Workqueue: brcmf_wq brcmf_sdio_dataworker
[ 138.344000] Stack : 00000006 00000000 00000000 805ff35c 00000001 80ddf518 00000002 00000000
8b464a80 80751860 80620000 807e0d88 807e8680 807e000 8121c420 00000000
807e000 805ff35c 8085359c 800439a0 00000000 00000000 8078e608 8b45572c
8b45572c 80021c84 00000000 8003e52c 8075751c 80750000 800707c0 00000000
00000000 00000000 00000000 00000000 00000000 00000000 6d637262 71775f66
...
[ 138.344000] Call Trace:
[ 138.344000] [<8000a028>] show_stack+0x64/0x7c
[ 138.344000] [<806032c0>] dump_stack+0x70/0x9c
[ 138.344000] [<8007e8b8>] rcu_check_callbacks+0x4cc/0x840
[ 138.344000] [<8002e414>] update_process_times+0x48/0x88
[ 138.344000] [<800898a4>] tick_handle_periodic+0x38/0xe4
[ 138.344000] [<8049af28>] jz47xx_tcu_single_channel_irq+0x98/0xcc
[ 138.344000] [<800732cc>] handle_irq_event_percpu+0x64/0x190
[ 138.344000] [<8007344c>] handle_irq_event+0x54/0x98
[ 138.344000] [<80076828>] handle_level_irq+0xdc/0x1a4
[ 138.344000] [<8007292c>] generic_handle_irq+0x38/0x4c
[ 138.344000] [<8007292c>] generic_handle_irq+0x38/0x4c
[ 138.344000] [<800063b0>] do_IRQ+0x18/0x28
[ 138.344000] [<80004390>] ret_from_irq+0x0/0x4
[ 138.344000] [<80026b24>] __do_softirq+0xcc/0x2a8
[ 138.344000] [<80026fbc>] irq_exit+0x78/0x84
[ 138.344000] [<80004390>] ret_from_irq+0x0/0x4
[ 138.344000] [<80499e88>] jz47xx_mmc_request+0x64/0xb0
[ 138.344000] [<8048699c>] __mmc_start_req+0x68/0x94
[ 138.344000] [<80486ed0>] mmc_wait_for_req+0x1c/0x38
[ 138.344000] [<804922e0>] mmc_io_rw_extended+0x32c/0x360
[ 138.344000] [<80493b28>] sdio_io_rw_ext_helper+0x1dc/0x270
[ 138.344000] [<80493e18>] sdio_readsb+0x1c/0x28
[ 138.344000] [<8043ea1c>] brcmf_sdiod_buffrw.isra.10+0x48/0xc8
[ 138.344000] [<8043f9a0>] brcmf_sdiod_recv_pkt+0x54/0x68
[ 138.344000] [<8043d73c>] brcmf_sdio_dataworker+0x1228/0x1d14
[ 138.344000] [<8003bafc>] process_one_work+0x1c0/0x45c
[ 138.344000] [<8003c6c0>] worker_thread+0x15c/0x57c
[ 138.344000] [<800430d4>] kthread+0xd0/0xe8
[ 138.344000] [<800043d8>] ret_from_kernel_thread+0x14/0x1c
[ 138.344000]
[ 154.592000] mmcblk0: error -145 sending status command, retrying
[ 159.712000] mmcblk0: error -145 sending status command, retrying
[ 164.832000] mmcblk0: error -145 sending status command, aborting
[ 164.836000] end_request: I/O error, dev mmcblk0, sector 4463120
[ 164.840000] end_request: I/O error, dev mmcblk0, sector 4463128
[ 164.848000] end_request: I/O error, dev mmcblk0, sector 4463136
[ 164.852000] Aborting journal on device mmcblk0p1-8.
[ 175.072000] mmcblk0: error -145 sending status command, retrying
[ 180.192000] mmcblk0: error -145 sending status command, retrying
[ 185.312000] mmcblk0: error -145 sending status command, aborting
[ 185.316000] end_request: I/O error, dev mmcblk0, sector 8694616
[ 185.320000] end_request: I/O error, dev mmcblk0, sector 8694624
[ 185.328000] end_request: I/O error, dev mmcblk0, sector 8694632
[ 185.332000] end_request: I/O error, dev mmcblk0, sector 8694640
[ 195.552000] mmcblk0: error -145 sending status command, retrying
[ 200.672000] mmcblk0: error -145 sending status command, retrying
[ 201.356000] INFO: rcu_sched self-detected stall on CPU { 0} (t=21003 jiffies g=5697 c=5696 q=84)
[ 201.356000] CPU: 0 PID: 46 Comm: kworker/u4:3 Not tainted 3.16.0-00104-g2f7dfd4 #1
[ 201.356000] Workqueue: brcmf_wq brcmf_sdio_dataworker
[ 201.356000] Stack : 00000006 00000000 00000000 805ff35c 00000001 80ddf518 00000002 00000000
8b464a80 80751860 80620000 807e0d88 807e8680 807e000 8121c420 00000000
807e000 805ff35c 8085359c 800439a0 00000000 00000000 8078e608 8b45572c
8b45572c 80021c84 00000000 8003e52c 8075751c 80750000 800707c0 00000000
00000000 00000000 00000000 00000000 00000000 00000000 6d637262 71775f66
...
[ 201.356000] Call Trace:
[ 201.356000] [<8000a028>] show_stack+0x64/0x7c
[ 201.356000] [<806032c0>] dump_stack+0x70/0x9c
[ 201.356000] [<8007e8b8>] rcu_check_callbacks+0x4cc/0x840
[ 201.356000] [<8002e414>] update_process_times+0x48/0x88
[ 201.356000] [<800898a4>] tick_handle_periodic+0x38/0xe4
[ 201.356000] [<8049af28>] jz47xx_tcu_single_channel_irq+0x98/0xcc
[ 201.356000] [<800732cc>] handle_irq_event_percpu+0x64/0x190
[ 201.356000] [<8007344c>] handle_irq_event+0x54/0x98
[ 201.356000] [<80076828>] handle_level_irq+0xdc/0x1a4
[ 201.356000] [<8007292c>] generic_handle_irq+0x38/0x4c
[ 201.356000] [<8007292c>] generic_handle_irq+0x38/0x4c
[ 201.356000] [<800063b0>] do_IRQ+0x18/0x28
[ 201.356000] [<80004390>] ret_from_irq+0x0/0x4
[ 201.356000] [<80026b24>] __do_softirq+0xcc/0x2a8
[ 201.356000] [<80026fbc>] irq_exit+0x78/0x84
[ 201.356000] [<80004390>] ret_from_irq+0x0/0x4
[ 201.356000] [<80499e88>] jz47xx_mmc_request+0x64/0xb0
[ 201.356000] [<8048699c>] __mmc_start_req+0x68/0x94
[ 201.356000] [<80486ed0>] mmc_wait_for_req+0x1c/0x38
[ 201.356000] [<804922e0>] mmc_io_rw_extended+0x32c/0x360
[ 201.356000] [<80493b28>] sdio_io_rw_ext_helper+0x1dc/0x270
[ 201.356000] [<80493e18>] sdio_readsb+0x1c/0x28
[ 201.356000] [<8043ea1c>] brcmf_sdiod_buffrw.isra.10+0x48/0xc8
[ 201.356000] [<8043f9a0>] brcmf_sdiod_recv_pkt+0x54/0x68
[ 201.356000] [<8043d73c>] brcmf_sdio_dataworker+0x1228/0x1d14
[ 201.356000] [<8003bafc>] process_one_work+0x1c0/0x45c
[ 201.356000] [<8003c6c0>] worker_thread+0x15c/0x57c
[ 201.356000] [<800430d4>] kthread+0xd0/0xe8
[ 201.356000] [<800043d8>] ret_from_kernel_thread+0x14/0x1c
[ 201.356000]
[ 205.792000] mmcblk0: error -145 sending status command, aborting
[ 205.796000] end_request: I/O error, dev mmcblk0, sector 8694744
[ 205.800000] end_request: I/O error, dev mmcblk0, sector 8694752
[ 205.808000] end_request: I/O error, dev mmcblk0, sector 8694760
[ 205.812000] end_request: I/O error, dev mmcblk0, sector 8694768
[ 205.820000] end_request: I/O error, dev mmcblk0, sector 8694776
[ 205.824000] end_request: I/O error, dev mmcblk0, sector 8694784
[ 216.032000] mmcblk0: error -145 sending status command, retrying
[ 221.152000] mmcblk0: error -145 sending status command, retrying
[ 226.272000] mmcblk0: error -145 sending status command, aborting
[ 226.276000] end_request: I/O error, dev mmcblk0, sector 4460544
[ 226.280000] Buffer I/O error on device mmcblk0p1, logical block 557056
[ 226.288000] lost page write due to I/O error on mmcblk0p1
[ 226.292000] JBD2: Error -5 detected when updating journal superblock for mmcblk0p1-8.