Joel Stanley | b550374 | 2020-03-13 16:15:01 +1030 | [diff] [blame^] | 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| 2 | From: Wen Xiong <wenxiong@linux.vnet.ibm.com> |
| 3 | Date: Fri, 6 Mar 2020 09:57:28 -0600 |
| 4 | Subject: [PATCH 2/3] scsi: ipr: Fix softlockup when rescanning devices in |
| 5 | petitboot |
| 6 | |
| 7 | When trying to rescan disks in petitboot shell, we hit the following |
| 8 | softlockup stacktrace: |
| 9 | |
| 10 | Kernel panic - not syncing: System is deadlocked on memory |
| 11 | [ 241.223394] CPU: 32 PID: 693 Comm: sh Not tainted 5.4.16-openpower1 #1 |
| 12 | [ 241.223406] Call Trace: |
| 13 | [ 241.223415] [c0000003f07c3180] [c000000000493fc4] dump_stack+0xa4/0xd8 (unreliable) |
| 14 | [ 241.223432] [c0000003f07c31c0] [c00000000007d4ac] panic+0x148/0x3cc |
| 15 | [ 241.223446] [c0000003f07c3260] [c000000000114b10] out_of_memory+0x468/0x4c4 |
| 16 | [ 241.223461] [c0000003f07c3300] [c0000000001472b0] __alloc_pages_slowpath+0x594/0x6d8 |
| 17 | [ 241.223476] [c0000003f07c3420] [c00000000014757c] __alloc_pages_nodemask+0x188/0x1a4 |
| 18 | [ 241.223492] [c0000003f07c34a0] [c000000000153e10] alloc_pages_current+0xcc/0xd8 |
| 19 | [ 241.223508] [c0000003f07c34e0] [c0000000001577ac] alloc_slab_page+0x30/0x98 |
| 20 | [ 241.223524] [c0000003f07c3520] [c0000000001597fc] new_slab+0x138/0x40c |
| 21 | [ 241.223538] [c0000003f07c35f0] [c00000000015b204] ___slab_alloc+0x1e4/0x404 |
| 22 | [ 241.223552] [c0000003f07c36c0] [c00000000015b450] __slab_alloc+0x2c/0x48 |
| 23 | [ 241.223566] [c0000003f07c36f0] [c00000000015b754] kmem_cache_alloc_node+0x9c/0x1b4 |
| 24 | [ 241.223582] [c0000003f07c3760] [c000000000218c48] blk_alloc_queue_node+0x34/0x270 |
| 25 | [ 241.223599] [c0000003f07c37b0] [c000000000226574] blk_mq_init_queue+0x2c/0x78 |
| 26 | [ 241.223615] [c0000003f07c37e0] [c0000000002ff710] scsi_mq_alloc_queue+0x28/0x70 |
| 27 | [ 241.223631] [c0000003f07c3810] [c0000000003005b8] scsi_alloc_sdev+0x184/0x264 |
| 28 | [ 241.223647] [c0000003f07c38a0] [c000000000300ba0] scsi_probe_and_add_lun+0x288/0xa3c |
| 29 | [ 241.223663] [c0000003f07c3a00] [c000000000301768] __scsi_scan_target+0xcc/0x478 |
| 30 | [ 241.223679] [c0000003f07c3b20] [c000000000301c64] scsi_scan_channel.part.9+0x74/0x7c |
| 31 | [ 241.223696] [c0000003f07c3b70] [c000000000301df4] scsi_scan_host_selected+0xe0/0x158 |
| 32 | [ 241.223712] [c0000003f07c3bd0] [c000000000303f04] store_scan+0x104/0x114 |
| 33 | [ 241.223727] [c0000003f07c3cb0] [c0000000002d5ac4] dev_attr_store+0x30/0x4c |
| 34 | [ 241.223741] [c0000003f07c3cd0] [c0000000001dbc34] sysfs_kf_write+0x64/0x78 |
| 35 | [ 241.223756] [c0000003f07c3cf0] [c0000000001da858] kernfs_fop_write+0x170/0x1b8 |
| 36 | [ 241.223773] [c0000003f07c3d40] [c0000000001621fc] __vfs_write+0x34/0x60 |
| 37 | [ 241.223787] [c0000003f07c3d60] [c000000000163c2c] vfs_write+0xa8/0xcc |
| 38 | [ 241.223802] [c0000003f07c3db0] [c000000000163df4] ksys_write+0x70/0xbc |
| 39 | [ 241.223816] [c0000003f07c3e20] [c00000000000b40c] system_call+0x5c/0x68 |
| 40 | |
| 41 | As a part of the scan process Linux will allocate and configure a |
| 42 | scsi_device for each target to be scanned. If the device is not present, |
| 43 | then the scsi_device is torn down. As a part of scsi_device teardown a |
| 44 | workqueue item will be scheduled and the lockups we see are because there |
| 45 | are 250k workqueue items to be processed. According to the specification of |
| 46 | SIS-64 SAS controller, max_channel should be decreased on SIS-64 adapters |
| 47 | to 4. |
| 48 | |
| 49 | This patch fixes the softlockup issue. |
| 50 | |
| 51 | Thanks to Oliver Halloran for his help with debugging and explanation! |
| 52 | |
| 53 | Link: https://lore.kernel.org/r/1583510248-23672-1-git-send-email-wenxiong@linux.vnet.ibm.com |
| 54 | Signed-off-by: Wen Xiong <wenxiong@linux.vnet.ibm.com> |
| 55 | Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> |
| 56 | Signed-off-by: Joel Stanley <joel@jms.id.au> |
| 57 | --- |
| 58 | drivers/scsi/ipr.c | 3 ++- |
| 59 | drivers/scsi/ipr.h | 1 + |
| 60 | 2 files changed, 3 insertions(+), 1 deletion(-) |
| 61 | |
| 62 | diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c |
| 63 | index 079c04bc448a..7a57b61f0340 100644 |
| 64 | --- a/drivers/scsi/ipr.c |
| 65 | +++ b/drivers/scsi/ipr.c |
| 66 | @@ -9947,6 +9947,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, |
| 67 | ioa_cfg->max_devs_supported = ipr_max_devs; |
| 68 | |
| 69 | if (ioa_cfg->sis64) { |
| 70 | + host->max_channel = IPR_MAX_SIS64_BUSES; |
| 71 | host->max_id = IPR_MAX_SIS64_TARGETS_PER_BUS; |
| 72 | host->max_lun = IPR_MAX_SIS64_LUNS_PER_TARGET; |
| 73 | if (ipr_max_devs > IPR_MAX_SIS64_DEVS) |
| 74 | @@ -9955,6 +9956,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, |
| 75 | + ((sizeof(struct ipr_config_table_entry64) |
| 76 | * ioa_cfg->max_devs_supported))); |
| 77 | } else { |
| 78 | + host->max_channel = IPR_VSET_BUS; |
| 79 | host->max_id = IPR_MAX_NUM_TARGETS_PER_BUS; |
| 80 | host->max_lun = IPR_MAX_NUM_LUNS_PER_TARGET; |
| 81 | if (ipr_max_devs > IPR_MAX_PHYSICAL_DEVS) |
| 82 | @@ -9964,7 +9966,6 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, |
| 83 | * ioa_cfg->max_devs_supported))); |
| 84 | } |
| 85 | |
| 86 | - host->max_channel = IPR_VSET_BUS; |
| 87 | host->unique_id = host->host_no; |
| 88 | host->max_cmd_len = IPR_MAX_CDB_LEN; |
| 89 | host->can_queue = ioa_cfg->max_cmds; |
| 90 | diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h |
| 91 | index a67baeb36d1f..b97aa9ac2ffe 100644 |
| 92 | --- a/drivers/scsi/ipr.h |
| 93 | +++ b/drivers/scsi/ipr.h |
| 94 | @@ -1300,6 +1300,7 @@ struct ipr_resource_entry { |
| 95 | #define IPR_ARRAY_VIRTUAL_BUS 0x1 |
| 96 | #define IPR_VSET_VIRTUAL_BUS 0x2 |
| 97 | #define IPR_IOAFP_VIRTUAL_BUS 0x3 |
| 98 | +#define IPR_MAX_SIS64_BUSES 0x4 |
| 99 | |
| 100 | #define IPR_GET_RES_PHYS_LOC(res) \ |
| 101 | (((res)->bus << 24) | ((res)->target << 8) | (res)->lun) |