devcoredump.c source code [linux/drivers/base/devcoredump.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright(c) 2014 Intel Mobile Communications GmbH
4	* Copyright(c) 2015 Intel Deutschland GmbH
5	*
6	* Author: Johannes Berg <johannes@sipsolutions.net>
7	*/
8	#include <linux/module.h>
9	#include <linux/device.h>
10	#include <linux/devcoredump.h>
11	#include <linux/list.h>
12	#include <linux/slab.h>
13	#include <linux/fs.h>
14	#include <linux/workqueue.h>
15
16	static struct class devcd_class;
17
/*
 * Global disable flag, for security purposes.
 *
 * Set by writing 1 to the class "disabled" attribute and never cleared
 * again; while set, dev_coredumpm_timeout() discards new dumps.
 */
static bool devcd_disabled;
20
21	struct devcd_entry {
22	struct device devcd_dev;
23	void *data;
24	size_t datalen;
25	/*
26	* There are 2 races for which mutex is required.
27	*
28	* The first race is between device creation and userspace writing to
29	* schedule immediately destruction.
30	*
31	* This race is handled by arming the timer before device creation, but
32	* when device creation fails the timer still exists.
33	*
34	* To solve this, hold the mutex during device_add(), and set
35	* init_completed on success before releasing the mutex.
36	*
37	* That way the timer will never fire until device_add() is called,
38	* it will do nothing if init_completed is not set. The timer is also
39	* cancelled in that case.
40	*
41	* The second race involves multiple parallel invocations of devcd_free(),
42	* add a deleted flag so only 1 can call the destructor.
43	*/
44	struct mutex mutex;
45	bool init_completed, deleted;
46	struct module *owner;
47	ssize_t (read)(char* *buffer, loff_t offset, size_t count,
48	void *data, size_t datalen);
49	void (free)(void* *data);
50	/*
51	* If nothing interferes and device_add() was returns success,
52	* del_wk will destroy the device after the timer fires.
53	*
54	* Multiple userspace processes can interfere in the working of the timer:
55	* - Writing to the coredump will reschedule the timer to run immediately,
56	* if still armed.
57	*
58	* This is handled by using "if (cancel_delayed_work()) {
59	* schedule_delayed_work() }", to prevent re-arming after having
60	* been previously fired.
61	* - Writing to /sys/class/devcoredump/disabled will destroy the
62	* coredump synchronously.
63	* This is handled by using disable_delayed_work_sync(), and then
64	* checking if deleted flag is set with &devcd->mutex held.
65	*/
66	struct delayed_work del_wk;
67	struct device *failing_dev;
68	};
69
70	static struct devcd_entry dev_to_devcd(struct* device *dev)
71	{
72	return container_of(dev, struct devcd_entry, devcd_dev);
73	}
74
75	static void devcd_dev_release(struct device *dev)
76	{
77	struct devcd_entry *devcd = dev_to_devcd(dev);
78
79	devcd->free(devcd->data);
80	module_put(module: devcd->owner);
81
82	/*
83	* this seems racy, but I don't see a notifier or such on
84	* a struct device to know when it goes away?
85	*/
86	if (devcd->failing_dev->kobj.sd)
87	sysfs_delete_link(dir: &devcd->failing_dev->kobj, targ: &dev->kobj,
88	name: "devcoredump");
89
90	put_device(dev: devcd->failing_dev);
91	kfree(objp: devcd);
92	}
93
94	static void __devcd_del(struct devcd_entry *devcd)
95	{
96	devcd->deleted = true;
97	device_del(dev: &devcd->devcd_dev);
98	put_device(dev: &devcd->devcd_dev);
99	}
100
101	static void devcd_del(struct work_struct *wk)
102	{
103	struct devcd_entry *devcd;
104	bool init_completed;
105
106	devcd = container_of(wk, struct devcd_entry, del_wk.work);
107
108	/ devcd->mutex serializes against dev_coredumpm_timeout /
109	mutex_lock(&devcd->mutex);
110	init_completed = devcd->init_completed;
111	mutex_unlock(lock: &devcd->mutex);
112
113	if (init_completed)
114	__devcd_del(devcd);
115	}
116
117	static ssize_t devcd_data_read(struct file filp, struct* kobject *kobj,
118	const struct bin_attribute *bin_attr,
119	char *buffer, loff_t offset, size_t count)
120	{
121	struct device *dev = kobj_to_dev(kobj);
122	struct devcd_entry *devcd = dev_to_devcd(dev);
123
124	return devcd->read(buffer, offset, count, devcd->data, devcd->datalen);
125	}
126
127	static ssize_t devcd_data_write(struct file filp, struct* kobject *kobj,
128	const struct bin_attribute *bin_attr,
129	char *buffer, loff_t offset, size_t count)
130	{
131	struct device *dev = kobj_to_dev(kobj);
132	struct devcd_entry *devcd = dev_to_devcd(dev);
133
134	/*
135	* Although it's tempting to use mod_delayed work here,
136	* that will cause a reschedule if the timer already fired.
137	*/
138	if (cancel_delayed_work(dwork: &devcd->del_wk))
139	schedule_delayed_work(dwork: &devcd->del_wk, delay: `0`);
140
141	return count;
142	}
143
144	static const struct bin_attribute devcd_attr_data =
145	__BIN_ATTR(data, `0600`, devcd_data_read, devcd_data_write, `0`);
146
147	static const struct bin_attribute *const devcd_dev_bin_attrs[] = {
148	&devcd_attr_data, NULL,
149	};
150
151	static const struct attribute_group devcd_dev_group = {
152	.bin_attrs = devcd_dev_bin_attrs,
153	};
154
155	static const struct attribute_group *devcd_dev_groups[] = {
156	&devcd_dev_group, NULL,
157	};
158
159	static int devcd_free(struct device dev, void* *data)
160	{
161	struct devcd_entry *devcd = dev_to_devcd(dev);
162
163	/*
164	* To prevent a race with devcd_data_write(), disable work and
165	* complete manually instead.
166	*
167	* We cannot rely on the return value of
168	* disable_delayed_work_sync() here, because it might be in the
169	* middle of a cancel_delayed_work + schedule_delayed_work pair.
170	*
171	* devcd->mutex here guards against multiple parallel invocations
172	* of devcd_free().
173	*/
174	disable_delayed_work_sync(dwork: &devcd->del_wk);
175	mutex_lock(&devcd->mutex);
176	if (!devcd->deleted)
177	__devcd_del(devcd);
178	mutex_unlock(lock: &devcd->mutex);
179	return `0`;
180	}
181
182	static ssize_t disabled_show(const struct class class, const* struct class_attribute *attr,
183	char *buf)
184	{
185	return sysfs_emit(buf, fmt: "%d\n", devcd_disabled);
186	}
187
/*
 *
 *	disabled_store()                              worker()
 *	 class_for_each_device(&devcd_class,
 *		NULL, NULL, devcd_free)
 *	   ...
 *	   ...
 *	   while ((dev = class_dev_iter_next(&iter))
 *	                                              devcd_del()
 *	                                                device_del()
 *	                                                  put_device() <- last reference
 *	     error = fn(dev, data)                    devcd_dev_release()
 *	     devcd_free(dev, data)                    kfree(devcd)
 *
 *
 * In the above diagram, it looks like disabled_store() would be racing with a
 * devcd_del() running in parallel and result in a memory abort after that one
 * drops its last reference with put_device(). However, this will not happen,
 * as fn(dev, data) runs with its own reference to the device via klist_node,
 * so it is not the last reference. So, the above situation would not occur.
 */
209
210	static ssize_t disabled_store(const struct class class, const* struct class_attribute *attr,
211	const char *buf, size_t count)
212	{
213	long tmp = simple_strtol(buf, NULL, `10`);
214
215	/*
216	* This essentially makes the attribute write-once, since you can't
217	* go back to not having it disabled. This is intentional, it serves
218	* as a system lockdown feature.
219	*/
220	if (tmp != `1`)
221	return -EINVAL;
222
223	devcd_disabled = true;
224
225	class_for_each_device(class: &devcd_class, NULL, NULL, fn: devcd_free);
226
227	return count;
228	}
229	static CLASS_ATTR_RW(disabled);
230
231	static struct attribute *devcd_class_attrs[] = {
232	&class_attr_disabled.attr,
233	NULL,
234	};
235	ATTRIBUTE_GROUPS(devcd_class);
236
237	static struct class devcd_class = {
238	.name = "devcoredump",
239	.dev_release = devcd_dev_release,
240	.dev_groups = devcd_dev_groups,
241	.class_groups = devcd_class_groups,
242	};
243
244	static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count,
245	void *data, size_t datalen)
246	{
247	return memory_read_from_buffer(to: buffer, count, ppos: &offset, from: data, available: datalen);
248	}
249
/* Free callback used by dev_coredumpv(): releases the dump with vfree(). */
static void devcd_freev(void *data)
{
	vfree(data);
}
254
255	/**
256	* dev_coredumpv - create device coredump with vmalloc data
257	* @dev: the struct device for the crashed device
258	* @data: vmalloc data containing the device coredump
259	* @datalen: length of the data
260	* @gfp: allocation flags
261	*
262	* This function takes ownership of the vmalloc'ed data and will free
263	* it when it is no longer used. See dev_coredumpm() for more information.
264	*/
265	void dev_coredumpv(struct device dev, void* *data, size_t datalen,
266	gfp_t gfp)
267	{
268	dev_coredumpm(dev, NULL, data, datalen, gfp, read: devcd_readv, free: devcd_freev);
269	}
270	EXPORT_SYMBOL_GPL(dev_coredumpv);
271
272	static int devcd_match_failing(struct device dev, const* void *failing)
273	{
274	struct devcd_entry *devcd = dev_to_devcd(dev);
275
276	return devcd->failing_dev == failing;
277	}
278
/**
 * devcd_free_sgtable - free all the memory of the given scatterlist table
 * (i.e. both pages and scatterlist instances)
 * NOTE: if two tables allocated with devcd_alloc_sgtable and then chained
 * using the sg_chain function then that function should be called only once
 * on the chained table
 * @data: pointer to sg_table to free
 */
static void devcd_free_sgtable(void *data)
{
	_devcd_free_sgtable(data);
}
291
292	/**
293	* devcd_read_from_sgtable - copy data from sg_table to a given buffer
294	* and return the number of bytes read
295	* @buffer: the buffer to copy the data to it
296	* @buf_len: the length of the buffer
297	* @data: the scatterlist table to copy from
298	* @offset: start copy from @offset@ bytes from the head of the data
299	* in the given scatterlist
300	* @data_len: the length of the data in the sg_table
301	*
302	* Returns: the number of bytes copied
303	*/
304	static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset,
305	size_t buf_len, void *data,
306	size_t data_len)
307	{
308	struct scatterlist *table = data;
309
310	if (offset > data_len)
311	return -EINVAL;
312
313	if (offset + buf_len > data_len)
314	buf_len = data_len - offset;
315	return sg_pcopy_to_buffer(sgl: table, nents: sg_nents(sg: table), buf: buffer, buflen: buf_len,
316	skip: offset);
317	}
318
319	/**
320	* dev_coredump_put - remove device coredump
321	* @dev: the struct device for the crashed device
322	*
323	* dev_coredump_put() removes coredump, if exists, for a given device from
324	* the file system and free its associated data otherwise, does nothing.
325	*
326	* It is useful for modules that do not want to keep coredump
327	* available after its unload.
328	*/
329	void dev_coredump_put(struct device *dev)
330	{
331	struct device *existing;
332
333	existing = class_find_device(class: &devcd_class, NULL, data: dev,
334	match: devcd_match_failing);
335	if (existing) {
336	devcd_free(dev: existing, NULL);
337	put_device(dev: existing);
338	}
339	}
340	EXPORT_SYMBOL_GPL(dev_coredump_put);
341
342	/**
343	* dev_coredumpm_timeout - create device coredump with read/free methods with a
344	* custom timeout.
345	* @dev: the struct device for the crashed device
346	* @owner: the module that contains the read/free functions, use %THIS_MODULE
347	* @data: data cookie for the @read/@free functions
348	* @datalen: length of the data
349	* @gfp: allocation flags
350	* @read: function to read from the given buffer
351	* @free: function to free the given buffer
352	* @timeout: time in jiffies to remove coredump
353	*
354	* Creates a new device coredump for the given device. If a previous one hasn't
355	* been read yet, the new coredump is discarded. The data lifetime is determined
356	* by the device coredump framework and when it is no longer needed the @free
357	* function will be called to free the data.
358	*/
359	void dev_coredumpm_timeout(struct device dev, struct* module *owner,
360	void *data, size_t datalen, gfp_t gfp,
361	ssize_t (read)(char* *buffer, loff_t offset,
362	size_t count, void *data,
363	size_t datalen),
364	void (free)(void* *data),
365	unsigned long timeout)
366	{
367	static atomic_t devcd_count = ATOMIC_INIT(`0`);
368	struct devcd_entry *devcd;
369	struct device *existing;
370
371	if (devcd_disabled)
372	goto free;
373
374	existing = class_find_device(class: &devcd_class, NULL, data: dev,
375	match: devcd_match_failing);
376	if (existing) {
377	put_device(dev: existing);
378	goto free;
379	}
380
381	if (!try_module_get(module: owner))
382	goto free;
383
384	devcd = kzalloc(sizeof(*devcd), gfp);
385	if (!devcd)
386	goto put_module;
387
388	devcd->owner = owner;
389	devcd->data = data;
390	devcd->datalen = datalen;
391	devcd->read = read;
392	devcd->free = free;
393	devcd->failing_dev = get_device(dev);
394	devcd->deleted = false;
395
396	mutex_init(&devcd->mutex);
397	device_initialize(dev: &devcd->devcd_dev);
398
399	dev_set_name(dev: &devcd->devcd_dev, name: "devcd%d",
400	atomic_inc_return(v: &devcd_count));
401	devcd->devcd_dev.class = &devcd_class;
402
403	dev_set_uevent_suppress(dev: &devcd->devcd_dev, val: true);
404
405	/ devcd->mutex prevents devcd_del() completing until init finishes /
406	mutex_lock(&devcd->mutex);
407	devcd->init_completed = false;
408	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
409	schedule_delayed_work(dwork: &devcd->del_wk, delay: timeout);
410
411	if (device_add(dev: &devcd->devcd_dev))
412	goto put_device;
413
414	/*
415	* These should normally not fail, but there is no problem
416	* continuing without the links, so just warn instead of
417	* failing.
418	*/
419	if (sysfs_create_link(kobj: &devcd->devcd_dev.kobj, target: &dev->kobj,
420	name: "failing_device") \|\|
421	sysfs_create_link(kobj: &dev->kobj, target: &devcd->devcd_dev.kobj,
422	name: "devcoredump"))
423	dev_warn(dev, "devcoredump create_link failed\n");
424
425	dev_set_uevent_suppress(dev: &devcd->devcd_dev, val: false);
426	kobject_uevent(kobj: &devcd->devcd_dev.kobj, action: KOBJ_ADD);
427
428	/*
429	* Safe to run devcd_del() now that we are done with devcd_dev.
430	* Alternatively we could have taken a ref on devcd_dev before
431	* dropping the lock.
432	*/
433	devcd->init_completed = true;
434	mutex_unlock(lock: &devcd->mutex);
435	return;
436	put_device:
437	mutex_unlock(lock: &devcd->mutex);
438	cancel_delayed_work_sync(dwork: &devcd->del_wk);
439	put_device(dev: &devcd->devcd_dev);
440
441	put_module:
442	module_put(module: owner);
443	free:
444	free(data);
445	}
446	EXPORT_SYMBOL_GPL(dev_coredumpm_timeout);
447
448	/**
449	* dev_coredumpsg - create device coredump that uses scatterlist as data
450	* parameter
451	* @dev: the struct device for the crashed device
452	* @table: the dump data
453	* @datalen: length of the data
454	* @gfp: allocation flags
455	*
456	* Creates a new device coredump for the given device. If a previous one hasn't
457	* been read yet, the new coredump is discarded. The data lifetime is determined
458	* by the device coredump framework and when it is no longer needed
459	* it will free the data.
460	*/
461	void dev_coredumpsg(struct device dev, struct* scatterlist *table,
462	size_t datalen, gfp_t gfp)
463	{
464	dev_coredumpm(dev, NULL, data: table, datalen, gfp, read: devcd_read_from_sgtable,
465	free: devcd_free_sgtable);
466	}
467	EXPORT_SYMBOL_GPL(dev_coredumpsg);
468
469	static int __init devcoredump_init(void)
470	{
471	return class_register(class: &devcd_class);
472	}
473	__initcall(devcoredump_init);
474
475	static void __exit devcoredump_exit(void)
476	{
477	class_for_each_device(class: &devcd_class, NULL, NULL, fn: devcd_free);
478	class_unregister(class: &devcd_class);
479	}
480	__exitcall(devcoredump_exit);
481

source code of linux/drivers/base/devcoredump.c