Upload
akozy
View
223
Download
0
Embed Size (px)
Citation preview
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
1/103
Advanced Char Driver
Operations
Ted Baker Andy Wang
COP 5641 / CIS 4930
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
2/103
Topics
Managing ioctl command numbers
Block/unblocking a process
Seeking on a device
Access control
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
3/103
ioctl
For operations beyond simple data transfers
Eject the media
Report error information
Change hardware settings
Self destruct
Alternatives
Embedded commands in the data stream Driver-specific file systems
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
4/103
ioctl
User-level interface
int ioctl(int fd, unsigned long cmd, ...);
...
Variable number of arguments Problematic for the system call interface
In this context, it is meant to pass a single optional argument
Just a way to bypass the type checking
Difficult to audit ioctl calls
E.g., 32-bit vs. 64-bit modes
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
5/103
ioctl
Driver-level interface
int (*ioctl) (struct inode *inode,
struct file *filp,
unsigned int cmd, unsigned long arg);
cmd is passed from the user unchanged
arg can be an integer or a pointer
Compiler does not type check
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
6/103
Choosing the ioctl Commands
Need a numbering scheme to avoid mistakes
E.g., issuing a command to the wrong device
(changing the baud rate of an audio device)
Check include/asm/ioctl.h and Documentation/ioctl-decoding.txt
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
7/103
Choosing the ioctl Commands
A command number uses four bitfields
Defined in <linux/ioctl.h>
<direction, type, number, size>
direction: direction of data transfer
_IOC_NONE
_IOC_READ
_IOC_WRITE
_IOC_READ | _IOC_WRITE
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
8/103
Choosing the ioctl Commands
type (ioctl device type) 8-bit (_IOC_TYPEBITS) magic number
Associated with the device
number 8-bit (_IOC_NRBITS) sequential number
Unique within device
size: size of user data involved
The width is either 13 or 14 bits (_IOC_SIZEBITS)
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
9/103
Choosing the ioctl Commands
Useful macros to create ioctl command
numbers
_IO(type, nr)
_IOR(type, nr, datatype)
_IOW(type, nr, datatype)
_IOWR(type, nr, datatype)
size = sizeof(datatype)
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
10/103
Choosing the ioctl Commands
Useful macros to decode ioctl command
numbers
_IOC_DIR(nr)
_IOC_TYPE(nr)
_IOC_NR(nr)
_IOC_SIZE(nr)
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
11/103
Choosing the ioctl Commands
The scull example
/* Use 'k' as magic number */
#define SCULL_IOC_MAGIC 'k'
/* Please use a different 8-bit number in your code */
#define SCULL_IOCRESET _IO(SCULL_IOC_MAGIC, 0)
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
12/103
Choosing the ioctl Commands
The scull example
/*
* S means "Set" through a ptr,
* T means "Tell" directly with the argument value
* G means "Get": reply by setting through a pointer
* Q means "Query": response is on the return value
* X means "eXchange": switch G and S atomically
* H means "sHift": switch T and Q atomically
*/
#define SCULL_IOCSQUANTUM _IOW(SCULL_IOC_MAGIC, 1, int)
#define SCULL_IOCSQSET _IOW(SCULL_IOC_MAGIC, 2, int)
#define SCULL_IOCTQUANTUM _IO(SCULL_IOC_MAGIC, 3)
#define SCULL_IOCTQSET _IO(SCULL_IOC_MAGIC, 4)
#define SCULL_IOCGQUANTUM _IOR(SCULL_IOC_MAGIC, 5, int)
Set newvalue and
return the
old value
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
13/103
Choosing the ioctl Commands
The scull example
#define SCULL_IOCGQSET _IOR(SCULL_IOC_MAGIC, 6, int)
#define SCULL_IOCQQUANTUM _IO(SCULL_IOC_MAGIC, 7)
#define SCULL_IOCQQSET _IO(SCULL_IOC_MAGIC, 8)
#define SCULL_IOCXQUANTUM _IOWR(SCULL_IOC_MAGIC, 9, int)
#define SCULL_IOCXQSET _IOWR(SCULL_IOC_MAGIC,10, int)
#define SCULL_IOCHQUANTUM _IO(SCULL_IOC_MAGIC, 11)
#define SCULL_IOCHQSET _IO(SCULL_IOC_MAGIC, 12)
#define SCULL_IOC_MAXNR 14
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
14/103
The Return Value
When the command number is not supported
Return -EINVAL
Or -ENOTTY (according to the POSIX standard)
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
15/103
The Predefined Commands
Handled by the kernel first
Will not be passed down to device drivers
Three groups
For any file (regular, device, FIFO, socket)
Magic number: T.
For regular files only
Specific to the file system type
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
16/103
Using the ioctl Argument
If it is an integer, just use it directly
If it is a pointer
Need to check for valid user address
int access_ok(int type, const void *addr,
unsigned long size);
type: either VERIFY_READ or VERIFY_WRITE
Returns 1 for success, 0 for failure
Driver then returns -EFAULT to the caller
Defined in <asm/uaccess.h>
Mostly called by memory-access routines
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
17/103
Using the ioctl Argument
The scull exampleint scull_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg) {
int err = 0, tmp;
int retval = 0;
/* check the magic number and whether the command is defined */
if (_IOC_TYPE(cmd) != SCULL_IOC_MAGIC) {
return -ENOTTY;
}
if (_IOC_NR(cmd) > SCULL_IOC_MAXNR) {
return -ENOTTY;
}
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
18/103
Using the ioctl Argument
The scull example
/* the concept of "read" and "write" is reversed here */
if (_IOC_DIR(cmd) & _IOC_READ) {
err = !access_ok(VERIFY_WRITE, (void __user *) arg,_IOC_SIZE(cmd));
} else if (_IOC_DIR(cmd) & _IOC_WRITE) {
err = !access_ok(VERIFY_READ, (void __user *) arg,
_IOC_SIZE(cmd));
}
if (err) return -EFAULT;
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
19/103
Using the ioctl Argument
Data transfer functions optimized for most
used data sizes (1, 2, 4, and 8 bytes)
If the size mismatches
Cryptic compiler error message: Conversion to non-scalar type requested
Use copy_to_user and copy_from_user
#include <asm/uaccess.h>
put_user(datum, ptr)
Writes to a user-space address
Calls access_ok()
Returns 0 on success, -EFAULT on error
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
20/103
Using the ioctl Argument
__put_user(datum, ptr)
Does not check access_ok()
Can still fail if the user-space memory is not writable
get_user(local, ptr)
Reads from a user-space address
Calls access_ok()
Stores the retrieved value in local
Returns 0 on success, -EFAULT on error
__get_user(local, ptr)
Does not check access_ok()
Can still fail if the user-space memory is not readable
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
21/103
Capabilities and Restricted Operations
Limit certain ioctl operations to privileged users
See <linux/capability.h> for the full set of capabilities
To check a certain capability, call
int capable(int capability);
In the scull example
if (!capable(CAP_SYS_ADMIN)) {
return -EPERM;
}
CAP_SYS_ADMIN: a catch-all capability for many
system administration
operations
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
22/103
The Implementation of the ioctlCommands
A giant switch statement
switch(cmd) {
case SCULL_IOCRESET:
scull_quantum = SCULL_QUANTUM;scull_qset = SCULL_QSET;
break;
case SCULL_IOCSQUANTUM: /* Set: arg points to the value */
if (!capable(CAP_SYS_ADMIN)) {
return -EPERM;
}
retval = __get_user(scull_quantum, (int __user *)arg);
break;
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
23/103
The Implementation of the ioctlCommands
case SCULL_IOCTQUANTUM: /* Tell: arg is the value */
if (!capable(CAP_SYS_ADMIN)) {
return -EPERM;
}
scull_quantum = arg;break;
case SCULL_IOCGQUANTUM: /* Get: arg is pointer to result */
retval = __put_user(scull_quantum, (int __user *) arg);
break;
case SCULL_IOCQQUANTUM: /* Query: return it (> 0) */
return scull_quantum;
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
24/103
The Implementation of the ioctlCommands
case SCULL_IOCXQUANTUM: /* eXchange: use arg as pointer */
if (!capable(CAP_SYS_ADMIN)) {
return -EPERM;
}
tmp = scull_quantum;retval = __get_user(scull_quantum, (int __user *) arg);
if (retval == 0) {
retval = __put_user(tmp, (int __user *) arg);
}
break;
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
25/103
The Implementation of the ioctlCommands
case SCULL_IOCHQUANTUM: /* sHift: like Tell + Query */
if (!capable(CAP_SYS_ADMIN)) {
return -EPERM;
}
tmp = scull_quantum;scull_quantum = arg;
return tmp;
default: /* redundant, as cmd was checked against MAXNR */
return -ENOTTY;
} /* switch */
return retval;
} /* scull_ioctl */
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
26/103
The Implementation of the ioctlCommands
Six ways to pass and receive arguments from
the user space
Need to know command number
int quantum;
ioctl(fd,SCULL_IOCSQUANTUM, &quantum); /* Set by pointer */
ioctl(fd,SCULL_IOCTQUANTUM, quantum); /* Set by value */
ioctl(fd,SCULL_IOCGQUANTUM, &quantum); /* Get by pointer */
quantum = ioctl(fd,SCULL_IOCQQUANTUM); /* Get by return value */
ioctl(fd,SCULL_IOCXQUANTUM, &quantum); /* Exchange by pointer */
/* Exchange by value */
quantum = ioctl(fd,SCULL_IOCHQUANTUM, quantum);
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
27/103
Device Control Without ioctl
Writing control sequences into the data
stream itself
Example: console escape sequences
Advantages: No need to implement ioctl methods
Disadvantages:
Need to make sure that escape sequences do not
appear in the normal data stream (e.g., cat a binary file)
Need to parse the data stream
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
28/103
Blocking I/O
Needed when no data is available for reads
When the device is not ready to accept data
Output buffer is full
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
29/103
Introduction to Sleeping
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
30/103
Introduction to Sleeping
A process is removed from the schedulers
run queue
Certain rules
Never sleep when running in an atomic context
Multiple steps must be performed without concurrent
accesses
Not while holding a spinlock, seqlock, or RCU lock
Not while disabling interrupts
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
31/103
Introduction to Sleeping
Okay to sleep while holding a semaphore
Other threads waiting for the semaphore will also sleep
Need to keep it short
Make sure that it is not blocking the process that will wake
it up After waking up
Make no assumptions about the state of the system
The resource one is waiting for might be gone again
Must check the wait condition again
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
32/103
Introduction to Sleeping
Wait queue: contains a list of processes
waiting for a specific event
#include <linux/wait.h>
To initialize statically, call
DECLARE_WAIT_QUEUE_HEAD(my_queue);
To initialize dynamically, call
wait_queue_head_t my_queue;
init_waitqueue_head(&my_queue);
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
33/103
Simple Sleeping
Call variants of wait_event macros
wait_event(queue, condition)
queue = wait queue head
Passed by value
Waits until the boolean condition becomes true
Puts into an uninterruptible sleep
Usually is not what you want
wait_event_interruptible(queue, condition)
Can be interrupted by signals
Returns nonzero if sleep was interrupted
Your driver should return -ERESTARTSYS
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
34/103
Simple Sleeping
wait_event_timeout(queue, condition, timeout)
Wait for a limited time (in jiffies)
Returns 0 if the timeout expires, regardless of how the condition evaluates
wait_event_interruptible_timeout(queue,
condition,timeout)
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
35/103
Simple Sleeping
To wake up, call variants of wake_up functions
void wake_up(wait_queue_head_t *queue);
Wakes up all processes waiting on the queue
void wake_up_interruptible(wait_queue_head_t *queue);
Wakes up processes that perform an interruptible sleep
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
36/103
Simple Sleeping
Example module: sleepy
static DECLARE_WAIT_QUEUE_HEAD(wq);
static int flag = 0;
/*
* read method of the sleepy example: put the caller to sleep on the
* file-scope wait queue wq until the file-scope flag becomes nonzero,
* then clear the flag and report end-of-file.
*
* filp, buf, count and pos are not used by the body.
* Always returns 0.
*
* NOTE(review): the result of wait_event_interruptible() is discarded,
* so the code below runs the same way however the wait ended - confirm
* this is intended for the demo.
*/
ssize_t sleepy_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos) {
printk(KERN_DEBUG "process %i (%s) going to sleep\n",
current->pid, current->comm);
/* Interruptible sleep until flag != 0. */
wait_event_interruptible(wq, flag != 0);
/* The flag is reset only after waking, not as part of the wait itself. */
flag = 0;
printk(KERN_DEBUG "awoken %i (%s)\n", current->pid,current->comm);
return 0; /* EOF */
}
Multiple
threads can
wake up at this point
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
37/103
Simple Sleeping
Example module: sleepy
ssize_t sleepy_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos) {
printk(KERN_DEBUG "process %i (%s) awakening the readers...\n",
current->pid, current->comm);
flag = 1;
wake_up_interruptible(&wq);
return count; /* succeed, to avoid retrial */
}
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
38/103
Blocking and Nonblocking Operations
By default, operations block
If no data is available for reads
If no space is available for writes
Non-blocking I/O is indicated by the O_NONBLOCK flag in filp->f_flags
Defined in <linux/fcntl.h>
Only open, read, and write calls are affected
Returns -EAGAIN immediately instead of blocking
Applications need to distinguish non-blocking
returns vs. EOFs
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
39/103
A Blocking I/O Example
scullpipe
A read process
Blocks when no data is available
Wakes a blocking write when buffer space becomes available
A write process
Blocks when no buffer space is available
Wakes a blocking read process when data arrives
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
40/103
A Blocking I/O Example
scullpipe data structure
/*
 * Per-device state of a scullpipe device: a circular buffer together
 * with the wait queues, counters and lock used by readers and writers.
 */
struct scull_pipe {
	wait_queue_head_t inq;             /* read queue */
	wait_queue_head_t outq;            /* write queue */
	char *buffer;                      /* begin of buf */
	char *end;                         /* end of buf */
	int buffersize;                    /* used in pointer arithmetic */
	char *rp;                          /* where to read */
	char *wp;                          /* where to write */
	int nreaders;                      /* number of openings for reading */
	int nwriters;                      /* number of openings for writing */
	struct fasync_struct *async_queue; /* asynchronous readers */
	struct semaphore sem;              /* mutual exclusion semaphore */
	struct cdev cdev;                  /* char device structure */
};
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
41/103
A Blocking I/O Example
static ssize_t scull_p_read(struct file *filp, char __user *buf,
size_t count, loff_t *f_pos) {
struct scull_pipe *dev = filp->private_data;
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
while (dev->rp == dev->wp) { /* nothing to read */
up(&dev->sem); /* release the lock */
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
if (wait_event_interruptible(dev->inq, (dev->rp != dev->wp)))
return -ERESTARTSYS;
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
}
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
42/103
A Blocking I/O Example
if (dev->wp > dev->rp)
count = min(count, (size_t)(dev->wp - dev->rp));
else /* the write pointer has wrapped */
count = min(count, (size_t)(dev->end - dev->rp));
if (copy_to_user(buf, dev->rp, count)) {
up (&dev->sem);return -EFAULT;
}
dev->rp += count;
if (dev->rp == dev->end) dev->rp = dev->buffer; /* wrapped */
up (&dev->sem);
/* finally, awake any writers and return */
wake_up_interruptible(&dev->outq);
return count;
}
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
43/103
Advanced Sleeping
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
44/103
Advanced Sleeping
Uses low-level functions to affect a sleep
How a process sleeps
1. Allocate and initialize a wait_queue_t structure
DEFINE_WAIT(my_wait);
Or
wait_queue_t my_wait;
init_wait(&my_wait);
Queue element
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
45/103
Advanced Sleeping
2. Add to the proper wait queue and mark a process as being asleep
TASK_RUNNING -> TASK_INTERRUPTIBLE or
TASK_UNINTERRUPTIBLE
Call
void prepare_to_wait(wait_queue_head_t *queue,
wait_queue_t *wait,
int state);
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
46/103
Advanced Sleeping
3. Give up the processor
Double check the sleeping condition before going to
sleep
The wakeup thread might have changed the condition
between steps 1 and 2
if (/* sleeping condition */) {
schedule(); /* yield the CPU */
}
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
47/103
Advanced Sleeping
4. Return from sleep
Remove the process from the wait queue if schedule() was not called
void finish_wait(wait_queue_head_t *queue, wait_queue_t *wait);
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
48/103
Advanced Sleeping
scullpipe write method
/* How much space is free? */
/*
 * Report how many bytes can currently be written into the circular
 * buffer of dev. The result is always one less than the raw gap between
 * wp and rp, so the largest value returned is buffersize - 1.
 */
static int spacefree(struct scull_pipe *dev) {
	return (dev->rp == dev->wp)
		? dev->buffersize - 1 /* read and write positions coincide */
		: ((dev->rp + dev->buffersize - dev->wp) % dev->buffersize) - 1;
}
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
49/103
Advanced Sleeping
static ssize_t
scull_p_write(struct file *filp, const char __user *buf,
size_t count, loff_t *f_pos) {
struct scull_pipe *dev = filp->private_data;
int result;
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
/* Wait for space for writing */
result = scull_getwritespace(dev, filp);
if (result)
return result; /* scull_getwritespace called up(&dev->sem) */
/* ok, space is there, accept something */
count = min(count, (size_t)spacefree(dev));
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
50/103
Advanced Sleeping
if (dev->wp >= dev->rp)
count = min(count, (size_t)(dev->end - dev->wp));
else /* the write pointer has wrapped, fill up to rp - 1 */
count = min(count, (size_t)(dev->rp - dev->wp - 1));
if (copy_from_user(dev->wp, buf, count)) {
up (&dev->sem); return -EFAULT;}
dev->wp += count;
if (dev->wp == dev->end) dev->wp = dev->buffer; /* wrapped */
up(&dev->sem);
wake_up_interruptible(&dev->inq);
if (dev->async_queue)
kill_fasync(&dev->async_queue, SIGIO, POLL_IN);
return count;
}
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
51/103
Advanced Sleeping
/*
* Wait for space for writing; caller must hold device semaphore.
* On error the semaphore will be released before returning.
*
* Returns 0 once spacefree(dev) is nonzero, -EAGAIN if the buffer is
* full and the file is O_NONBLOCK, or -ERESTARTSYS if a signal is
* pending or the semaphore cannot be re-acquired.
*/
static int scull_getwritespace(struct scull_pipe *dev,
struct file *filp) {
while (spacefree(dev) == 0) { /* full */
/* Set up a wait entry, then drop the semaphore before any blocking step. */
DEFINE_WAIT(wait);up(&dev->sem);
/* Non-blocking callers get -EAGAIN instead of sleeping. */
if (filp->f_flags & O_NONBLOCK) return -EAGAIN;
/* Queue on outq and mark the task interruptible before re-testing. */
prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
/* Re-check after queueing; only give up the CPU if still full. */
if (spacefree(dev) == 0) schedule();
finish_wait(&dev->outq, &wait);
if (signal_pending(current)) return -ERESTARTSYS;
/* Re-take the semaphore before the loop condition is evaluated again. */
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
}
return 0;
}
Task state: RUNNINGQueue: full
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
52/103
Advanced Sleeping
/*
* Wait for space for writing; caller must hold device semaphore.
* On error the semaphore will be released before returning.
*
* Returns 0 once spacefree(dev) is nonzero, -EAGAIN if the buffer is
* full and the file is O_NONBLOCK, or -ERESTARTSYS if a signal is
* pending or the semaphore cannot be re-acquired.
*/
static int scull_getwritespace(struct scull_pipe *dev,
struct file *filp) {
while (spacefree(dev) == 0) { /* full */
/* Set up a wait entry, then drop the semaphore before any blocking step. */
DEFINE_WAIT(wait);up(&dev->sem);
/* Non-blocking callers get -EAGAIN instead of sleeping. */
if (filp->f_flags & O_NONBLOCK) return -EAGAIN;
/* Queue on outq and mark the task interruptible before re-testing. */
prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
/* Re-check after queueing; only give up the CPU if still full. */
if (spacefree(dev) == 0) schedule();
finish_wait(&dev->outq, &wait);
if (signal_pending(current)) return -ERESTARTSYS;
/* Re-take the semaphore before the loop condition is evaluated again. */
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
}
return 0;
}
Task state: RUNNING INTERRUPTIBLEQueue: full
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
53/103
Advanced Sleeping
/*
* Wait for space for writing; caller must hold device semaphore.
* On error the semaphore will be released before returning.
*
* Returns 0 once spacefree(dev) is nonzero, -EAGAIN if the buffer is
* full and the file is O_NONBLOCK, or -ERESTARTSYS if a signal is
* pending or the semaphore cannot be re-acquired.
*/
static int scull_getwritespace(struct scull_pipe *dev,
struct file *filp) {
while (spacefree(dev) == 0) { /* full */
/* Set up a wait entry, then drop the semaphore before any blocking step. */
DEFINE_WAIT(wait);up(&dev->sem);
/* Non-blocking callers get -EAGAIN instead of sleeping. */
if (filp->f_flags & O_NONBLOCK) return -EAGAIN;
/* Queue on outq and mark the task interruptible before re-testing. */
prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
/* Re-check after queueing; only give up the CPU if still full. */
if (spacefree(dev) == 0) schedule();
finish_wait(&dev->outq, &wait);
if (signal_pending(current)) return -ERESTARTSYS;
/* Re-take the semaphore before the loop condition is evaluated again. */
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
}
return 0;
}
Task state: INTERRUPTIBLE /* sleep */Queue: full
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
54/103
Advanced Sleeping
/*
* Wait for space for writing; caller must hold device semaphore.
* On error the semaphore will be released before returning.
*
* Returns 0 once spacefree(dev) is nonzero, -EAGAIN if the buffer is
* full and the file is O_NONBLOCK, or -ERESTARTSYS if a signal is
* pending or the semaphore cannot be re-acquired.
*/
static int scull_getwritespace(struct scull_pipe *dev,
struct file *filp) {
while (spacefree(dev) == 0) { /* full */
/* Set up a wait entry, then drop the semaphore before any blocking step. */
DEFINE_WAIT(wait);up(&dev->sem);
/* Non-blocking callers get -EAGAIN instead of sleeping. */
if (filp->f_flags & O_NONBLOCK) return -EAGAIN;
/* Queue on outq and mark the task interruptible before re-testing. */
prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
/* Re-check after queueing; only give up the CPU if still full. */
if (spacefree(dev) == 0) schedule();
finish_wait(&dev->outq, &wait);
if (signal_pending(current)) return -ERESTARTSYS;
/* Re-take the semaphore before the loop condition is evaluated again. */
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
}
return 0;
}
Task state: RUNNINGQueue: full
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
55/103
Advanced Sleeping
/*
* Wait for space for writing; caller must hold device semaphore.
* On error the semaphore will be released before returning.
*
* Returns 0 once spacefree(dev) is nonzero, -EAGAIN if the buffer is
* full and the file is O_NONBLOCK, or -ERESTARTSYS if a signal is
* pending or the semaphore cannot be re-acquired.
*/
static int scull_getwritespace(struct scull_pipe *dev,
struct file *filp) {
while (spacefree(dev) == 0) { /* full */
/* Set up a wait entry, then drop the semaphore before any blocking step. */
DEFINE_WAIT(wait);up(&dev->sem);
/* Non-blocking callers get -EAGAIN instead of sleeping. */
if (filp->f_flags & O_NONBLOCK) return -EAGAIN;
/* Queue on outq and mark the task interruptible before re-testing. */
prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
/* Re-check after queueing; only give up the CPU if still full. */
if (spacefree(dev) == 0) schedule();
finish_wait(&dev->outq, &wait);
if (signal_pending(current)) return -ERESTARTSYS;
/* Re-take the semaphore before the loop condition is evaluated again. */
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
}
return 0;
}
Task state: RUNNING RUNNING
wake
up
Queue: !full
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
56/103
Advanced Sleeping
/*
* Wait for space for writing; caller must hold device semaphore.
* On error the semaphore will be released before returning.
*
* Returns 0 once spacefree(dev) is nonzero, -EAGAIN if the buffer is
* full and the file is O_NONBLOCK, or -ERESTARTSYS if a signal is
* pending or the semaphore cannot be re-acquired.
*/
static int scull_getwritespace(struct scull_pipe *dev,
struct file *filp) {
while (spacefree(dev) == 0) { /* full */
/* Set up a wait entry, then drop the semaphore before any blocking step. */
DEFINE_WAIT(wait);up(&dev->sem);
/* Non-blocking callers get -EAGAIN instead of sleeping. */
if (filp->f_flags & O_NONBLOCK) return -EAGAIN;
/* Queue on outq and mark the task interruptible before re-testing. */
prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
/* Re-check after queueing; only give up the CPU if still full. */
if (spacefree(dev) == 0) schedule();
finish_wait(&dev->outq, &wait);
if (signal_pending(current)) return -ERESTARTSYS;
/* Re-take the semaphore before the loop condition is evaluated again. */
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
}
return 0;
}
Task state: RUNNING INTERRUPTIBLEQueue: !full
Ad d l
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
57/103
Advanced Sleeping
/*
* Wait for space for writing; caller must hold device semaphore.
* On error the semaphore will be released before returning.
*
* Returns 0 once spacefree(dev) is nonzero, -EAGAIN if the buffer is
* full and the file is O_NONBLOCK, or -ERESTARTSYS if a signal is
* pending or the semaphore cannot be re-acquired.
*/
static int scull_getwritespace(struct scull_pipe *dev,
struct file *filp) {
while (spacefree(dev) == 0) { /* full */
/* Set up a wait entry, then drop the semaphore before any blocking step. */
DEFINE_WAIT(wait);up(&dev->sem);
/* Non-blocking callers get -EAGAIN instead of sleeping. */
if (filp->f_flags & O_NONBLOCK) return -EAGAIN;
/* Queue on outq and mark the task interruptible before re-testing. */
prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
/* Re-check after queueing; only give up the CPU if still full. */
if (spacefree(dev) == 0) schedule();
finish_wait(&dev->outq, &wait);
if (signal_pending(current)) return -ERESTARTSYS;
/* Re-take the semaphore before the loop condition is evaluated again. */
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
}
return 0;
}
Task state: INTERRUPTIBLE /* no sleep */Queue: !full
Ad d Sl i
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
58/103
Advanced Sleeping
/*
* Wait for space for writing; caller must hold device semaphore.
* On error the semaphore will be released before returning.
*
* Returns 0 once spacefree(dev) is nonzero, -EAGAIN if the buffer is
* full and the file is O_NONBLOCK, or -ERESTARTSYS if a signal is
* pending or the semaphore cannot be re-acquired.
*/
static int scull_getwritespace(struct scull_pipe *dev,
struct file *filp) {
while (spacefree(dev) == 0) { /* full */
/* Set up a wait entry, then drop the semaphore before any blocking step. */
DEFINE_WAIT(wait);up(&dev->sem);
/* Non-blocking callers get -EAGAIN instead of sleeping. */
if (filp->f_flags & O_NONBLOCK) return -EAGAIN;
/* Queue on outq and mark the task interruptible before re-testing. */
prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
/* Re-check after queueing; only give up the CPU if still full. */
if (spacefree(dev) == 0) schedule();
finish_wait(&dev->outq, &wait);
if (signal_pending(current)) return -ERESTARTSYS;
/* Re-take the semaphore before the loop condition is evaluated again. */
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
}
return 0;
}
Task state: RUNNINGQueue: full
Ad d Sl i
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
59/103
Advanced Sleeping
/*
* Wait for space for writing; caller must hold device semaphore.
* On error the semaphore will be released before returning.
*
* Returns 0 once spacefree(dev) is nonzero, -EAGAIN if the buffer is
* full and the file is O_NONBLOCK, or -ERESTARTSYS if a signal is
* pending or the semaphore cannot be re-acquired.
*/
static int scull_getwritespace(struct scull_pipe *dev,
struct file *filp) {
while (spacefree(dev) == 0) { /* full */
/* Set up a wait entry, then drop the semaphore before any blocking step. */
DEFINE_WAIT(wait);up(&dev->sem);
/* Non-blocking callers get -EAGAIN instead of sleeping. */
if (filp->f_flags & O_NONBLOCK) return -EAGAIN;
/* Queue on outq and mark the task interruptible before re-testing. */
prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
/* Re-check after queueing; only give up the CPU if still full. */
if (spacefree(dev) == 0) schedule();
finish_wait(&dev->outq, &wait);
if (signal_pending(current)) return -ERESTARTSYS;
/* Re-take the semaphore before the loop condition is evaluated again. */
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
}
return 0;
}
Task state: RUNNING INTERRUPTIBLEQueue: full
Ad d Sl i
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
60/103
Advanced Sleeping
/*
* Wait for space for writing; caller must hold device semaphore.
* On error the semaphore will be released before returning.
*
* Returns 0 once spacefree(dev) is nonzero, -EAGAIN if the buffer is
* full and the file is O_NONBLOCK, or -ERESTARTSYS if a signal is
* pending or the semaphore cannot be re-acquired.
*/
static int scull_getwritespace(struct scull_pipe *dev,
struct file *filp) {
while (spacefree(dev) == 0) { /* full */
/* Set up a wait entry, then drop the semaphore before any blocking step. */
DEFINE_WAIT(wait);up(&dev->sem);
/* Non-blocking callers get -EAGAIN instead of sleeping. */
if (filp->f_flags & O_NONBLOCK) return -EAGAIN;
/* Queue on outq and mark the task interruptible before re-testing. */
prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
/* Re-check after queueing; only give up the CPU if still full. */
if (spacefree(dev) == 0) schedule();
finish_wait(&dev->outq, &wait);
if (signal_pending(current)) return -ERESTARTSYS;
/* Re-take the semaphore before the loop condition is evaluated again. */
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
}
return 0;
}
Task state: INTERRUPTIBLE RUNNING
wake
up
Queue: !full
Ad d Sl i
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
61/103
Advanced Sleeping
/*
* Wait for space for writing; caller must hold device semaphore.
* On error the semaphore will be released before returning.
*
* Returns 0 once spacefree(dev) is nonzero, -EAGAIN if the buffer is
* full and the file is O_NONBLOCK, or -ERESTARTSYS if a signal is
* pending or the semaphore cannot be re-acquired.
*/
static int scull_getwritespace(struct scull_pipe *dev,
struct file *filp) {
while (spacefree(dev) == 0) { /* full */
/* Set up a wait entry, then drop the semaphore before any blocking step. */
DEFINE_WAIT(wait);up(&dev->sem);
/* Non-blocking callers get -EAGAIN instead of sleeping. */
if (filp->f_flags & O_NONBLOCK) return -EAGAIN;
/* Queue on outq and mark the task interruptible before re-testing. */
prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
/* Re-check after queueing; only give up the CPU if still full. */
if (spacefree(dev) == 0) schedule();
finish_wait(&dev->outq, &wait);
if (signal_pending(current)) return -ERESTARTSYS;
/* Re-take the semaphore before the loop condition is evaluated again. */
if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
}
return 0;
}
Task state: RUNNING /* do not sleep */Queue: !full
E l i W i
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
62/103
Exclusive Waits
Avoid waking up all processes waiting on a
queue
Wakes up only one process
Call
void prepare_to_wait_exclusive(wait_queue_head_t *queue,
wait_queue_t *wait, int state);
Sets the WQ_FLAG_EXCLUSIVE flag
Adds the queue entry to the end of the wait queue
wake_up stops after waking the first process with
the flag set
Th D il f W ki U
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
63/103
The Details of Waking Up
/* wakes up all processes waiting on the queue */
void wake_up(wait_queue_head_t *queue);
/* wakes up processes that perform an interruptible sleep */
void wake_up_interruptible(wait_queue_head_t *queue);
/* wake up to nr exclusive waiters */
void wake_up_nr(wait_queue_head_t *queue, int nr);
void wake_up_interruptible_nr(wait_queue_head_t *queue, int nr);
/* wake up all exclusive waiters */
void wake_up_all(wait_queue_head_t *queue);
void wake_up_interruptible_all(wait_queue_head_t *queue);
/* do not lose the CPU during this call */
void wake_up_interruptible_sync(wait_queue_head_t *queue);
A i Hi l
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
64/103
Ancient History: sleep_on
Not safe
Deprecated
T i h ll i D i
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
65/103
Testing the scullpipe Driver
Window 1% cat /dev/scullpipe
Window2%
T ti th ll i D i
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
66/103
Testing the scullpipe Driver
Window 1% cat /dev/scullpipe
Window 2% ls -aF > /dev/scullpipe
T ti th sc llpipe D i
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
67/103
Testing the scullpipe Driver
Window 1% cat /dev/scullpipe
./
../
file1
file2
Window 2% ls -aF > /dev/scullpipe
poll d select
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
68/103
poll and select
Nonblocking I/Os often involve the use of poll, select, and epoll system calls
Allow a process to determine whether it can read
or write one or more open files without blocking
Can block a process until any of a set of file descriptors becomes available for reading or writing
select introduced in BSD Unix
poll introduced in System V
epoll added in 2.5.45 for better scaling
poll d select
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
69/103
poll and select
All three calls supported through the poll method
unsigned int (*poll) (struct file *filp,
poll_table *wait);
1. Call poll_wait on one or more wait queues that could
indicate a change in the poll status
If no file descriptors are available, wait
2. Return a bit mask describing the operations that could
be immediately performed without blocking
poll nd select
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
70/103
poll and select
poll_table defined in <linux/poll.h>
To add a wait queue into the poll_table,
call
void poll_wait(struct file *, wait_queue_head_t *,
poll_table *);
Bit mask flags defined in <linux/poll.h>
POLLIN
Set if the device can be read without blocking
poll and select
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
71/103
poll and select
POLLOUT
Set if the device can be written without blocking
POLLRDNORM
Set if normal data is available for reading
A readable device returns (POLLIN | POLLRDNORM)
POLLWRNORM
Same meaning as POLLOUT
A writable device returns (POLLOUT | POLLWRNORM)
POLLPRI
High-priority data can be read without blocking
poll and select
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
72/103
poll and select
POLLHUP
Returned when a process reads the end-of-file
POLLERR
An error condition has occurred
POLLRDBAND
Out-of-band data is available for reading
Associated with sockets
POLLWRBAND
Data with nonzero priority can be written to the device
poll and select
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
73/103
poll and select
Example
static unsigned int scull_p_poll(struct file *filp,
poll_table *wait) {
struct scull_pipe *dev = filp->private_data;
unsigned int mask = 0;
down(&dev->sem);
poll_wait(filp, &dev->inq, wait);
poll_wait(filp, &dev->outq, wait);
if (dev->rp != dev->wp) /* circular buffer not empty */
mask |= POLLIN | POLLRDNORM; /* readable */if (spacefree(dev)) /* circular buffer not full */
mask |= POLLOUT | POLLWRNORM; /* writable */
up(&dev->sem);
return mask;
}
poll and select
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
74/103
poll and select
No end-of-file support
The reader sees an end-of-file when all writers
close the file
Check dev->nwriters in read and poll
Problem when a reader opens the scullpipe before
the writer
Need blocking within open
Interaction with read and write
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
75/103
Interaction with read and write
Reading from the device
If there is data in the input buffer, return at least
one byte
poll returns POLLIN | POLLRDNORM
If no data is available
If O_NONBLOCK is set, return -EAGAIN
poll must report the device unreadable until one byte
arrives
At the end-of-file, read returns 0, poll returns POLLHUP
Interaction with read and write
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
76/103
Interaction with read and write
Writing to the device
If there is space in the output buffer, accept at
least one byte
poll reports that the device is writable by returning
POLLOUT | POLLWRNORM
If the output buffer is full, write blocks
If O_NONBLOCK is set, write returns -EAGAIN
poll reports that the file is not writable
If the device is full, write returns -ENOSPC
Interaction with read and write
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
77/103
Interaction with read and write
In write, never wait for data transmission beforereturning
Or, select may block
To make sure the output buffer is actuallytransmitted, use fsync call
Interaction with read and write
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
78/103
Interaction with read and write
To flush pending output, call fsync
int (*fsync) (struct file *file, struct dentry *dentry, int datasync);
Should return only when the device has been completely flushed
datasync:
Used by file systems, ignored by drivers
The Underlying Data Structure
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
79/103
The Underlying Data Structure
The Underlying Data Structure
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
80/103
The Underlying Data Structure
When thepoll call completes,poll_tableis deallocated with all wait queue entries
removed
epoll reduces this overhead of setting up andtearing down the data structure between every I/O
Asynchronous Notification
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
81/103
Asynchronous Notification
Polling
Inefficient for rare events
A solution: asynchronous notification
Application receives a signal whenever data becomes available
Two steps
Specify a process as the owner of the file (so that the
kernel knows whom to notify)
Set the FASYNC flag in the device via fcntl command
Asynchronous Notification
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
82/103
Asynchronous Notification
Example (user space)
/* create a signal handler */
signal(SIGIO, &input_handler);
/* set current pid the owner of the stdin */
fcntl(STDIN_FILENO, F_SETOWN, getpid());
/* obtain the current file control flags */
oflags = fcntl(STDIN_FILENO, F_GETFL);
/* set the asynchronous flag */
fcntl(STDIN_FILENO, F_SETFL, oflags | FASYNC);
Asynchronous Notification
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
83/103
Asynchronous Notification
Some catches
Not all devices support asynchronous notification
Usually available forsockets and ttys
Need to know which input file to process Still need to usepoll orselect
The Drivers Point of View
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
84/103
The Driver s Point of View
1. When F_SETOWN is invoked, a value is assigned to filp->f_owner
2. When F_SETFL is executed to change the
status of FASYNC
The driver's fasync method is called
static int
scull_p_fasync(int fd, struct file *filp, int mode) {
struct scull_pipe *dev = filp->private_data;
return fasync_helper(fd, filp, mode, &dev->async_queue);
}
The Driver's Point of View
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
85/103
The Driver's Point of View
fasync_helper adds or removes processes from the asynchronous list
int fasync_helper(int fd, struct file *filp, int mode,
struct fasync_struct **fa);
3. When data arrives, send a SIGIO signal to all processes registered for asynchronous
notification
Near the end of write, notify blocked readers
if (dev->async_queue)
kill_fasync(&dev->async_queue, SIGIO, POLL_IN);
Similarly for read, as needed
The Driver's Point of View
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
86/103
The Driver's Point of View
4. When the file is closed, remove the file from the list of asynchronous readers in the release method
scull_p_fasync(-1, filp, 0);
The llseek Implementation
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
87/103
The llseek Implementation
Implements lseek and llseek system calls
Modifies filp->f_pos
loff_t scull_llseek(struct file *filp, loff_t off, int whence) {
struct scull_dev *dev = filp->private_data;
loff_t newpos;
switch(whence) {
case 0: /* SEEK_SET */
newpos = off;
break;
case 1: /* SEEK_CUR, relative to the current position */
newpos = filp->f_pos + off;
break;
The llseek Implementation
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
88/103
The llseek Implementation
case 2: /* SEEK_END, relative to the end of the file */newpos = dev->size + off;
break;
default: /* can't happen */
return -EINVAL;
}
if (newpos < 0) return -EINVAL;
filp->f_pos = newpos;
return newpos;
}
The llseek Implementation
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
89/103
The llseek Implementation
Does not make sense for serial ports and keyboard inputs
Need to inform the kernel via calling
nonseekable_open in the open method
int nonseekable_open(struct inode *inode, struct file *filp);
Replace llseek method with no_llseek (defined in <linux/fs.h>) in your
file_operations structure
Access Control on a Device File
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
90/103
Access Control on a Device File
Prevents unauthorized users from using the device
Sometimes permits only one authorized user
to open the device at a time
Single-Open Devices
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
91/103
Single Open Devices
Example: scullsingle
static atomic_t scull_s_available = ATOMIC_INIT(1);
/*
 * Open method for the single-open scull device.
 *
 * scull_s_available starts at 1.  The first opener decrements it to 0 and
 * proceeds; any later opener sees a non-zero result, restores the counter
 * and is refused with -EBUSY.
 *
 * Returns 0 on success, -EBUSY if the device is already open.
 */
static int scull_s_open(struct inode *inode, struct file *filp)
{
	struct scull_dev *dev = &scull_s_device;
	int claimed = atomic_dec_and_test(&scull_s_available);

	if (!claimed) {
		/* Already open: undo our decrement before bailing out. */
		atomic_inc(&scull_s_available);
		return -EBUSY;
	}

	/* From here on, everything is the same as the plain scull open. */
	if ((filp->f_flags & O_ACCMODE) == O_WRONLY)
		scull_trim(dev);

	filp->private_data = dev;
	return 0;
}
atomic_dec_and_test returns true if the
value after the decrement is 0
Single-Open Devices
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
92/103
Single Open Devices
In the release call, marks the device idle
/*
 * Release method for the single-open scull device: marks the device idle
 * again by incrementing scull_s_available.  Always returns 0.
 */
static int scull_s_release(struct inode *inode, struct file *filp)
{
	/* Hand the device back. */
	atomic_inc(&scull_s_available);

	return 0;
}
Restricting Access to a Single User (with
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
93/103
Restricting Access to a Single User (with
multiple processes) at a Time
Example: sculluid
Includes the following in the open call
spin_lock(&scull_u_lock);
if (scull_u_count && /* someone is using the device */
(scull_u_owner != current->uid) && /* not the same user */
(scull_u_owner != current->euid) && /* not the same effective uid (for su) */
!capable(CAP_DAC_OVERRIDE)) { /* not root override */
spin_unlock(&scull_u_lock);
return -EBUSY; /* -EPERM would confuse the user */
}
if (scull_u_count == 0) scull_u_owner = current->uid;
scull_u_count++;
spin_unlock(&scull_u_lock);
Restricting Access to a Single User (with
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
94/103
Restricting Access to a Single User (with
Multiple Processes) at a Time
Includes the following in the release call
/*
 * Release method for the single-user scull device: drops one reference
 * from scull_u_count while holding scull_u_lock.  Always returns 0.
 */
static int scull_u_release(struct inode *inode, struct file *filp)
{
	spin_lock(&scull_u_lock);
	/* Only the open count changes here; nothing else is touched. */
	scull_u_count--;
	spin_unlock(&scull_u_lock);

	return 0;
}
Blocking open as an Alternative to
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
95/103
Blocking open as an Alternative to EBUSY (scullwuid)
A user might prefer to wait over getting errors
E.g., data communication channel
spin_lock(&scull_w_lock);
while (!scull_w_available()) {
spin_unlock(&scull_w_lock);if (filp->f_flags & O_NONBLOCK) return -EAGAIN;
if (wait_event_interruptible(scull_w_wait,
scull_w_available()))
return -ERESTARTSYS; /* tell the fs layer to handle it */
spin_lock(&scull_w_lock);
}
if (scull_w_count == 0) scull_w_owner = current->uid;
scull_w_count++;
spin_unlock(&scull_w_lock);
Blocking open as an Alternative to
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
96/103
Blocking open as an Alternative to EBUSY (scullwuid)
The release method wakes pending processes
/*
 * Release method for the blocking-open scull device.
 *
 * Decrements scull_w_count under scull_w_lock and, once the lock has been
 * dropped, wakes the processes sleeping on scull_w_wait if that was the
 * last reference.  Always returns 0.
 */
static int scull_w_release(struct inode *inode, struct file *filp)
{
	int remaining;

	spin_lock(&scull_w_lock);
	remaining = --scull_w_count;
	spin_unlock(&scull_w_lock);

	/* Only the last closer needs to wake the waiters in open. */
	if (!remaining)
		wake_up_interruptible_sync(&scull_w_wait);

	return 0;
}
Blocking open as an Alternative to
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
97/103
Blocking open as an Alternative to EBUSY
Might not be the right semantics for interactive users
Blocking on cp vs. getting a return value -EBUSY
or -EPERM
Incompatible policies for the same device
One solution: one device node per policy
Cloning the Device on open
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
98/103
Cloning the Device on open
Allows the creation of private, virtual devices
E.g., One virtual scull device for each process
with different tty device number
Example: scullpriv
Cloning the Device on open
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
99/103
g p
static int scull_c_open(struct inode *inode, struct file *filp) {struct scull_dev *dev;
dev_t key;
if (!current->signal->tty) {
PDEBUG("Process \"%s\" has no ctl tty\n", current->comm);
return -EINVAL;
}
key = tty_devnum(current->signal->tty);
spin_lock(&scull_c_lock);
dev = scull_c_lookfor_device(key);
spin_unlock(&scull_c_lock);if (!dev) return -ENOMEM;
.../* then, everything else is the same as before */
}
Cloning the Device on open
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
100/103
g p
/*
 * The clone-specific data structure includes a key field.
 *
 * One item per cloned scull device; items are chained on scull_c_list and
 * looked up by key in scull_c_lookfor_device().
 */
struct scull_listitem {
	struct scull_dev device; /* the private device returned to the opener */
	dev_t key;               /* value the lookup compares against to find this clone */
	struct list_head list;   /* linkage into scull_c_list */
};
/* The list of devices, and a lock to protect it */
static LIST_HEAD(scull_c_list);
static spinlock_t scull_c_lock = SPIN_LOCK_UNLOCKED;
Cloning the Device on open
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
101/103
g p
/* Look for a device or create one if missing */static struct scull_dev *scull_c_lookfor_device(dev_t key) {
struct scull_listitem *lptr;
list_for_each_entry(lptr, &scull_c_list, list) {
if (lptr->key == key)
return &(lptr->device);
}
/* not found */
lptr = kmalloc(sizeof(struct scull_listitem), GFP_KERNEL);
if (!lptr) return NULL;
Cloning the Device on open
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
102/103
g p
/* initialize the device */memset(lptr, 0, sizeof(struct scull_listitem));
lptr->key = key;
scull_trim(&(lptr->device)); /* initialize it */
init_MUTEX(&(lptr->device.sem));
/* place it in the list */
list_add(&lptr->list, &scull_c_list);
return &(lptr->device);
}
What's going on?
7/30/2019 Lecture_ch6 Advanced Char Driver Operations
103/103
What's going on?
scull_c_list
struct list_head {
struct list_head *next;
struct list_head *prev;
};
struct list_head {
struct list_head *next;
struct list_head *prev;
} list;
scull_listitem
struct scull_dev device;
dev_t key;