[cpp] int dm_io_async_bvec(unsigned int num_regions, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) struct dm_io_region *where, #else struct io_region *where, #endif int rw, struct bio_vec *bvec, io_notify_fn fn, void *context) { struct dm_io_request iorq; iorq.bi_rw = rw; iorq.mem.type = DM_IO_BVEC; iorq.mem.ptr.bvec = bvec; iorq.notify.fn = fn; iorq.notify.context = context; iorq.client = flashcache_io_client; return dm_io(&iorq, num_regions, where, NULL); } The user must set up an io_region structure to describe the desired location of the I/O. Each io_region indicates a block-device along with the starting sector and size of the region. 但是不同的內核版本io_region 結構體的表示不同。 2.6.26以後的版本用dm_io_region表示如下: [cpp] struct dm_io_region { struct block_device *bdev; sector_t sector; sector_t count; /* If this is zero the region is ignored. */ }; 2.6.26之前的版本用io_region表示如下: [cpp] struct io_region { struct block_device *bdev; sector_t sector; sector_t count; }; 雖然形式不同,但是裡面的內容是一樣的,都含有一個指向block_device的指針,以及區域的起始扇區和區域的大小。 bio_vec結構體如下: [cpp] struct bio_vec { struct page *bv_page; //指向段的頁框中頁描述符的指針 unsigned int bv_len; //段的字節長度 unsigned int bv_offset; //頁框中段數據的偏移量 }; io_notify_fn是一個回調函數指針的類型,其定義如下: [cpp] typedef void (*io_notify_fn)(unsigned long error, void *context); The "error" parameter in this callback is a bitset (instead of a simple error value). In the case of a write-I/O to multiple regions, this bitset allows dm-io to indicate success or failure on each individual region. 
dm_io_request結構體如下:(通過dm_io_request結構來封裝請求的類型,如果設置了dm_io_notify.fn則是異步IO,否則是同步IO。) [cpp] struct dm_io_request { int bi_rw; /* READ|WRITE - not READA */ struct dm_io_memory mem; /* Memory to use for io */ struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */ struct dm_io_client *client; /* Client memory handler */ }; 從上面的分析可以看出,dm_io_async_bvec通過io_notify_fn fn來確定是同步操作還是異步操作,通過bio_vec *bvec確定dm_io的服務類型,dm_io有3種服務類型: [cpp] //The first I/O service type takes a list of memory pages as the data buffer for the I/O, along with an offset into the first page. struct page_list { struct page_list *next; struct page *page; }; int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw, struct page_list *pl, unsigned int offset, unsigned long *error_bits); int dm_io_async(unsigned int num_regions, struct io_region *where, int rw, struct page_list *pl, unsigned int offset, io_notify_fn fn, void *context); [cpp] //The second I/O service type takes an array of bio vectors as the data buffer for the I/O. This service can be handy if the caller has a pre-assembled bio, but wants to direct different portions of the bio to different devices. int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw, struct bio_vec *bvec, unsigned long *error_bits); int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw, struct bio_vec *bvec, io_notify_fn fn, void *context); [cpp] //The third I/O service type takes a pointer to a vmalloc'd memory buffer as the data buffer for the I/O. This service can be handy if the caller needs to do I/O to a large region but doesn't want to allocate a large number of individual memory pages. 
int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw, void *data, unsigned long *error_bits); int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw, void *data, io_notify_fn fn, void *context); dm_io_async_bvec通過dm_io_request封裝請求之後,確定了請求的各種類型,然後由dm_io()函數來完成操作。dm_io函數如下: [cpp] int dm_io(struct dm_io_request *io_req, unsigned num_regions, struct dm_io_region *where, unsigned long *sync_error_bits) { int r; struct dpages dp; r = dp_init(io_req, &dp); if (r) return r; if (!io_req->notify.fn) return sync_io(io_req->client, num_regions, where, io_req->bi_rw, &dp, sync_error_bits); return async_io(io_req->client, num_regions, where, io_req->bi_rw, &dp, io_req->notify.fn, io_req->notify.context); } dpages結構體如下: [cpp] struct dpages { void (*get_page)(struct dpages *dp, struct page **p, unsigned long *len, unsigned *offset);//是一種函數指針的形式,根據參數獲取某一區域,區域類型由上下文確定 void (*next_page)(struct dpages *dp);//同樣是函數指針的形式,根據參數獲取某一區域的下一區域 unsigned context_u; void *context_ptr;//私有數據成員,可根據上下文確定其類型,也即上面的區域類型 }; dpages結構體provide an abstraction for getting a new destination page for io. 
dp_init()函數如下: [cpp] static int dp_init(struct dm_io_request *io_req, struct dpages *dp) { /* Set up dpages based on memory type */ switch (io_req->mem.type) { case DM_IO_PAGE_LIST: list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); break; case DM_IO_BVEC: //可以看到我們的io_req->mem.type是此種類型 bvec_dp_init(dp, io_req->mem.ptr.bvec); break; case DM_IO_VMA: vm_dp_init(dp, io_req->mem.ptr.vma); break; case DM_IO_KMEM: km_dp_init(dp, io_req->mem.ptr.addr); break; default: return -EINVAL; } return 0; } bvec_dp_init()函數如下: [cpp] static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec) { dp->get_page = bvec_get_page;//獲取某一個bio_vec dp->next_page = bvec_next_page;//獲取下一個bio_vec dp->context_ptr = bvec;//確定區域類型為bio_vec } bvec_get_page()函數如下: [cpp] static void bvec_get_page(struct dpages *dp, struct page **p, unsigned long *len, unsigned *offset) { struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; *p = bvec->bv_page; *len = bvec->bv_len; *offset = bvec->bv_offset; } bvec_next_page()函數如下: [cpp] static void bvec_next_page(struct dpages *dp) { struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; dp->context_ptr = bvec + 1; } 處理完dm_io的服務類型之後,然後根據io_req->notify.fn是否設置,來確定dm_io的操作類型是同步的還是異步的。 同步操作調用sync_io;異步操作調用async_io。 sync_io()函數如下: [cpp] static int sync_io(struct dm_io_client *client, unsigned int num_regions, struct dm_io_region *where, int rw, struct dpages *dp, unsigned long *error_bits) { struct io io;//將dm_io_request請求進一步封裝 if (num_regions > 1 && (rw & RW_MASK) != WRITE) {//dm_io不能讀多個io_region WARN_ON(1); return -EIO; } retry: io.error_bits = 0; io.eopnotsupp_bits = 0; atomic_set(&io.count, 1); /* see dispatch_io() */ io.sleeper = current; io.client = client; dispatch_io(rw, num_regions, where, dp, &io, 1); while (1) { set_current_state(TASK_UNINTERRUPTIBLE); if (!atomic_read(&io.count)) break; io_schedule(); } set_current_state(TASK_RUNNING); if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) { rw &= ~(1 << 
BIO_RW_BARRIER); goto retry; } if (error_bits) *error_bits = io.error_bits; return io.error_bits ? -EIO : 0; } async_io()函數如下: [cpp] static int async_io(struct dm_io_client *client, unsigned int num_regions, struct dm_io_region *where, int rw, struct dpages *dp, io_notify_fn fn, void *context) { struct io *io;//之所以加入struct io,是為了將上面的dm_io_request重新封裝,加入線程,以便io分發和處理 if (num_regions > 1 && (rw & RW_MASK) != WRITE) {//Dm-io can read from one io_region or write to one or more io_regions. Writes to multiple regions are specified by an array of io_region structures,dm_io不能讀多個io_region WARN_ON(1); fn(1, context); return -EIO; } io = mempool_alloc(client->pool, GFP_NOIO); io->error_bits = 0; io->eopnotsupp_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ io->sleeper = NULL; io->client = client; io->callback = fn; io->context = context; dispatch_io(rw, num_regions, where, dp, io, 0); return 0; }