ia64/xen-unstable

view tools/libfsimage/zfs/fsys_zfs.c @ 19648:f0e2df69a8eb

x86 hvm: Allow cross-vendor migration

Intercept #UD and emulate SYSCALL/SYSENTER/SYSEXIT as necessary.

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 15:01:36 2009 +0100 (2009-05-26)
parents 40e8684a34bf
children
line source
/*
 *  GRUB  --  GRand Unified Bootloader
 *  Copyright (C) 1999,2000,2001,2002,2003,2004  Free Software Foundation, Inc.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * All files in the zfs directory are derived from the OpenSolaris
 * zfs grub files.  All files in the zfs-include directory were
 * included without changes.
 */

/*
 * The zfs plug-in routines for GRUB are:
 *
 * zfs_mount() - locates a valid uberblock of the root pool and reads
 *		in its MOS at the memory address MOS.
 *
 * zfs_open() - locates a plain file object by following the MOS
 *		and places its dnode at the memory address DNODE.
 *
 * zfs_read() - reads in the data blocks pointed to by the DNODE.
 *
 * ZFS_SCRATCH is used as a working area.
 *
 * (memory addr)    MOS       DNODE      ZFS_SCRATCH
 *                  |         |          |
 *          +-------V---------V----------V---------------+
 *   memory |       | dnode   | dnode    |  scratch      |
 *          |       | 512B    | 512B     |  area         |
 *          +--------------------------------------------+
 */
51 #include <stdio.h>
52 #include <strings.h>
54 /* From "shared.h" */
55 #include "mb_info.h"
57 /* Boot signature related defines for the findroot command */
58 #define BOOTSIGN_DIR "/boot/grub/bootsign"
59 #define BOOTSIGN_BACKUP "/etc/bootsign"
61 /* Maybe redirect memory requests through grub_scratch_mem. */
62 #define RAW_ADDR(x) (x)
63 #define RAW_SEG(x) (x)
65 /* ZFS will use the top 4 Meg of physical memory (below 4Gig) for sratch */
66 #define ZFS_SCRATCH_SIZE 0x400000
68 #define MIN(x, y) ((x) < (y) ? (x) : (y))
69 /* End from shared.h */
71 #include "fsys_zfs.h"

/*
 * The names below are shorthands for fields of the per-mount boot area.
 * They expand to "zfs_ba->...", so a variable named zfs_ba must be in
 * scope wherever one of them is used.
 */

/* cache for a file block of the currently zfs_open()-ed file */
#define file_buf zfs_ba->zfs_file_buf
#define file_start zfs_ba->zfs_file_start
#define file_end zfs_ba->zfs_file_end

/* cache for a dnode block */
#define dnode_buf zfs_ba->zfs_dnode_buf
#define dnode_mdn zfs_ba->zfs_dnode_mdn
#define dnode_start zfs_ba->zfs_dnode_start
#define dnode_end zfs_ba->zfs_dnode_end

/* base of the scratch area handed to the lookup routines as "stack" */
#define stackbase zfs_ba->zfs_stackbase
86 decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] =
87 {
88 {"noop", 0},
89 {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */
90 {"off", 0},
91 {"lzjb", lzjb_decompress} /* ZIO_COMPRESS_LZJB */
92 };

/* From disk_io.c */
/*
 * ZFS root filesystem for booting.  Like the cache macros, the zfs_ba
 * based names need a variable named zfs_ba in scope; is_zfs_mount needs
 * a variable named ffi in scope.
 */
#define current_bootpath zfs_ba->zfs_current_bootpath
#define current_rootpool zfs_ba->zfs_current_rootpool
#define current_bootfs zfs_ba->zfs_current_bootfs
#define current_bootfs_obj zfs_ba->zfs_current_bootfs_obj
#define is_zfs_mount (*fsig_int1(ffi))
/* End from disk_io.c */

#define is_zfs_open zfs_ba->zfs_open

/*
 * Our own version of bcmp(): compare two byte ranges for equality.
 *
 * Input:
 *	s1, s2 : start of the two ranges
 *	n      : number of bytes to compare
 * Return:
 *	0 - the ranges are identical (always the case when n is 0 or
 *	    when s1 and s2 are the same address)
 *	1 - at least one byte differs
 */
static int
zfs_bcmp(const void *s1, const void *s2, size_t n)
{
	const unsigned char *ps1 = s1;
	const unsigned char *ps2 = s2;

	if (s1 != s2 && n != 0) {
		do {
			if (*ps1++ != *ps2++)
				return (1);
		} while (--n != 0);
	}

	return (0);
}

/*
 * Our own version of log2().  Same thing as highbit()-1.
 *
 * Returns the index of the highest set bit of num, i.e. the floor of
 * the base-2 logarithm.  Both 0 and 1 yield 0.
 */
static int
zfs_log2(uint64_t num)
{
	int i = 0;

	while (num > 1) {
		i++;
		num = num >> 1;
	}

	return (i);
}
140 /* Checksum Functions */
141 static void
142 zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
143 {
144 ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
145 }
147 /* Checksum Table and Values */
148 zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
149 {{NULL, NULL}, 0, 0, "inherit"},
150 {{NULL, NULL}, 0, 0, "on"},
151 {{zio_checksum_off, zio_checksum_off}, 0, 0, "off"},
152 {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "label"},
153 {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "gang_header"},
154 {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, "zilog"},
155 {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, "fletcher2"},
156 {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, "fletcher4"},
157 {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, "SHA256"}
158 };
160 /*
161 * zio_checksum_verify: Provides support for checksum verification.
162 *
163 * Fletcher2, Fletcher4, and SHA256 are supported.
164 *
165 * Return:
166 * -1 = Failure
167 * 0 = Success
168 */
169 static int
170 zio_checksum_verify(blkptr_t *bp, char *data, int size)
171 {
172 zio_cksum_t zc = bp->blk_cksum;
173 uint32_t checksum = BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER :
174 BP_GET_CHECKSUM(bp);
175 int byteswap = BP_SHOULD_BYTESWAP(bp);
176 zio_block_tail_t *zbt = (zio_block_tail_t *)(data + size) - 1;
177 zio_checksum_info_t *ci = &zio_checksum_table[checksum];
178 zio_cksum_t actual_cksum, expected_cksum;
180 /* byteswap is not supported */
181 if (byteswap)
182 return (-1);
184 if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
185 return (-1);
187 if (ci->ci_zbt) {
188 if (checksum == ZIO_CHECKSUM_GANG_HEADER) {
189 /*
190 * 'gang blocks' is not supported.
191 */
192 return (-1);
193 }
195 if (zbt->zbt_magic == BSWAP_64(ZBT_MAGIC)) {
196 /* byte swapping is not supported */
197 return (-1);
198 } else {
199 expected_cksum = zbt->zbt_cksum;
200 zbt->zbt_cksum = zc;
201 ci->ci_func[0](data, size, &actual_cksum);
202 zbt->zbt_cksum = expected_cksum;
203 }
204 zc = expected_cksum;
206 } else {
207 if (BP_IS_GANG(bp))
208 return (-1);
209 ci->ci_func[byteswap](data, size, &actual_cksum);
210 }
212 if ((actual_cksum.zc_word[0] - zc.zc_word[0]) |
213 (actual_cksum.zc_word[1] - zc.zc_word[1]) |
214 (actual_cksum.zc_word[2] - zc.zc_word[2]) |
215 (actual_cksum.zc_word[3] - zc.zc_word[3]))
216 return (-1);
218 return (0);
219 }
221 /*
222 * vdev_label_offset takes "offset" (the offset within a vdev_label) and
223 * returns its physical disk offset (starting from the beginning of the vdev).
224 *
225 * Input:
226 * psize : Physical size of this vdev
227 * l : Label Number (0-3)
228 * offset : The offset with a vdev_label in which we want the physical
229 * address
230 * Return:
231 * Success : physical disk offset
232 * Failure : errnum = ERR_BAD_ARGUMENT, return value is meaningless
233 */
234 static uint64_t
235 vdev_label_offset(fsi_file_t *ffi, uint64_t psize, int l, uint64_t offset)
236 {
237 /* XXX Need to add back label support! */
238 if (l >= VDEV_LABELS/2 || offset > sizeof (vdev_label_t)) {
239 errnum = ERR_BAD_ARGUMENT;
240 return (0);
241 }
243 return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
244 0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
246 }
248 /*
249 * vdev_uberblock_compare takes two uberblock structures and returns an integer
250 * indicating the more recent of the two.
251 * Return Value = 1 if ub2 is more recent
252 * Return Value = -1 if ub1 is more recent
253 * The most recent uberblock is determined using its transaction number and
254 * timestamp. The uberblock with the highest transaction number is
255 * considered "newer". If the transaction numbers of the two blocks match, the
256 * timestamps are compared to determine the "newer" of the two.
257 */
258 static int
259 vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
260 {
261 if (ub1->ub_txg < ub2->ub_txg)
262 return (-1);
263 if (ub1->ub_txg > ub2->ub_txg)
264 return (1);
266 if (ub1->ub_timestamp < ub2->ub_timestamp)
267 return (-1);
268 if (ub1->ub_timestamp > ub2->ub_timestamp)
269 return (1);
271 return (0);
272 }
274 /*
275 * Three pieces of information are needed to verify an uberblock: the magic
276 * number, the version number, and the checksum.
277 *
278 * Currently Implemented: version number, magic number
279 * Need to Implement: checksum
280 *
281 * Return:
282 * 0 - Success
283 * -1 - Failure
284 */
285 static int
286 uberblock_verify(uberblock_phys_t *ub, int offset)
287 {
289 uberblock_t *uber = &ub->ubp_uberblock;
290 blkptr_t bp;
292 BP_ZERO(&bp);
293 BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
294 BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER);
295 ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0);
297 if (zio_checksum_verify(&bp, (char *)ub, UBERBLOCK_SIZE) != 0)
298 return (-1);
300 if (uber->ub_magic == UBERBLOCK_MAGIC &&
301 uber->ub_version > 0 && uber->ub_version <= SPA_VERSION)
302 return (0);
304 return (-1);
305 }
307 /*
308 * Find the best uberblock.
309 * Return:
310 * Success - Pointer to the best uberblock.
311 * Failure - NULL
312 */
313 static uberblock_phys_t *
314 find_bestub(fsi_file_t *ffi, uberblock_phys_t *ub_array, int label)
315 {
316 uberblock_phys_t *ubbest = NULL;
317 int i, offset;
319 for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) {
320 offset = vdev_label_offset(ffi, 0, label,
321 VDEV_UBERBLOCK_OFFSET(i));
322 if (errnum == ERR_BAD_ARGUMENT)
323 return (NULL);
324 if (uberblock_verify(&ub_array[i], offset) == 0) {
325 if (ubbest == NULL) {
326 ubbest = &ub_array[i];
327 } else if (vdev_uberblock_compare(
328 &(ub_array[i].ubp_uberblock),
329 &(ubbest->ubp_uberblock)) > 0) {
330 ubbest = &ub_array[i];
331 }
332 }
333 }
335 return (ubbest);
336 }
338 /*
339 * Read in a block and put its uncompressed data in buf.
340 *
341 * Return:
342 * 0 - success
343 * errnum - failure
344 */
345 static int
346 zio_read(fsi_file_t *ffi, blkptr_t *bp, void *buf, char *stack)
347 {
348 uint64_t offset, sector;
349 int psize, lsize;
350 int i, comp, cksum;
352 psize = BP_GET_PSIZE(bp);
353 lsize = BP_GET_LSIZE(bp);
354 comp = BP_GET_COMPRESS(bp);
355 cksum = BP_GET_CHECKSUM(bp);
357 if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS ||
358 (comp != ZIO_COMPRESS_OFF &&
359 decomp_table[comp].decomp_func == NULL))
360 return (ERR_FSYS_CORRUPT);
362 /* pick a good dva from the block pointer */
363 for (i = 0; i < SPA_DVAS_PER_BP; i++) {
365 if (bp->blk_dva[i].dva_word[0] == 0 &&
366 bp->blk_dva[i].dva_word[1] == 0)
367 continue;
369 /* read in a block */
370 offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
371 sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
373 if (comp != ZIO_COMPRESS_OFF) {
375 if (devread(ffi, sector, 0, psize, stack) == 0)
376 continue;
377 if (zio_checksum_verify(bp, stack, psize) != 0)
378 continue;
379 decomp_table[comp].decomp_func(stack, buf, psize,
380 lsize);
381 } else {
382 if (devread(ffi, sector, 0, psize, buf) == 0)
383 continue;
384 if (zio_checksum_verify(bp, buf, psize) != 0)
385 continue;
386 }
387 return (0);
388 }
390 return (ERR_FSYS_CORRUPT);
391 }
393 /*
394 * Get the block from a block id.
395 * push the block onto the stack.
396 *
397 * Return:
398 * 0 - success
399 * errnum - failure
400 */
401 static int
402 dmu_read(fsi_file_t *ffi, dnode_phys_t *dn, uint64_t blkid, void *buf,
403 char *stack)
404 {
405 int idx, level;
406 blkptr_t *bp_array = dn->dn_blkptr;
407 int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
408 blkptr_t *bp, *tmpbuf;
410 bp = (blkptr_t *)stack;
411 stack += sizeof (blkptr_t);
413 tmpbuf = (blkptr_t *)stack;
414 stack += 1<<dn->dn_indblkshift;
416 for (level = dn->dn_nlevels - 1; level >= 0; level--) {
417 idx = (blkid >> (epbs * level)) & ((1<<epbs)-1);
418 *bp = bp_array[idx];
419 if (level == 0)
420 tmpbuf = buf;
421 if (BP_IS_HOLE(bp)) {
422 grub_memset(buf, 0,
423 dn->dn_datablkszsec << SPA_MINBLOCKSHIFT);
424 break;
425 } else if ((errnum = zio_read(ffi, bp, tmpbuf, stack))) {
426 return (errnum);
427 }
428 bp_array = tmpbuf;
429 }
431 return (0);
432 }
434 /*
435 * mzap_lookup: Looks up property described by "name" and returns the value
436 * in "value".
437 *
438 * Return:
439 * 0 - success
440 * errnum - failure
441 */
442 static int
443 mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name,
444 uint64_t *value)
445 {
446 int i, chunks;
447 mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk;
449 chunks = objsize/MZAP_ENT_LEN - 1;
450 for (i = 0; i < chunks; i++) {
451 if (strcmp(mzap_ent[i].mze_name, name) == 0) {
452 *value = mzap_ent[i].mze_value;
453 return (0);
454 }
455 }
457 return (ERR_FSYS_CORRUPT);
458 }
460 static uint64_t
461 zap_hash(fsi_file_t *ffi, uint64_t salt, const char *name)
462 {
463 static uint64_t table[256];
464 const uint8_t *cp;
465 uint8_t c;
466 uint64_t crc = salt;
468 if (table[128] == 0) {
469 uint64_t *ct;
470 int i, j;
471 for (i = 0; i < 256; i++) {
472 for (ct = table + i, *ct = i, j = 8; j > 0; j--)
473 *ct = (*ct >> 1) ^ (-(*ct & 1) &
474 ZFS_CRC64_POLY);
475 }
476 }
478 if (crc == 0 || table[128] != ZFS_CRC64_POLY) {
479 errnum = ERR_FSYS_CORRUPT;
480 return (0);
481 }
483 for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++)
484 crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF];
486 /*
487 * Only use 28 bits, since we need 4 bits in the cookie for the
488 * collision differentiator. We MUST use the high bits, since
489 * those are the onces that we first pay attention to when
490 * chosing the bucket.
491 */
492 crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1);
494 return (crc);
495 }
497 /*
498 * Only to be used on 8-bit arrays.
499 * array_len is actual len in bytes (not encoded le_value_length).
500 * buf is null-terminated.
501 */
502 static int
503 zap_leaf_array_equal(zap_leaf_phys_t *l, int blksft, int chunk,
504 int array_len, const char *buf)
505 {
506 int bseen = 0;
508 while (bseen < array_len) {
509 struct zap_leaf_array *la =
510 &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array;
511 int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
513 if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
514 return (0);
516 if (zfs_bcmp(la->la_array, buf + bseen, toread) != 0)
517 break;
518 chunk = la->la_next;
519 bseen += toread;
520 }
521 return (bseen == array_len);
522 }
524 /*
525 * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the
526 * value for the property "name".
527 *
528 * Return:
529 * 0 - success
530 * errnum - failure
531 */
532 static int
533 zap_leaf_lookup(zap_leaf_phys_t *l, int blksft, uint64_t h,
534 const char *name, uint64_t *value)
535 {
536 uint16_t chunk;
537 struct zap_leaf_entry *le;
539 /* Verify if this is a valid leaf block */
540 if (l->l_hdr.lh_block_type != ZBT_LEAF)
541 return (ERR_FSYS_CORRUPT);
542 if (l->l_hdr.lh_magic != ZAP_LEAF_MAGIC)
543 return (ERR_FSYS_CORRUPT);
545 for (chunk = l->l_hash[LEAF_HASH(blksft, h)];
546 chunk != CHAIN_END; chunk = le->le_next) {
548 if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
549 return (ERR_FSYS_CORRUPT);
551 le = ZAP_LEAF_ENTRY(l, blksft, chunk);
553 /* Verify the chunk entry */
554 if (le->le_type != ZAP_CHUNK_ENTRY)
555 return (ERR_FSYS_CORRUPT);
557 if (le->le_hash != h)
558 continue;
560 if (zap_leaf_array_equal(l, blksft, le->le_name_chunk,
561 le->le_name_length, name)) {
563 struct zap_leaf_array *la;
564 uint8_t *ip;
566 if (le->le_int_size != 8 || le->le_value_length != 1)
567 return (ERR_FSYS_CORRUPT);
569 /* get the uint64_t property value */
570 la = &ZAP_LEAF_CHUNK(l, blksft,
571 le->le_value_chunk).l_array;
572 ip = la->la_array;
574 *value = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
575 (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
576 (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
577 (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
579 return (0);
580 }
581 }
583 return (ERR_FSYS_CORRUPT);
584 }
586 /*
587 * Fat ZAP lookup
588 *
589 * Return:
590 * 0 - success
591 * errnum - failure
592 */
593 static int
594 fzap_lookup(fsi_file_t *ffi, dnode_phys_t *zap_dnode, zap_phys_t *zap,
595 char *name, uint64_t *value, char *stack)
596 {
597 zap_leaf_phys_t *l;
598 uint64_t hash, idx, blkid;
599 int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT);
601 /* Verify if this is a fat zap header block */
602 if (zap->zap_magic != (uint64_t)ZAP_MAGIC)
603 return (ERR_FSYS_CORRUPT);
605 hash = zap_hash(ffi, zap->zap_salt, name);
606 if (errnum)
607 return (errnum);
609 /* get block id from index */
610 if (zap->zap_ptrtbl.zt_numblks != 0) {
611 /* external pointer tables not supported */
612 return (ERR_FSYS_CORRUPT);
613 }
614 idx = ZAP_HASH_IDX(hash, zap->zap_ptrtbl.zt_shift);
615 blkid = ((uint64_t *)zap)[idx + (1<<(blksft-3-1))];
617 /* Get the leaf block */
618 l = (zap_leaf_phys_t *)stack;
619 stack += 1<<blksft;
620 if ((errnum = dmu_read(ffi, zap_dnode, blkid, l, stack)))
621 return (errnum);
623 return (zap_leaf_lookup(l, blksft, hash, name, value));
624 }
626 /*
627 * Read in the data of a zap object and find the value for a matching
628 * property name.
629 *
630 * Return:
631 * 0 - success
632 * errnum - failure
633 */
634 static int
635 zap_lookup(fsi_file_t *ffi, dnode_phys_t *zap_dnode, char *name,
636 uint64_t *val, char *stack)
637 {
638 uint64_t block_type;
639 int size;
640 void *zapbuf;
642 /* Read in the first block of the zap object data. */
643 zapbuf = stack;
644 size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
645 stack += size;
646 if ((errnum = dmu_read(ffi, zap_dnode, 0, zapbuf, stack)))
647 return (errnum);
649 block_type = *((uint64_t *)zapbuf);
651 if (block_type == ZBT_MICRO) {
652 return (mzap_lookup(zapbuf, size, name, val));
653 } else if (block_type == ZBT_HEADER) {
654 /* this is a fat zap */
655 return (fzap_lookup(ffi, zap_dnode, zapbuf, name,
656 val, stack));
657 }
659 return (ERR_FSYS_CORRUPT);
660 }
662 /*
663 * Get the dnode of an object number from the metadnode of an object set.
664 *
665 * Input
666 * mdn - metadnode to get the object dnode
667 * objnum - object number for the object dnode
668 * buf - data buffer that holds the returning dnode
669 * stack - scratch area
670 *
671 * Return:
672 * 0 - success
673 * errnum - failure
674 */
675 static int
676 dnode_get(fsi_file_t *ffi, dnode_phys_t *mdn, uint64_t objnum,
677 uint8_t type, dnode_phys_t *buf, char *stack)
678 {
679 uint64_t blkid, blksz; /* the block id this object dnode is in */
680 int epbs; /* shift of number of dnodes in a block */
681 int idx; /* index within a block */
682 dnode_phys_t *dnbuf;
683 zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data;
685 blksz = mdn->dn_datablkszsec << SPA_MINBLOCKSHIFT;
686 epbs = zfs_log2(blksz) - DNODE_SHIFT;
687 blkid = objnum >> epbs;
688 idx = objnum & ((1<<epbs)-1);
690 if (dnode_buf != NULL && dnode_mdn == mdn &&
691 objnum >= dnode_start && objnum < dnode_end) {
692 grub_memmove(buf, &dnode_buf[idx], DNODE_SIZE);
693 VERIFY_DN_TYPE(buf, type);
694 return (0);
695 }
697 if (dnode_buf && blksz == 1<<DNODE_BLOCK_SHIFT) {
698 dnbuf = dnode_buf;
699 dnode_mdn = mdn;
700 dnode_start = blkid << epbs;
701 dnode_end = (blkid + 1) << epbs;
702 } else {
703 dnbuf = (dnode_phys_t *)stack;
704 stack += blksz;
705 }
707 if ((errnum = dmu_read(ffi, mdn, blkid, (char *)dnbuf, stack)))
708 return (errnum);
710 grub_memmove(buf, &dnbuf[idx], DNODE_SIZE);
711 VERIFY_DN_TYPE(buf, type);
713 return (0);
714 }
716 /*
717 * Check if this is a special file that resides at the top
718 * dataset of the pool. Currently this is the GRUB menu,
719 * boot signature and boot signature backup.
720 * str starts with '/'.
721 */
722 static int
723 is_top_dataset_file(char *str)
724 {
725 char *tptr;
727 if (((tptr = strstr(str, "menu.lst"))) &&
728 (tptr[8] == '\0' || tptr[8] == ' ') &&
729 *(tptr-1) == '/')
730 return (1);
732 if (strncmp(str, BOOTSIGN_DIR"/",
733 strlen(BOOTSIGN_DIR) + 1) == 0)
734 return (1);
736 if (strcmp(str, BOOTSIGN_BACKUP) == 0)
737 return (1);
739 return (0);
740 }
742 /*
743 * Get the file dnode for a given file name where mdn is the meta dnode
744 * for this ZFS object set. When found, place the file dnode in dn.
745 * The 'path' argument will be mangled.
746 *
747 * Return:
748 * 0 - success
749 * errnum - failure
750 */
751 static int
752 dnode_get_path(fsi_file_t *ffi, dnode_phys_t *mdn, char *path,
753 dnode_phys_t *dn, char *stack)
754 {
755 uint64_t objnum, version;
756 char *cname, ch;
758 if ((errnum = dnode_get(ffi, mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE,
759 dn, stack)))
760 return (errnum);
762 if ((errnum = zap_lookup(ffi, dn, ZPL_VERSION_STR, &version, stack)))
763 return (errnum);
764 if (version > ZPL_VERSION)
765 return (-1);
767 if ((errnum = zap_lookup(ffi, dn, ZFS_ROOT_OBJ, &objnum, stack)))
768 return (errnum);
770 if ((errnum = dnode_get(ffi, mdn, objnum, DMU_OT_DIRECTORY_CONTENTS,
771 dn, stack)))
772 return (errnum);
774 /* skip leading slashes */
775 while (*path == '/')
776 path++;
778 while (*path && !isspace((uint8_t)*path)) {
780 /* get the next component name */
781 cname = path;
782 while (*path && !isspace((uint8_t)*path) && *path != '/')
783 path++;
784 ch = *path;
785 *path = 0; /* ensure null termination */
787 if ((errnum = zap_lookup(ffi, dn, cname, &objnum, stack)))
788 return (errnum);
790 objnum = ZFS_DIRENT_OBJ(objnum);
791 if ((errnum = dnode_get(ffi, mdn, objnum, 0, dn, stack)))
792 return (errnum);
794 *path = ch;
795 while (*path == '/')
796 path++;
797 }
799 /* We found the dnode for this file. Verify if it is a plain file. */
800 VERIFY_DN_TYPE(dn, DMU_OT_PLAIN_FILE_CONTENTS);
802 return (0);
803 }
805 /*
806 * Get the default 'bootfs' property value from the rootpool.
807 *
808 * Return:
809 * 0 - success
810 * errnum -failure
811 */
812 static int
813 get_default_bootfsobj(fsi_file_t *ffi, dnode_phys_t *mosmdn,
814 uint64_t *obj, char *stack)
815 {
816 uint64_t objnum = 0;
817 dnode_phys_t *dn = (dnode_phys_t *)stack;
818 stack += DNODE_SIZE;
820 if ((errnum = dnode_get(ffi, mosmdn, DMU_POOL_DIRECTORY_OBJECT,
821 DMU_OT_OBJECT_DIRECTORY, dn, stack)))
822 return (errnum);
824 /*
825 * find the object number for 'pool_props', and get the dnode
826 * of the 'pool_props'.
827 */
828 if (zap_lookup(ffi, dn, DMU_POOL_PROPS, &objnum, stack))
829 return (ERR_FILESYSTEM_NOT_FOUND);
831 if ((errnum = dnode_get(ffi, mosmdn, objnum, DMU_OT_POOL_PROPS, dn,
832 stack)))
833 return (errnum);
835 if (zap_lookup(ffi, dn, ZPOOL_PROP_BOOTFS, &objnum, stack))
836 return (ERR_FILESYSTEM_NOT_FOUND);
838 if (!objnum)
839 return (ERR_FILESYSTEM_NOT_FOUND);
842 *obj = objnum;
843 return (0);
844 }
846 /*
847 * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
848 * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
849 * of pool/rootfs.
850 *
851 * If no fsname and no obj are given, return the DSL_DIR metadnode.
852 * If fsname is given, return its metadnode and its matching object number.
853 * If only obj is given, return the metadnode for this object number.
854 *
855 * Return:
856 * 0 - success
857 * errnum - failure
858 */
859 static int
860 get_objset_mdn(fsi_file_t *ffi, dnode_phys_t *mosmdn, char *fsname,
861 uint64_t *obj, dnode_phys_t *mdn, char *stack)
862 {
863 uint64_t objnum, headobj;
864 char *cname, ch;
865 blkptr_t *bp;
866 objset_phys_t *osp;
868 if (fsname == NULL && obj) {
869 headobj = *obj;
870 goto skip;
871 }
873 if ((errnum = dnode_get(ffi, mosmdn, DMU_POOL_DIRECTORY_OBJECT,
874 DMU_OT_OBJECT_DIRECTORY, mdn, stack)))
875 return (errnum);
877 if ((errnum = zap_lookup(ffi, mdn, DMU_POOL_ROOT_DATASET, &objnum,
878 stack)))
879 return (errnum);
881 if ((errnum = dnode_get(ffi, mosmdn, objnum, DMU_OT_DSL_DIR, mdn,
882 stack)))
883 return (errnum);
885 if (fsname == NULL) {
886 headobj =
887 ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
888 goto skip;
889 }
891 /* take out the pool name */
892 while (*fsname && !isspace((uint8_t)*fsname) && *fsname != '/')
893 fsname++;
895 while (*fsname && !isspace((uint8_t)*fsname)) {
896 uint64_t childobj;
898 while (*fsname == '/')
899 fsname++;
901 cname = fsname;
902 while (*fsname && !isspace((uint8_t)*fsname) && *fsname != '/')
903 fsname++;
904 ch = *fsname;
905 *fsname = 0;
907 childobj =
908 ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj;
909 if ((errnum = dnode_get(ffi, mosmdn, childobj,
910 DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack)))
911 return (errnum);
913 if (zap_lookup(ffi, mdn, cname, &objnum, stack))
914 return (ERR_FILESYSTEM_NOT_FOUND);
916 if ((errnum = dnode_get(ffi, mosmdn, objnum, DMU_OT_DSL_DIR,
917 mdn, stack)))
918 return (errnum);
920 *fsname = ch;
921 }
922 headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
923 if (obj)
924 *obj = headobj;
926 skip:
927 if ((errnum = dnode_get(ffi, mosmdn, headobj, DMU_OT_DSL_DATASET, mdn,
928 stack)))
929 return (errnum);
931 /* TODO: Add snapshot support here - for fsname=snapshot-name */
933 bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp;
934 osp = (objset_phys_t *)stack;
935 stack += sizeof (objset_phys_t);
936 if ((errnum = zio_read(ffi, bp, osp, stack)))
937 return (errnum);
939 grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE);
941 return (0);
942 }
944 /*
945 * For a given XDR packed nvlist, verify the first 4 bytes and move on.
946 *
947 * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) :
948 *
949 * encoding method/host endian (4 bytes)
950 * nvl_version (4 bytes)
951 * nvl_nvflag (4 bytes)
952 * encoded nvpairs:
953 * encoded size of the nvpair (4 bytes)
954 * decoded size of the nvpair (4 bytes)
955 * name string size (4 bytes)
956 * name string data (sizeof(NV_ALIGN4(string))
957 * data type (4 bytes)
958 * # of elements in the nvpair (4 bytes)
959 * data
960 * 2 zero's for the last nvpair
961 * (end of the entire list) (8 bytes)
962 *
963 * Return:
964 * 0 - success
965 * 1 - failure
966 */
967 static int
968 nvlist_unpack(char *nvlist, char **out)
969 {
970 /* Verify if the 1st and 2nd byte in the nvlist are valid. */
971 if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN)
972 return (1);
974 nvlist += 4;
975 *out = nvlist;
976 return (0);
977 }

/*
 * Return a pointer to element "index" of an array of packed nvlists
 * stored back to back, by stepping over the first "index" lists.
 * Each list is skipped by following the encoded sizes of its nvpairs
 * up to the zero size that ends it.
 */
static char *
nvlist_array(char *nvlist, int index)
{
	int i, encode_size;

	for (i = 0; i < index; i++) {
		/* skip the header, nvl_version, and nvl_nvflag */
		nvlist = nvlist + 4 * 2;

		while ((encode_size = BSWAP_32(*(uint32_t *)nvlist)))
			nvlist += encode_size; /* goto the next nvpair */

		nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */
	}

	return (nvlist);
}
997 static int
998 nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype,
999 int *nelmp)
1001 int name_len, type, slen, encode_size;
1002 char *nvpair, *nvp_name, *strval = val;
1003 uint64_t *intval = val;
1005 /* skip the header, nvl_version, and nvl_nvflag */
1006 nvlist = nvlist + 4 * 2;
1008 /*
1009 * Loop thru the nvpair list
1010 * The XDR representation of an integer is in big-endian byte order.
1011 */
1012 while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) {
1014 nvpair = nvlist + 4 * 2; /* skip the encode/decode size */
1016 name_len = BSWAP_32(*(uint32_t *)nvpair);
1017 nvpair += 4;
1019 nvp_name = nvpair;
1020 nvpair = nvpair + ((name_len + 3) & ~3); /* align */
1022 type = BSWAP_32(*(uint32_t *)nvpair);
1023 nvpair += 4;
1025 if (((strncmp(nvp_name, name, name_len) == 0) &&
1026 type == valtype)) {
1027 int nelm;
1029 if (((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1))
1030 return (1);
1031 nvpair += 4;
1033 switch (valtype) {
1034 case DATA_TYPE_STRING:
1035 slen = BSWAP_32(*(uint32_t *)nvpair);
1036 nvpair += 4;
1037 grub_memmove(strval, nvpair, slen);
1038 strval[slen] = '\0';
1039 return (0);
1041 case DATA_TYPE_UINT64:
1042 *intval = BSWAP_64(*(uint64_t *)nvpair);
1043 return (0);
1045 case DATA_TYPE_NVLIST:
1046 *(void **)val = (void *)nvpair;
1047 return (0);
1049 case DATA_TYPE_NVLIST_ARRAY:
1050 *(void **)val = (void *)nvpair;
1051 if (nelmp)
1052 *nelmp = nelm;
1053 return (0);
1057 nvlist += encode_size; /* goto the next nvpair */
1060 return (1);
1063 /*
1064 * Check if this vdev is online and is in a good state.
1065 */
1066 static int
1067 vdev_validate(char *nv)
1069 uint64_t ival;
1071 if (nvlist_lookup_value(nv, ZPOOL_CONFIG_OFFLINE, &ival,
1072 DATA_TYPE_UINT64, NULL) == 0 ||
1073 nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival,
1074 DATA_TYPE_UINT64, NULL) == 0 ||
1075 nvlist_lookup_value(nv, ZPOOL_CONFIG_DEGRADED, &ival,
1076 DATA_TYPE_UINT64, NULL) == 0 ||
1077 nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival,
1078 DATA_TYPE_UINT64, NULL) == 0)
1079 return (ERR_DEV_VALUES);
1081 return (0);
1084 /*
1085 * Get a list of valid vdev pathname from the boot device.
1086 * The caller should already allocate MAXNAMELEN memory for bootpath.
1087 */
1088 static int
1089 vdev_get_bootpath(char *nv, char *bootpath)
1091 char type[16];
1093 bootpath[0] = '\0';
1094 if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING,
1095 NULL))
1096 return (ERR_FSYS_CORRUPT);
1098 if (strcmp(type, VDEV_TYPE_DISK) == 0) {
1099 if (vdev_validate(nv) != 0 ||
1100 nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, bootpath,
1101 DATA_TYPE_STRING, NULL) != 0)
1102 return (ERR_NO_BOOTPATH);
1104 } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
1105 int nelm, i;
1106 char *child;
1108 if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child,
1109 DATA_TYPE_NVLIST_ARRAY, &nelm))
1110 return (ERR_FSYS_CORRUPT);
1112 for (i = 0; i < nelm; i++) {
1113 char tmp_path[MAXNAMELEN];
1114 char *child_i;
1116 child_i = nvlist_array(child, i);
1117 if (vdev_validate(child_i) != 0)
1118 continue;
1120 if (nvlist_lookup_value(child_i, ZPOOL_CONFIG_PHYS_PATH,
1121 tmp_path, DATA_TYPE_STRING, NULL) != 0)
1122 return (ERR_NO_BOOTPATH);
1124 if ((strlen(bootpath) + strlen(tmp_path)) > MAXNAMELEN)
1125 return (ERR_WONT_FIT);
1127 if (strlen(bootpath) == 0)
1128 sprintf(bootpath, "%s", tmp_path);
1129 else
1130 sprintf(bootpath, "%s %s", bootpath, tmp_path);
1134 return (strlen(bootpath) > 0 ? 0 : ERR_NO_BOOTPATH);
1137 /*
1138 * Check the disk label information and retrieve needed vdev name-value pairs.
1140 * Return:
1141 * 0 - success
1142 * ERR_* - failure
1143 */
1144 static int
1145 check_pool_label(fsi_file_t *ffi, int label, char *stack)
1147 vdev_phys_t *vdev;
1148 uint64_t sector, pool_state, txg = 0;
1149 char *nvlist, *nv;
1150 zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data;
1152 sector = (label * sizeof (vdev_label_t) + VDEV_SKIP_SIZE +
1153 VDEV_BOOT_HEADER_SIZE) >> SPA_MINBLOCKSHIFT;
1155 /* Read in the vdev name-value pair list (112K). */
1156 if (devread(ffi, sector, 0, VDEV_PHYS_SIZE, stack) == 0)
1157 return (ERR_READ);
1159 vdev = (vdev_phys_t *)stack;
1161 if (nvlist_unpack(vdev->vp_nvlist, &nvlist))
1162 return (ERR_FSYS_CORRUPT);
1164 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_STATE, &pool_state,
1165 DATA_TYPE_UINT64, NULL))
1166 return (ERR_FSYS_CORRUPT);
1168 if (pool_state == POOL_STATE_DESTROYED)
1169 return (ERR_FILESYSTEM_NOT_FOUND);
1171 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_NAME,
1172 current_rootpool, DATA_TYPE_STRING, NULL))
1173 return (ERR_FSYS_CORRUPT);
1175 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_TXG, &txg,
1176 DATA_TYPE_UINT64, NULL))
1177 return (ERR_FSYS_CORRUPT);
1179 /* not an active device */
1180 if (txg == 0)
1181 return (ERR_NO_BOOTPATH);
1183 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv,
1184 DATA_TYPE_NVLIST, NULL))
1185 return (ERR_FSYS_CORRUPT);
1187 if (vdev_get_bootpath(nv, current_bootpath))
1188 return (ERR_NO_BOOTPATH);
1190 return (0);
1193 /*
1194 * zfs_mount() locates a valid uberblock of the root pool and read in its MOS
1195 * to the memory address MOS.
1197 * Return:
1198 * 1 - success
1199 * 0 - failure
1200 */
1201 static int
1202 zfs_mount(fsi_file_t *ffi, const char *options)
1204 char *stack;
1205 int label = 0;
1206 uberblock_phys_t *ub_array, *ubbest = NULL;
1207 objset_phys_t *osp;
1208 zfs_bootarea_t *zfs_ba;
1210 /* if zfs is already mounted, don't do it again */
1211 if (is_zfs_mount == 1)
1212 return (1);
1214 /* get much bigger data block for zfs */
1215 if (((zfs_ba = malloc(sizeof (zfs_bootarea_t))) == NULL)) {
1216 return (1);
1218 bzero(zfs_ba, sizeof (zfs_bootarea_t));
1220 /* replace small data area in fsi with big one */
1221 free(ffi->ff_fsi->f_data);
1222 ffi->ff_fsi->f_data = (void *)zfs_ba;
1224 /* If an boot filesystem is passed in, set it to current_bootfs */
1225 if (options != NULL) {
1226 if (strlen(options) < MAXNAMELEN) {
1227 strcpy(current_bootfs, options);
1231 stackbase = ZFS_SCRATCH;
1232 stack = stackbase;
1233 ub_array = (uberblock_phys_t *)stack;
1234 stack += VDEV_UBERBLOCK_RING;
1236 osp = (objset_phys_t *)stack;
1237 stack += sizeof (objset_phys_t);
1239 /* XXX add back labels support? */
1240 for (label = 0; ubbest == NULL && label < (VDEV_LABELS/2); label++) {
1241 uint64_t sector = (label * sizeof (vdev_label_t) +
1242 VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE +
1243 VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT;
1246 /* Read in the uberblock ring (128K). */
1247 if (devread(ffi, sector, 0, VDEV_UBERBLOCK_RING,
1248 (char *)ub_array) == 0)
1249 continue;
1251 if ((ubbest = find_bestub(ffi, ub_array, label)) != NULL &&
1252 zio_read(ffi, &ubbest->ubp_uberblock.ub_rootbp, osp, stack)
1253 == 0) {
1255 VERIFY_OS_TYPE(osp, DMU_OST_META);
1257 /* Got the MOS. Save it at the memory addr MOS. */
1258 grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE);
1260 if (check_pool_label(ffi, label, stack))
1261 return (0);
1263 /*
1264 * Copy fsi->f_data to ffi->ff_data since
1265 * fsig_mount copies from ff_data to f_data
1266 * overwriting fsi->f_data.
1267 */
1268 bcopy(zfs_ba, fsig_file_buf(ffi), FSYS_BUFLEN);
1270 is_zfs_mount = 1;
1271 return (1);
1275 return (0);
1278 /*
1279 * zfs_open() locates a file in the rootpool by following the
1280 * MOS and places the dnode of the file in the memory address DNODE.
1282 * Return:
1283 * 1 - success
1284 * 0 - failure
1285 */
1286 static int
1287 zfs_open(fsi_file_t *ffi, char *filename)
1289 char *stack;
1290 dnode_phys_t *mdn;
1291 char *bootstring;
1292 zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data;
1294 file_buf = NULL;
1295 stackbase = ZFS_SCRATCH;
1296 stack = stackbase;
1298 mdn = (dnode_phys_t *)stack;
1299 stack += sizeof (dnode_phys_t);
1301 dnode_mdn = NULL;
1302 dnode_buf = (dnode_phys_t *)stack;
1303 stack += 1<<DNODE_BLOCK_SHIFT;
1305 /*
1306 * menu.lst is placed at the root pool filesystem level,
1307 * do not goto 'current_bootfs'.
1308 */
1309 if (is_top_dataset_file(filename)) {
1310 if ((errnum = get_objset_mdn(ffi, MOS, NULL, NULL, mdn, stack)))
1311 return (0);
1313 current_bootfs_obj = 0;
1314 } else {
1315 if (current_bootfs[0] == '\0') {
1316 /* Get the default root filesystem object number */
1317 if ((errnum = get_default_bootfsobj(ffi, MOS,
1318 &current_bootfs_obj, stack)))
1319 return (0);
1320 if ((errnum = get_objset_mdn(ffi, MOS, NULL,
1321 &current_bootfs_obj, mdn, stack)))
1322 return (0);
1323 } else {
1324 if ((errnum = get_objset_mdn(ffi, MOS,
1325 current_bootfs, &current_bootfs_obj, mdn, stack)))
1326 return (0);
1329 /*
1330 * Put zfs rootpool and boot obj number into bootstring.
1331 */
1332 if (is_zfs_open == 0) {
1333 char temp[25]; /* needs to hold long long */
1334 int alloc_size;
1335 char zfs_bootstr[] = "zfs-bootfs=";
1336 char zfs_bootpath[] = ",bootpath='";
1338 snprintf(temp, sizeof(temp), "%llu", (unsigned long long)
1339 current_bootfs_obj);
1340 alloc_size = strlen(zfs_bootstr) +
1341 strlen(current_rootpool) +
1342 strlen(temp) + strlen(zfs_bootpath) +
1343 strlen(current_bootpath) + 3;
1344 bootstring = fsi_bootstring_alloc(ffi->ff_fsi,
1345 alloc_size);
1346 if (bootstring != NULL) {
1347 strcpy(bootstring, zfs_bootstr);
1348 strcat(bootstring, current_rootpool);
1349 strcat(bootstring, "/");
1350 strcat(bootstring, temp);
1351 strcat(bootstring, zfs_bootpath);
1352 strcat(bootstring, current_bootpath);
1353 strcat(bootstring, "'");
1354 is_zfs_open = 1;
1359 if (dnode_get_path(ffi, mdn, filename, DNODE, stack)) {
1360 errnum = ERR_FILE_NOT_FOUND;
1361 return (0);
1364 /* get the file size and set the file position to 0 */
1365 filemax = ((znode_phys_t *)DN_BONUS(DNODE))->zp_size;
1366 filepos = 0;
1368 dnode_buf = NULL;
1369 return (1);
1372 /*
1373 * zfs_read reads in the data blocks pointed by the DNODE.
1375 * Return:
1376 * len - the length successfully read in to the buffer
1377 * 0 - failure
1378 */
1379 static int
1380 zfs_read(fsi_file_t *ffi, char *buf, int len)
1382 char *stack;
1383 int blksz, length, movesize;
1384 zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data;
1386 if (file_buf == NULL) {
1387 file_buf = stackbase;
1388 stackbase += SPA_MAXBLOCKSIZE;
1389 file_start = file_end = 0;
1391 stack = stackbase;
1393 /*
1394 * If offset is in memory, move it into the buffer provided and return.
1395 */
1396 if (filepos >= file_start && filepos+len <= file_end) {
1397 grub_memmove(buf, file_buf + filepos - file_start, len);
1398 filepos += len;
1399 return (len);
1402 blksz = DNODE->dn_datablkszsec << SPA_MINBLOCKSHIFT;
1404 /*
1405 * Entire Dnode is too big to fit into the space available. We
1406 * will need to read it in chunks. This could be optimized to
1407 * read in as large a chunk as there is space available, but for
1408 * now, this only reads in one data block at a time.
1409 */
1410 length = len;
1411 while (length) {
1412 /*
1413 * Find requested blkid and the offset within that block.
1414 */
1415 uint64_t blkid = filepos / blksz;
1417 if ((errnum = dmu_read(ffi, DNODE, blkid, file_buf, stack)))
1418 return (0);
1420 file_start = blkid * blksz;
1421 file_end = file_start + blksz;
1423 movesize = MIN(length, file_end - filepos);
1425 grub_memmove(buf, file_buf + filepos - file_start,
1426 movesize);
1427 buf += movesize;
1428 length -= movesize;
1429 filepos += movesize;
1432 return (len);
/*
 * zfs_embed() is a no-op: neither argument is examined or modified and
 * the function always returns 1.
 */
int
zfs_embed(int *start_sector, int needed_sectors)
{
	(void) start_sector;
	(void) needed_sectors;

	return 1;
}
1444 fsi_plugin_ops_t *
1445 fsi_init_plugin(int version, fsi_plugin_t *fp, const char **name)
1447 static fsig_plugin_ops_t ops = {
1448 FSIMAGE_PLUGIN_VERSION,
1449 .fpo_mount = zfs_mount,
1450 .fpo_dir = zfs_open,
1451 .fpo_read = zfs_read
1452 };
1454 *name = "zfs";
1455 return (fsig_init(fp, &ops));