ia64/xen-unstable

view tools/libfsimage/zfs/fsys_zfs.c @ 18059:4b882c41c9b9

tools: Make functions static which should not be exported.
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Jul 15 15:03:58 2008 +0100 (2008-07-15)
parents e5c9c8e6e726
children 40e8684a34bf
line source
1 /*
2 * GRUB -- GRand Unified Bootloader
3 * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19 /*
20 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
21 * Use is subject to license terms.
22 */
24 /*
25 * All files in the zfs directory are derived from the OpenSolaris
26 * zfs grub files. All files in the zfs-include directory were
27 * included without changes.
28 */
30 /*
31 * The zfs plug-in routines for GRUB are:
32 *
33 * zfs_mount() - locates a valid uberblock of the root pool and reads
34 * in its MOS at the memory address MOS.
35 *
36 * zfs_open() - locates a plain file object by following the MOS
37 * and places its dnode at the memory address DNODE.
38 *
39 * zfs_read() - read in the data blocks pointed by the DNODE.
40 *
41 * ZFS_SCRATCH is used as a working area.
42 *
43 * (memory addr) MOS DNODE ZFS_SCRATCH
44 * | | |
45 * +-------V---------V----------V---------------+
46 * memory | | dnode | dnode | scratch |
47 * | | 512B | 512B | area |
48 * +--------------------------------------------+
49 */
51 #include <stdio.h>
52 #include <strings.h>
54 /* From "shared.h" */
55 #include "mb_info.h"
57 /* Boot signature related defines for the findroot command */
58 #define BOOTSIGN_DIR "/boot/grub/bootsign"
59 #define BOOTSIGN_BACKUP "/etc/bootsign"
61 /* Maybe redirect memory requests through grub_scratch_mem. */
62 #define RAW_ADDR(x) (x)
63 #define RAW_SEG(x) (x)
65 /* ZFS will use the top 4 Meg of physical memory (below 4Gig) for scratch */
66 #define ZFS_SCRATCH_SIZE 0x400000
68 #define MIN(x, y) ((x) < (y) ? (x) : (y))
69 /* End from shared.h */
71 #include "fsys_zfs.h"
73 /* cache for a file block of the currently zfs_open()-ed file */
74 #define file_buf zfs_ba->zfs_file_buf
75 #define file_start zfs_ba->zfs_file_start
76 #define file_end zfs_ba->zfs_file_end
78 /* cache for a dnode block */
79 #define dnode_buf zfs_ba->zfs_dnode_buf
80 #define dnode_mdn zfs_ba->zfs_dnode_mdn
81 #define dnode_start zfs_ba->zfs_dnode_start
82 #define dnode_end zfs_ba->zfs_dnode_end
84 #define stackbase zfs_ba->zfs_stackbase
86 decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] =
87 {
88 {"noop", 0},
89 {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */
90 {"off", 0},
91 {"lzjb", lzjb_decompress} /* ZIO_COMPRESS_LZJB */
92 };
94 /* From disk_io.c */
95 /* ZFS root filesystem for booting */
96 #define current_bootpath zfs_ba->zfs_current_bootpath
97 #define current_rootpool zfs_ba->zfs_current_rootpool
98 #define current_bootfs zfs_ba->zfs_current_bootfs
99 #define current_bootfs_obj zfs_ba->zfs_current_bootfs_obj
100 #define is_zfs_mount (*fsig_int1(ffi))
101 /* End from disk_io.c */
103 #define is_zfs_open zfs_ba->zfs_open
/*
 * Private replacement for bcmp(): compare the first n bytes of s1 and s2.
 *
 * Return:
 *	0 - the ranges are identical (also when n == 0 or s1 == s2)
 *	1 - at least one byte differs
 */
static int
zfs_bcmp(const void *s1, const void *s2, size_t n)
{
	const unsigned char *lhs = s1;
	const unsigned char *rhs = s2;
	size_t i;

	/* identical pointers compare equal without touching memory */
	if (s1 == s2)
		return (0);

	for (i = 0; i < n; i++) {
		if (lhs[i] != rhs[i])
			return (1);
	}

	return (0);
}
/*
 * Private log2(): position of the highest set bit of num, i.e.
 * floor(log2(num)).  Both 0 and 1 yield 0.
 */
static int
zfs_log2(uint64_t num)
{
	int bits;

	/* count how many times num can be halved before reaching 1 (or 0) */
	for (bits = 0; num > 1; num >>= 1)
		bits++;

	return (bits);
}
140 /* Checksum Functions */
141 static void
142 zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
143 {
144 ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
145 }
147 /* Checksum Table and Values */
148 zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
149 {{NULL, NULL}, 0, 0, "inherit"},
150 {{NULL, NULL}, 0, 0, "on"},
151 {{zio_checksum_off, zio_checksum_off}, 0, 0, "off"},
152 {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "label"},
153 {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "gang_header"},
154 {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, "zilog"},
155 {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, "fletcher2"},
156 {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, "fletcher4"},
157 {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, "SHA256"}
158 };
160 /*
161 * zio_checksum_verify: Provides support for checksum verification.
162 *
163 * Fletcher2, Fletcher4, and SHA256 are supported.
164 *
165 * Return:
166 * -1 = Failure
167 * 0 = Success
168 */
169 static int
170 zio_checksum_verify(blkptr_t *bp, char *data, int size)
171 {
172 zio_cksum_t zc = bp->blk_cksum;
173 uint32_t checksum = BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER :
174 BP_GET_CHECKSUM(bp);
175 int byteswap = BP_SHOULD_BYTESWAP(bp);
176 zio_block_tail_t *zbt = (zio_block_tail_t *)(data + size) - 1;
177 zio_checksum_info_t *ci = &zio_checksum_table[checksum];
178 zio_cksum_t actual_cksum, expected_cksum;
180 /* byteswap is not supported */
181 if (byteswap)
182 return (-1);
184 if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
185 return (-1);
187 if (ci->ci_zbt) {
188 if (checksum == ZIO_CHECKSUM_GANG_HEADER) {
189 /*
190 * 'gang blocks' is not supported.
191 */
192 return (-1);
193 }
195 if (zbt->zbt_magic == BSWAP_64(ZBT_MAGIC)) {
196 /* byte swapping is not supported */
197 return (-1);
198 } else {
199 expected_cksum = zbt->zbt_cksum;
200 zbt->zbt_cksum = zc;
201 ci->ci_func[0](data, size, &actual_cksum);
202 zbt->zbt_cksum = expected_cksum;
203 }
204 zc = expected_cksum;
206 } else {
207 if (BP_IS_GANG(bp))
208 return (-1);
209 ci->ci_func[byteswap](data, size, &actual_cksum);
210 }
212 if ((actual_cksum.zc_word[0] - zc.zc_word[0]) |
213 (actual_cksum.zc_word[1] - zc.zc_word[1]) |
214 (actual_cksum.zc_word[2] - zc.zc_word[2]) |
215 (actual_cksum.zc_word[3] - zc.zc_word[3]))
216 return (-1);
218 return (0);
219 }
221 /*
222 * vdev_label_offset takes "offset" (the offset within a vdev_label) and
223 * returns its physical disk offset (starting from the beginning of the vdev).
224 *
225 * Input:
226 * psize : Physical size of this vdev
227 * l : Label Number (0-3)
228 * offset : The offset with a vdev_label in which we want the physical
229 * address
230 * Return:
231 * Success : physical disk offset
232 * Failure : errnum = ERR_BAD_ARGUMENT, return value is meaningless
233 */
234 static uint64_t
235 vdev_label_offset(fsi_file_t *ffi, uint64_t psize, int l, uint64_t offset)
236 {
237 /* XXX Need to add back label support! */
238 if (l >= VDEV_LABELS/2 || offset > sizeof (vdev_label_t)) {
239 errnum = ERR_BAD_ARGUMENT;
240 return (0);
241 }
243 return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
244 0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
246 }
248 /*
249 * vdev_uberblock_compare takes two uberblock structures and returns an integer
250 * indicating the more recent of the two.
251 * Return Value = 1 if ub2 is more recent
252 * Return Value = -1 if ub1 is more recent
253 * The most recent uberblock is determined using its transaction number and
254 * timestamp. The uberblock with the highest transaction number is
255 * considered "newer". If the transaction numbers of the two blocks match, the
256 * timestamps are compared to determine the "newer" of the two.
257 */
258 static int
259 vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
260 {
261 if (ub1->ub_txg < ub2->ub_txg)
262 return (-1);
263 if (ub1->ub_txg > ub2->ub_txg)
264 return (1);
266 if (ub1->ub_timestamp < ub2->ub_timestamp)
267 return (-1);
268 if (ub1->ub_timestamp > ub2->ub_timestamp)
269 return (1);
271 return (0);
272 }
274 /*
275 * Three pieces of information are needed to verify an uberblock: the magic
276 * number, the version number, and the checksum.
277 *
278 * Currently Implemented: version number, magic number
279 * Need to Implement: checksum
280 *
281 * Return:
282 * 0 - Success
283 * -1 - Failure
284 */
285 static int
286 uberblock_verify(uberblock_phys_t *ub, int offset)
287 {
289 uberblock_t *uber = &ub->ubp_uberblock;
290 blkptr_t bp;
292 BP_ZERO(&bp);
293 BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
294 BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER);
295 ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0);
297 if (zio_checksum_verify(&bp, (char *)ub, UBERBLOCK_SIZE) != 0)
298 return (-1);
300 if (uber->ub_magic == UBERBLOCK_MAGIC &&
301 uber->ub_version >= SPA_VERSION_1 &&
302 uber->ub_version <= SPA_VERSION)
303 return (0);
305 return (-1);
306 }
308 /*
309 * Find the best uberblock.
310 * Return:
311 * Success - Pointer to the best uberblock.
312 * Failure - NULL
313 */
314 static uberblock_phys_t *
315 find_bestub(fsi_file_t *ffi, uberblock_phys_t *ub_array, int label)
316 {
317 uberblock_phys_t *ubbest = NULL;
318 int i, offset;
320 for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) {
321 offset = vdev_label_offset(ffi, 0, label,
322 VDEV_UBERBLOCK_OFFSET(i));
323 if (errnum == ERR_BAD_ARGUMENT)
324 return (NULL);
325 if (uberblock_verify(&ub_array[i], offset) == 0) {
326 if (ubbest == NULL) {
327 ubbest = &ub_array[i];
328 } else if (vdev_uberblock_compare(
329 &(ub_array[i].ubp_uberblock),
330 &(ubbest->ubp_uberblock)) > 0) {
331 ubbest = &ub_array[i];
332 }
333 }
334 }
336 return (ubbest);
337 }
339 /*
340 * Read in a block and put its uncompressed data in buf.
341 *
342 * Return:
343 * 0 - success
344 * errnum - failure
345 */
346 static int
347 zio_read(fsi_file_t *ffi, blkptr_t *bp, void *buf, char *stack)
348 {
349 uint64_t offset, sector;
350 int psize, lsize;
351 int i, comp, cksum;
353 psize = BP_GET_PSIZE(bp);
354 lsize = BP_GET_LSIZE(bp);
355 comp = BP_GET_COMPRESS(bp);
356 cksum = BP_GET_CHECKSUM(bp);
358 if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS ||
359 (comp != ZIO_COMPRESS_OFF &&
360 decomp_table[comp].decomp_func == NULL))
361 return (ERR_FSYS_CORRUPT);
363 /* pick a good dva from the block pointer */
364 for (i = 0; i < SPA_DVAS_PER_BP; i++) {
366 if (bp->blk_dva[i].dva_word[0] == 0 &&
367 bp->blk_dva[i].dva_word[1] == 0)
368 continue;
370 /* read in a block */
371 offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
372 sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
374 if (comp != ZIO_COMPRESS_OFF) {
376 if (devread(ffi, sector, 0, psize, stack) == 0)
377 continue;
378 if (zio_checksum_verify(bp, stack, psize) != 0)
379 continue;
380 decomp_table[comp].decomp_func(stack, buf, psize,
381 lsize);
382 } else {
383 if (devread(ffi, sector, 0, psize, buf) == 0)
384 continue;
385 if (zio_checksum_verify(bp, buf, psize) != 0)
386 continue;
387 }
388 return (0);
389 }
391 return (ERR_FSYS_CORRUPT);
392 }
394 /*
395 * Get the block from a block id.
396 * push the block onto the stack.
397 *
398 * Return:
399 * 0 - success
400 * errnum - failure
401 */
402 static int
403 dmu_read(fsi_file_t *ffi, dnode_phys_t *dn, uint64_t blkid, void *buf,
404 char *stack)
405 {
406 int idx, level;
407 blkptr_t *bp_array = dn->dn_blkptr;
408 int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
409 blkptr_t *bp, *tmpbuf;
411 bp = (blkptr_t *)stack;
412 stack += sizeof (blkptr_t);
414 tmpbuf = (blkptr_t *)stack;
415 stack += 1<<dn->dn_indblkshift;
417 for (level = dn->dn_nlevels - 1; level >= 0; level--) {
418 idx = (blkid >> (epbs * level)) & ((1<<epbs)-1);
419 *bp = bp_array[idx];
420 if (level == 0)
421 tmpbuf = buf;
422 if (BP_IS_HOLE(bp)) {
423 grub_memset(buf, 0,
424 dn->dn_datablkszsec << SPA_MINBLOCKSHIFT);
425 break;
426 } else if ((errnum = zio_read(ffi, bp, tmpbuf, stack))) {
427 return (errnum);
428 }
429 bp_array = tmpbuf;
430 }
432 return (0);
433 }
435 /*
436 * mzap_lookup: Looks up property described by "name" and returns the value
437 * in "value".
438 *
439 * Return:
440 * 0 - success
441 * errnum - failure
442 */
443 static int
444 mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name,
445 uint64_t *value)
446 {
447 int i, chunks;
448 mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk;
450 chunks = objsize/MZAP_ENT_LEN - 1;
451 for (i = 0; i < chunks; i++) {
452 if (strcmp(mzap_ent[i].mze_name, name) == 0) {
453 *value = mzap_ent[i].mze_value;
454 return (0);
455 }
456 }
458 return (ERR_FSYS_CORRUPT);
459 }
461 static uint64_t
462 zap_hash(fsi_file_t *ffi, uint64_t salt, const char *name)
463 {
464 static uint64_t table[256];
465 const uint8_t *cp;
466 uint8_t c;
467 uint64_t crc = salt;
469 if (table[128] == 0) {
470 uint64_t *ct;
471 int i, j;
472 for (i = 0; i < 256; i++) {
473 for (ct = table + i, *ct = i, j = 8; j > 0; j--)
474 *ct = (*ct >> 1) ^ (-(*ct & 1) &
475 ZFS_CRC64_POLY);
476 }
477 }
479 if (crc == 0 || table[128] != ZFS_CRC64_POLY) {
480 errnum = ERR_FSYS_CORRUPT;
481 return (0);
482 }
484 for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++)
485 crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF];
487 /*
488 * Only use 28 bits, since we need 4 bits in the cookie for the
489 * collision differentiator. We MUST use the high bits, since
490 * those are the onces that we first pay attention to when
491 * chosing the bucket.
492 */
493 crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1);
495 return (crc);
496 }
498 /*
499 * Only to be used on 8-bit arrays.
500 * array_len is actual len in bytes (not encoded le_value_length).
501 * buf is null-terminated.
502 */
503 static int
504 zap_leaf_array_equal(zap_leaf_phys_t *l, int blksft, int chunk,
505 int array_len, const char *buf)
506 {
507 int bseen = 0;
509 while (bseen < array_len) {
510 struct zap_leaf_array *la =
511 &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array;
512 int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
514 if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
515 return (0);
517 if (zfs_bcmp(la->la_array, buf + bseen, toread) != 0)
518 break;
519 chunk = la->la_next;
520 bseen += toread;
521 }
522 return (bseen == array_len);
523 }
525 /*
526 * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the
527 * value for the property "name".
528 *
529 * Return:
530 * 0 - success
531 * errnum - failure
532 */
533 static int
534 zap_leaf_lookup(zap_leaf_phys_t *l, int blksft, uint64_t h,
535 const char *name, uint64_t *value)
536 {
537 uint16_t chunk;
538 struct zap_leaf_entry *le;
540 /* Verify if this is a valid leaf block */
541 if (l->l_hdr.lh_block_type != ZBT_LEAF)
542 return (ERR_FSYS_CORRUPT);
543 if (l->l_hdr.lh_magic != ZAP_LEAF_MAGIC)
544 return (ERR_FSYS_CORRUPT);
546 for (chunk = l->l_hash[LEAF_HASH(blksft, h)];
547 chunk != CHAIN_END; chunk = le->le_next) {
549 if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
550 return (ERR_FSYS_CORRUPT);
552 le = ZAP_LEAF_ENTRY(l, blksft, chunk);
554 /* Verify the chunk entry */
555 if (le->le_type != ZAP_CHUNK_ENTRY)
556 return (ERR_FSYS_CORRUPT);
558 if (le->le_hash != h)
559 continue;
561 if (zap_leaf_array_equal(l, blksft, le->le_name_chunk,
562 le->le_name_length, name)) {
564 struct zap_leaf_array *la;
565 uint8_t *ip;
567 if (le->le_int_size != 8 || le->le_value_length != 1)
568 return (ERR_FSYS_CORRUPT);
570 /* get the uint64_t property value */
571 la = &ZAP_LEAF_CHUNK(l, blksft,
572 le->le_value_chunk).l_array;
573 ip = la->la_array;
575 *value = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
576 (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
577 (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
578 (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
580 return (0);
581 }
582 }
584 return (ERR_FSYS_CORRUPT);
585 }
587 /*
588 * Fat ZAP lookup
589 *
590 * Return:
591 * 0 - success
592 * errnum - failure
593 */
594 static int
595 fzap_lookup(fsi_file_t *ffi, dnode_phys_t *zap_dnode, zap_phys_t *zap,
596 char *name, uint64_t *value, char *stack)
597 {
598 zap_leaf_phys_t *l;
599 uint64_t hash, idx, blkid;
600 int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT);
602 /* Verify if this is a fat zap header block */
603 if (zap->zap_magic != (uint64_t)ZAP_MAGIC)
604 return (ERR_FSYS_CORRUPT);
606 hash = zap_hash(ffi, zap->zap_salt, name);
607 if (errnum)
608 return (errnum);
610 /* get block id from index */
611 if (zap->zap_ptrtbl.zt_numblks != 0) {
612 /* external pointer tables not supported */
613 return (ERR_FSYS_CORRUPT);
614 }
615 idx = ZAP_HASH_IDX(hash, zap->zap_ptrtbl.zt_shift);
616 blkid = ((uint64_t *)zap)[idx + (1<<(blksft-3-1))];
618 /* Get the leaf block */
619 l = (zap_leaf_phys_t *)stack;
620 stack += 1<<blksft;
621 if ((errnum = dmu_read(ffi, zap_dnode, blkid, l, stack)))
622 return (errnum);
624 return (zap_leaf_lookup(l, blksft, hash, name, value));
625 }
627 /*
628 * Read in the data of a zap object and find the value for a matching
629 * property name.
630 *
631 * Return:
632 * 0 - success
633 * errnum - failure
634 */
635 static int
636 zap_lookup(fsi_file_t *ffi, dnode_phys_t *zap_dnode, char *name,
637 uint64_t *val, char *stack)
638 {
639 uint64_t block_type;
640 int size;
641 void *zapbuf;
643 /* Read in the first block of the zap object data. */
644 zapbuf = stack;
645 size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
646 stack += size;
647 if ((errnum = dmu_read(ffi, zap_dnode, 0, zapbuf, stack)))
648 return (errnum);
650 block_type = *((uint64_t *)zapbuf);
652 if (block_type == ZBT_MICRO) {
653 return (mzap_lookup(zapbuf, size, name, val));
654 } else if (block_type == ZBT_HEADER) {
655 /* this is a fat zap */
656 return (fzap_lookup(ffi, zap_dnode, zapbuf, name,
657 val, stack));
658 }
660 return (ERR_FSYS_CORRUPT);
661 }
663 /*
664 * Get the dnode of an object number from the metadnode of an object set.
665 *
666 * Input
667 * mdn - metadnode to get the object dnode
668 * objnum - object number for the object dnode
669 * buf - data buffer that holds the returning dnode
670 * stack - scratch area
671 *
672 * Return:
673 * 0 - success
674 * errnum - failure
675 */
676 static int
677 dnode_get(fsi_file_t *ffi, dnode_phys_t *mdn, uint64_t objnum,
678 uint8_t type, dnode_phys_t *buf, char *stack)
679 {
680 uint64_t blkid, blksz; /* the block id this object dnode is in */
681 int epbs; /* shift of number of dnodes in a block */
682 int idx; /* index within a block */
683 dnode_phys_t *dnbuf;
684 zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data;
686 blksz = mdn->dn_datablkszsec << SPA_MINBLOCKSHIFT;
687 epbs = zfs_log2(blksz) - DNODE_SHIFT;
688 blkid = objnum >> epbs;
689 idx = objnum & ((1<<epbs)-1);
691 if (dnode_buf != NULL && dnode_mdn == mdn &&
692 objnum >= dnode_start && objnum < dnode_end) {
693 grub_memmove(buf, &dnode_buf[idx], DNODE_SIZE);
694 VERIFY_DN_TYPE(buf, type);
695 return (0);
696 }
698 if (dnode_buf && blksz == 1<<DNODE_BLOCK_SHIFT) {
699 dnbuf = dnode_buf;
700 dnode_mdn = mdn;
701 dnode_start = blkid << epbs;
702 dnode_end = (blkid + 1) << epbs;
703 } else {
704 dnbuf = (dnode_phys_t *)stack;
705 stack += blksz;
706 }
708 if ((errnum = dmu_read(ffi, mdn, blkid, (char *)dnbuf, stack)))
709 return (errnum);
711 grub_memmove(buf, &dnbuf[idx], DNODE_SIZE);
712 VERIFY_DN_TYPE(buf, type);
714 return (0);
715 }
717 /*
718 * Check if this is a special file that resides at the top
719 * dataset of the pool. Currently this is the GRUB menu,
720 * boot signature and boot signature backup.
721 * str starts with '/'.
722 */
723 static int
724 is_top_dataset_file(char *str)
725 {
726 char *tptr;
728 if (((tptr = strstr(str, "menu.lst"))) &&
729 (tptr[8] == '\0' || tptr[8] == ' ') &&
730 *(tptr-1) == '/')
731 return (1);
733 if (strncmp(str, BOOTSIGN_DIR"/",
734 strlen(BOOTSIGN_DIR) + 1) == 0)
735 return (1);
737 if (strcmp(str, BOOTSIGN_BACKUP) == 0)
738 return (1);
740 return (0);
741 }
743 /*
744 * Get the file dnode for a given file name where mdn is the meta dnode
745 * for this ZFS object set. When found, place the file dnode in dn.
746 * The 'path' argument will be mangled.
747 *
748 * Return:
749 * 0 - success
750 * errnum - failure
751 */
752 static int
753 dnode_get_path(fsi_file_t *ffi, dnode_phys_t *mdn, char *path,
754 dnode_phys_t *dn, char *stack)
755 {
756 uint64_t objnum, version;
757 char *cname, ch;
759 if ((errnum = dnode_get(ffi, mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE,
760 dn, stack)))
761 return (errnum);
763 if ((errnum = zap_lookup(ffi, dn, ZPL_VERSION_STR, &version, stack)))
764 return (errnum);
765 if (version > ZPL_VERSION)
766 return (-1);
768 if ((errnum = zap_lookup(ffi, dn, ZFS_ROOT_OBJ, &objnum, stack)))
769 return (errnum);
771 if ((errnum = dnode_get(ffi, mdn, objnum, DMU_OT_DIRECTORY_CONTENTS,
772 dn, stack)))
773 return (errnum);
775 /* skip leading slashes */
776 while (*path == '/')
777 path++;
779 while (*path && !isspace((uint8_t)*path)) {
781 /* get the next component name */
782 cname = path;
783 while (*path && !isspace((uint8_t)*path) && *path != '/')
784 path++;
785 ch = *path;
786 *path = 0; /* ensure null termination */
788 if ((errnum = zap_lookup(ffi, dn, cname, &objnum, stack)))
789 return (errnum);
791 objnum = ZFS_DIRENT_OBJ(objnum);
792 if ((errnum = dnode_get(ffi, mdn, objnum, 0, dn, stack)))
793 return (errnum);
795 *path = ch;
796 while (*path == '/')
797 path++;
798 }
800 /* We found the dnode for this file. Verify if it is a plain file. */
801 VERIFY_DN_TYPE(dn, DMU_OT_PLAIN_FILE_CONTENTS);
803 return (0);
804 }
806 /*
807 * Get the default 'bootfs' property value from the rootpool.
808 *
809 * Return:
810 * 0 - success
811 * errnum -failure
812 */
813 static int
814 get_default_bootfsobj(fsi_file_t *ffi, dnode_phys_t *mosmdn,
815 uint64_t *obj, char *stack)
816 {
817 uint64_t objnum = 0;
818 dnode_phys_t *dn = (dnode_phys_t *)stack;
819 stack += DNODE_SIZE;
821 if ((errnum = dnode_get(ffi, mosmdn, DMU_POOL_DIRECTORY_OBJECT,
822 DMU_OT_OBJECT_DIRECTORY, dn, stack)))
823 return (errnum);
825 /*
826 * find the object number for 'pool_props', and get the dnode
827 * of the 'pool_props'.
828 */
829 if (zap_lookup(ffi, dn, DMU_POOL_PROPS, &objnum, stack))
830 return (ERR_FILESYSTEM_NOT_FOUND);
832 if ((errnum = dnode_get(ffi, mosmdn, objnum, DMU_OT_POOL_PROPS, dn,
833 stack)))
834 return (errnum);
836 if (zap_lookup(ffi, dn, ZPOOL_PROP_BOOTFS, &objnum, stack))
837 return (ERR_FILESYSTEM_NOT_FOUND);
839 if (!objnum)
840 return (ERR_FILESYSTEM_NOT_FOUND);
843 *obj = objnum;
844 return (0);
845 }
847 /*
848 * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
849 * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
850 * of pool/rootfs.
851 *
852 * If no fsname and no obj are given, return the DSL_DIR metadnode.
853 * If fsname is given, return its metadnode and its matching object number.
854 * If only obj is given, return the metadnode for this object number.
855 *
856 * Return:
857 * 0 - success
858 * errnum - failure
859 */
860 static int
861 get_objset_mdn(fsi_file_t *ffi, dnode_phys_t *mosmdn, char *fsname,
862 uint64_t *obj, dnode_phys_t *mdn, char *stack)
863 {
864 uint64_t objnum, headobj;
865 char *cname, ch;
866 blkptr_t *bp;
867 objset_phys_t *osp;
869 if (fsname == NULL && obj) {
870 headobj = *obj;
871 goto skip;
872 }
874 if ((errnum = dnode_get(ffi, mosmdn, DMU_POOL_DIRECTORY_OBJECT,
875 DMU_OT_OBJECT_DIRECTORY, mdn, stack)))
876 return (errnum);
878 if ((errnum = zap_lookup(ffi, mdn, DMU_POOL_ROOT_DATASET, &objnum,
879 stack)))
880 return (errnum);
882 if ((errnum = dnode_get(ffi, mosmdn, objnum, DMU_OT_DSL_DIR, mdn,
883 stack)))
884 return (errnum);
886 if (fsname == NULL) {
887 headobj =
888 ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
889 goto skip;
890 }
892 /* take out the pool name */
893 while (*fsname && !isspace((uint8_t)*fsname) && *fsname != '/')
894 fsname++;
896 while (*fsname && !isspace((uint8_t)*fsname)) {
897 uint64_t childobj;
899 while (*fsname == '/')
900 fsname++;
902 cname = fsname;
903 while (*fsname && !isspace((uint8_t)*fsname) && *fsname != '/')
904 fsname++;
905 ch = *fsname;
906 *fsname = 0;
908 childobj =
909 ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj;
910 if ((errnum = dnode_get(ffi, mosmdn, childobj,
911 DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack)))
912 return (errnum);
914 if (zap_lookup(ffi, mdn, cname, &objnum, stack))
915 return (ERR_FILESYSTEM_NOT_FOUND);
917 if ((errnum = dnode_get(ffi, mosmdn, objnum, DMU_OT_DSL_DIR,
918 mdn, stack)))
919 return (errnum);
921 *fsname = ch;
922 }
923 headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
924 if (obj)
925 *obj = headobj;
927 skip:
928 if ((errnum = dnode_get(ffi, mosmdn, headobj, DMU_OT_DSL_DATASET, mdn,
929 stack)))
930 return (errnum);
932 /* TODO: Add snapshot support here - for fsname=snapshot-name */
934 bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp;
935 osp = (objset_phys_t *)stack;
936 stack += sizeof (objset_phys_t);
937 if ((errnum = zio_read(ffi, bp, osp, stack)))
938 return (errnum);
940 grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE);
942 return (0);
943 }
945 /*
946 * For a given XDR packed nvlist, verify the first 4 bytes and move on.
947 *
948 * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) :
949 *
950 * encoding method/host endian (4 bytes)
951 * nvl_version (4 bytes)
952 * nvl_nvflag (4 bytes)
953 * encoded nvpairs:
954 * encoded size of the nvpair (4 bytes)
955 * decoded size of the nvpair (4 bytes)
956 * name string size (4 bytes)
957 * name string data (sizeof(NV_ALIGN4(string))
958 * data type (4 bytes)
959 * # of elements in the nvpair (4 bytes)
960 * data
961 * 2 zero's for the last nvpair
962 * (end of the entire list) (8 bytes)
963 *
964 * Return:
965 * 0 - success
966 * 1 - failure
967 */
968 static int
969 nvlist_unpack(char *nvlist, char **out)
970 {
971 /* Verify if the 1st and 2nd byte in the nvlist are valid. */
972 if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN)
973 return (1);
975 nvlist += 4;
976 *out = nvlist;
977 return (0);
978 }
/*
 * Return a pointer to element "index" of a packed nvlist array by
 * stepping over the preceding "index" nvlists.  Each skipped list is an
 * 8-byte version/flag header, a run of nvpairs (each starting with its
 * big-endian encoded size) and an 8-byte zero terminator.
 */
static char *
nvlist_array(char *nvlist, int index)
{
	int remaining, encode_size;

	for (remaining = index; remaining > 0; remaining--) {
		/* skip the header, nvl_version, and nvl_nvflag */
		nvlist += 4 * 2;

		/* hop from nvpair to nvpair until the zero size marker */
		for (;;) {
			encode_size = BSWAP_32(*(uint32_t *)nvlist);
			if (encode_size == 0)
				break;
			nvlist += encode_size;
		}

		/* skip the ending 2 zeros - 8 bytes */
		nvlist += 4 * 2;
	}

	return (nvlist);
}
998 static int
999 nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype,
1000 int *nelmp)
1002 int name_len, type, slen, encode_size;
1003 char *nvpair, *nvp_name, *strval = val;
1004 uint64_t *intval = val;
1006 /* skip the header, nvl_version, and nvl_nvflag */
1007 nvlist = nvlist + 4 * 2;
1009 /*
1010 * Loop thru the nvpair list
1011 * The XDR representation of an integer is in big-endian byte order.
1012 */
1013 while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) {
1015 nvpair = nvlist + 4 * 2; /* skip the encode/decode size */
1017 name_len = BSWAP_32(*(uint32_t *)nvpair);
1018 nvpair += 4;
1020 nvp_name = nvpair;
1021 nvpair = nvpair + ((name_len + 3) & ~3); /* align */
1023 type = BSWAP_32(*(uint32_t *)nvpair);
1024 nvpair += 4;
1026 if (((strncmp(nvp_name, name, name_len) == 0) &&
1027 type == valtype)) {
1028 int nelm;
1030 if (((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1))
1031 return (1);
1032 nvpair += 4;
1034 switch (valtype) {
1035 case DATA_TYPE_STRING:
1036 slen = BSWAP_32(*(uint32_t *)nvpair);
1037 nvpair += 4;
1038 grub_memmove(strval, nvpair, slen);
1039 strval[slen] = '\0';
1040 return (0);
1042 case DATA_TYPE_UINT64:
1043 *intval = BSWAP_64(*(uint64_t *)nvpair);
1044 return (0);
1046 case DATA_TYPE_NVLIST:
1047 *(void **)val = (void *)nvpair;
1048 return (0);
1050 case DATA_TYPE_NVLIST_ARRAY:
1051 *(void **)val = (void *)nvpair;
1052 if (nelmp)
1053 *nelmp = nelm;
1054 return (0);
1058 nvlist += encode_size; /* goto the next nvpair */
1061 return (1);
1064 /*
1065 * Check if this vdev is online and is in a good state.
1066 */
1067 static int
1068 vdev_validate(char *nv)
1070 uint64_t ival;
1072 if (nvlist_lookup_value(nv, ZPOOL_CONFIG_OFFLINE, &ival,
1073 DATA_TYPE_UINT64, NULL) == 0 ||
1074 nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival,
1075 DATA_TYPE_UINT64, NULL) == 0 ||
1076 nvlist_lookup_value(nv, ZPOOL_CONFIG_DEGRADED, &ival,
1077 DATA_TYPE_UINT64, NULL) == 0 ||
1078 nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival,
1079 DATA_TYPE_UINT64, NULL) == 0)
1080 return (ERR_DEV_VALUES);
1082 return (0);
1085 /*
1086 * Get a list of valid vdev pathname from the boot device.
1087 * The caller should already allocate MAXNAMELEN memory for bootpath.
1088 */
1089 static int
1090 vdev_get_bootpath(char *nv, char *bootpath)
1092 char type[16];
1094 bootpath[0] = '\0';
1095 if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING,
1096 NULL))
1097 return (ERR_FSYS_CORRUPT);
1099 if (strcmp(type, VDEV_TYPE_DISK) == 0) {
1100 if (vdev_validate(nv) != 0 ||
1101 nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, bootpath,
1102 DATA_TYPE_STRING, NULL) != 0)
1103 return (ERR_NO_BOOTPATH);
1105 } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
1106 int nelm, i;
1107 char *child;
1109 if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child,
1110 DATA_TYPE_NVLIST_ARRAY, &nelm))
1111 return (ERR_FSYS_CORRUPT);
1113 for (i = 0; i < nelm; i++) {
1114 char tmp_path[MAXNAMELEN];
1115 char *child_i;
1117 child_i = nvlist_array(child, i);
1118 if (vdev_validate(child_i) != 0)
1119 continue;
1121 if (nvlist_lookup_value(child_i, ZPOOL_CONFIG_PHYS_PATH,
1122 tmp_path, DATA_TYPE_STRING, NULL) != 0)
1123 return (ERR_NO_BOOTPATH);
1125 if ((strlen(bootpath) + strlen(tmp_path)) > MAXNAMELEN)
1126 return (ERR_WONT_FIT);
1128 if (strlen(bootpath) == 0)
1129 sprintf(bootpath, "%s", tmp_path);
1130 else
1131 sprintf(bootpath, "%s %s", bootpath, tmp_path);
1135 return (strlen(bootpath) > 0 ? 0 : ERR_NO_BOOTPATH);
1138 /*
1139 * Check the disk label information and retrieve needed vdev name-value pairs.
1141 * Return:
1142 * 0 - success
1143 * ERR_* - failure
1144 */
1145 static int
1146 check_pool_label(fsi_file_t *ffi, int label, char *stack)
1148 vdev_phys_t *vdev;
1149 uint64_t sector, pool_state, txg = 0;
1150 char *nvlist, *nv;
1151 zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data;
1153 sector = (label * sizeof (vdev_label_t) + VDEV_SKIP_SIZE +
1154 VDEV_BOOT_HEADER_SIZE) >> SPA_MINBLOCKSHIFT;
1156 /* Read in the vdev name-value pair list (112K). */
1157 if (devread(ffi, sector, 0, VDEV_PHYS_SIZE, stack) == 0)
1158 return (ERR_READ);
1160 vdev = (vdev_phys_t *)stack;
1162 if (nvlist_unpack(vdev->vp_nvlist, &nvlist))
1163 return (ERR_FSYS_CORRUPT);
1165 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_STATE, &pool_state,
1166 DATA_TYPE_UINT64, NULL))
1167 return (ERR_FSYS_CORRUPT);
1169 if (pool_state == POOL_STATE_DESTROYED)
1170 return (ERR_FILESYSTEM_NOT_FOUND);
1172 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_NAME,
1173 current_rootpool, DATA_TYPE_STRING, NULL))
1174 return (ERR_FSYS_CORRUPT);
1176 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_TXG, &txg,
1177 DATA_TYPE_UINT64, NULL))
1178 return (ERR_FSYS_CORRUPT);
1180 /* not an active device */
1181 if (txg == 0)
1182 return (ERR_NO_BOOTPATH);
1184 if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv,
1185 DATA_TYPE_NVLIST, NULL))
1186 return (ERR_FSYS_CORRUPT);
1188 if (vdev_get_bootpath(nv, current_bootpath))
1189 return (ERR_NO_BOOTPATH);
1191 return (0);
1194 /*
1195 * zfs_mount() locates a valid uberblock of the root pool and read in its MOS
1196 * to the memory address MOS.
1198 * Return:
1199 * 1 - success
1200 * 0 - failure
1201 */
1202 static int
1203 zfs_mount(fsi_file_t *ffi, const char *options)
1205 char *stack;
1206 int label = 0;
1207 uberblock_phys_t *ub_array, *ubbest = NULL;
1208 objset_phys_t *osp;
1209 zfs_bootarea_t *zfs_ba;
1211 /* if zfs is already mounted, don't do it again */
1212 if (is_zfs_mount == 1)
1213 return (1);
1215 /* get much bigger data block for zfs */
1216 if (((zfs_ba = malloc(sizeof (zfs_bootarea_t))) == NULL)) {
1217 return (1);
1219 bzero(zfs_ba, sizeof (zfs_bootarea_t));
1221 /* replace small data area in fsi with big one */
1222 free(ffi->ff_fsi->f_data);
1223 ffi->ff_fsi->f_data = (void *)zfs_ba;
1225 /* If an boot filesystem is passed in, set it to current_bootfs */
1226 if (options != NULL) {
1227 if (strlen(options) < MAXNAMELEN) {
1228 strcpy(current_bootfs, options);
1232 stackbase = ZFS_SCRATCH;
1233 stack = stackbase;
1234 ub_array = (uberblock_phys_t *)stack;
1235 stack += VDEV_UBERBLOCK_RING;
1237 osp = (objset_phys_t *)stack;
1238 stack += sizeof (objset_phys_t);
1240 /* XXX add back labels support? */
1241 for (label = 0; ubbest == NULL && label < (VDEV_LABELS/2); label++) {
1242 uint64_t sector = (label * sizeof (vdev_label_t) +
1243 VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE +
1244 VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT;
1247 /* Read in the uberblock ring (128K). */
1248 if (devread(ffi, sector, 0, VDEV_UBERBLOCK_RING,
1249 (char *)ub_array) == 0)
1250 continue;
1252 if ((ubbest = find_bestub(ffi, ub_array, label)) != NULL &&
1253 zio_read(ffi, &ubbest->ubp_uberblock.ub_rootbp, osp, stack)
1254 == 0) {
1256 VERIFY_OS_TYPE(osp, DMU_OST_META);
1258 /* Got the MOS. Save it at the memory addr MOS. */
1259 grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE);
1261 if (check_pool_label(ffi, label, stack))
1262 return (0);
1264 /*
1265 * Copy fsi->f_data to ffi->ff_data since
1266 * fsig_mount copies from ff_data to f_data
1267 * overwriting fsi->f_data.
1268 */
1269 bcopy(zfs_ba, fsig_file_buf(ffi), FSYS_BUFLEN);
1271 is_zfs_mount = 1;
1272 return (1);
1276 return (0);
1279 /*
1280 * zfs_open() locates a file in the rootpool by following the
1281 * MOS and places the dnode of the file in the memory address DNODE.
1283 * Return:
1284 * 1 - success
1285 * 0 - failure
1286 */
1287 static int
1288 zfs_open(fsi_file_t *ffi, char *filename)
1290 char *stack;
1291 dnode_phys_t *mdn;
1292 char *bootstring;
1293 zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data;
1295 file_buf = NULL;
1296 stackbase = ZFS_SCRATCH;
1297 stack = stackbase;
1299 mdn = (dnode_phys_t *)stack;
1300 stack += sizeof (dnode_phys_t);
1302 dnode_mdn = NULL;
1303 dnode_buf = (dnode_phys_t *)stack;
1304 stack += 1<<DNODE_BLOCK_SHIFT;
1306 /*
1307 * menu.lst is placed at the root pool filesystem level,
1308 * do not goto 'current_bootfs'.
1309 */
1310 if (is_top_dataset_file(filename)) {
1311 if ((errnum = get_objset_mdn(ffi, MOS, NULL, NULL, mdn, stack)))
1312 return (0);
1314 current_bootfs_obj = 0;
1315 } else {
1316 if (current_bootfs[0] == '\0') {
1317 /* Get the default root filesystem object number */
1318 if ((errnum = get_default_bootfsobj(ffi, MOS,
1319 &current_bootfs_obj, stack)))
1320 return (0);
1321 if ((errnum = get_objset_mdn(ffi, MOS, NULL,
1322 &current_bootfs_obj, mdn, stack)))
1323 return (0);
1324 } else {
1325 if ((errnum = get_objset_mdn(ffi, MOS,
1326 current_bootfs, &current_bootfs_obj, mdn, stack)))
1327 return (0);
1330 /*
1331 * Put zfs rootpool and boot obj number into bootstring.
1332 */
1333 if (is_zfs_open == 0) {
1334 char temp[25]; /* needs to hold long long */
1335 int alloc_size;
1336 char zfs_bootstr[] = "zfs-bootfs=";
1337 char zfs_bootpath[] = ",bootpath='";
1339 snprintf(temp, sizeof(temp), "%llu", (unsigned long long)
1340 current_bootfs_obj);
1341 alloc_size = strlen(zfs_bootstr) +
1342 strlen(current_rootpool) +
1343 strlen(temp) + strlen(zfs_bootpath) +
1344 strlen(current_bootpath) + 3;
1345 bootstring = fsi_bootstring_alloc(ffi->ff_fsi,
1346 alloc_size);
1347 if (bootstring != NULL) {
1348 strcpy(bootstring, zfs_bootstr);
1349 strcat(bootstring, current_rootpool);
1350 strcat(bootstring, "/");
1351 strcat(bootstring, temp);
1352 strcat(bootstring, zfs_bootpath);
1353 strcat(bootstring, current_bootpath);
1354 strcat(bootstring, "'");
1355 is_zfs_open = 1;
1360 if (dnode_get_path(ffi, mdn, filename, DNODE, stack)) {
1361 errnum = ERR_FILE_NOT_FOUND;
1362 return (0);
1365 /* get the file size and set the file position to 0 */
1366 filemax = ((znode_phys_t *)DN_BONUS(DNODE))->zp_size;
1367 filepos = 0;
1369 dnode_buf = NULL;
1370 return (1);
1373 /*
1374 * zfs_read reads in the data blocks pointed by the DNODE.
1376 * Return:
1377 * len - the length successfully read in to the buffer
1378 * 0 - failure
1379 */
1380 static int
1381 zfs_read(fsi_file_t *ffi, char *buf, int len)
1383 char *stack;
1384 int blksz, length, movesize;
1385 zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data;
1387 if (file_buf == NULL) {
1388 file_buf = stackbase;
1389 stackbase += SPA_MAXBLOCKSIZE;
1390 file_start = file_end = 0;
1392 stack = stackbase;
1394 /*
1395 * If offset is in memory, move it into the buffer provided and return.
1396 */
1397 if (filepos >= file_start && filepos+len <= file_end) {
1398 grub_memmove(buf, file_buf + filepos - file_start, len);
1399 filepos += len;
1400 return (len);
1403 blksz = DNODE->dn_datablkszsec << SPA_MINBLOCKSHIFT;
1405 /*
1406 * Entire Dnode is too big to fit into the space available. We
1407 * will need to read it in chunks. This could be optimized to
1408 * read in as large a chunk as there is space available, but for
1409 * now, this only reads in one data block at a time.
1410 */
1411 length = len;
1412 while (length) {
1413 /*
1414 * Find requested blkid and the offset within that block.
1415 */
1416 uint64_t blkid = filepos / blksz;
1418 if ((errnum = dmu_read(ffi, DNODE, blkid, file_buf, stack)))
1419 return (0);
1421 file_start = blkid * blksz;
1422 file_end = file_start + blksz;
1424 movesize = MIN(length, file_end - filepos);
1426 grub_memmove(buf, file_buf + filepos - file_start,
1427 movesize);
1428 buf += movesize;
1429 length -= movesize;
1430 filepos += movesize;
1433 return (len);
/*
 * No-Op: neither argument is examined or modified.
 *
 * Return:
 *	1 - always
 */
int
zfs_embed(int *start_sector, int needed_sectors)
{
	(void) start_sector;
	(void) needed_sectors;

	return (1);
}
1445 fsi_plugin_ops_t *
1446 fsi_init_plugin(int version, fsi_plugin_t *fp, const char **name)
1448 static fsig_plugin_ops_t ops = {
1449 FSIMAGE_PLUGIN_VERSION,
1450 .fpo_mount = zfs_mount,
1451 .fpo_dir = zfs_open,
1452 .fpo_read = zfs_read
1453 };
1455 *name = "zfs";
1456 return (fsig_init(fp, &ops));