ia64/xen-unstable

view tools/libfsimage/reiserfs/fsys_reiserfs.c @ 19648:f0e2df69a8eb

x86 hvm: Allow cross-vendor migration

Intercept #UD and emulate SYSCALL/SYSENTER/SYSEXIT as necessary.

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 15:01:36 2009 +0100 (2009-05-26)
parents 4b882c41c9b9
children
line source
1 /* fsys_reiserfs.c - an implementation for the ReiserFS filesystem */
2 /*
3 * GRUB -- GRand Unified Bootloader
4 * Copyright (C) 2000, 2001 Free Software Foundation, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
21 #include <fsimage_grub.h>
23 #undef REISERDEBUG
25 /* Some parts of this code (mainly the structures and defines) are
26 * from the original reiser fs code, as found in the linux kernel.
27 */
29 /* include/asm-i386/types.h */
30 typedef __signed__ char __s8;
31 typedef unsigned char __u8;
32 typedef __signed__ short __s16;
33 typedef unsigned short __u16;
34 typedef __signed__ int __s32;
35 typedef unsigned int __u32;
36 typedef unsigned long long __u64;
38 /* linux/posix_type.h */
39 typedef long linux_off_t;
41 /* linux/little_endian.h */
42 #define __cpu_to_le64(x) ((__u64) (x))
43 #define __le64_to_cpu(x) ((__u64) (x))
44 #define __cpu_to_le32(x) ((__u32) (x))
45 #define __le32_to_cpu(x) ((__u32) (x))
46 #define __cpu_to_le16(x) ((__u16) (x))
47 #define __le16_to_cpu(x) ((__u16) (x))
49 /* include/linux/reiser_fs.h */
50 /* This is the new super block of a journaling reiserfs system */
51 struct reiserfs_super_block
52 {
53 __u32 s_block_count; /* blocks count */
54 __u32 s_free_blocks; /* free blocks count */
55 __u32 s_root_block; /* root block number */
56 __u32 s_journal_block; /* journal block number */
57 __u32 s_journal_dev; /* journal device number */
58 __u32 s_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */
59 __u32 s_journal_trans_max; /* max number of blocks in a transaction. */
60 __u32 s_journal_magic; /* random value made on fs creation */
61 __u32 s_journal_max_batch; /* max number of blocks to batch into a trans */
62 __u32 s_journal_max_commit_age; /* in seconds, how old can an async commit be */
63 __u32 s_journal_max_trans_age; /* in seconds, how old can a transaction be */
64 __u16 s_blocksize; /* block size */
65 __u16 s_oid_maxsize; /* max size of object id array */
66 __u16 s_oid_cursize; /* current size of object id array */
67 __u16 s_state; /* valid or error */
68 char s_magic[16]; /* reiserfs magic string indicates that file system is reiserfs */
69 __u16 s_tree_height; /* height of disk tree */
70 __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */
71 __u16 s_version;
72 char s_unused[128]; /* zero filled by mkreiserfs */
73 };
75 #define REISERFS_MAX_SUPPORTED_VERSION 2
76 #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs"
77 #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
78 #define REISER3FS_SUPER_MAGIC_STRING "ReIsEr3Fs"
80 #define MAX_HEIGHT 7
82 /* must be correct to keep the desc and commit structs at 4k */
83 #define JOURNAL_TRANS_HALF 1018
85 /* first block written in a commit. */
86 struct reiserfs_journal_desc {
87 __u32 j_trans_id; /* id of commit */
88 __u32 j_len; /* length of commit. len +1 is the commit block */
89 __u32 j_mount_id; /* mount id of this trans*/
90 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the first blocks */
91 char j_magic[12];
92 };
94 /* last block written in a commit */
95 struct reiserfs_journal_commit {
96 __u32 j_trans_id; /* must match j_trans_id from the desc block */
97 __u32 j_len; /* ditto */
98 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the last blocks */
99 char j_digest[16]; /* md5 sum of all the blocks involved, including desc and commit. not used, kill it */
100 };
102 /* this header block gets written whenever a transaction is considered
103 fully flushed, and is more recent than the last fully flushed
104 transaction.
105 fully flushed means all the log blocks and all the real blocks are
106 on disk, and this transaction does not need to be replayed.
107 */
108 struct reiserfs_journal_header {
109 /* id of last fully flushed transaction */
110 __u32 j_last_flush_trans_id;
111 /* offset in the log of where to start replay after a crash */
112 __u32 j_first_unflushed_offset;
113 /* mount id to detect very old transactions */
114 __u32 j_mount_id;
115 };
117 /* magic string to find desc blocks in the journal */
118 #define JOURNAL_DESC_MAGIC "ReIsErLB"
121 /*
122 * directories use this key as well as old files
123 */
124 struct offset_v1
125 {
126 /*
127 * for regular files this is the offset to the first byte of the
128 * body, contained in the object-item, as measured from the start of
129 * the entire body of the object.
130 *
131 * for directory entries, k_offset consists of hash derived from
132 * hashing the name and using few bits (23 or more) of the resulting
133 * hash, and generation number that allows distinguishing names with
134 * hash collisions. If number of collisions overflows generation
135 * number, we return EEXIST. High order bit is 0 always
136 */
137 __u32 k_offset;
138 __u32 k_uniqueness;
139 };
141 struct offset_v2
142 {
143 /*
144 * for regular files this is the offset to the first byte of the
145 * body, contained in the object-item, as measured from the start of
146 * the entire body of the object.
147 *
148 * for directory entries, k_offset consists of hash derived from
149 * hashing the name and using few bits (23 or more) of the resulting
150 * hash, and generation number that allows distinguishing names with
151 * hash collisions. If number of collisions overflows generation
152 * number, we return EEXIST. High order bit is 0 always
153 */
154 __u64 k_offset:60;
155 __u64 k_type: 4;
156 };
159 struct key
160 {
161 /* packing locality: by default parent directory object id */
162 __u32 k_dir_id;
163 /* object identifier */
164 __u32 k_objectid;
165 /* the offset and node type (old and new form) */
166 union
167 {
168 struct offset_v1 v1;
169 struct offset_v2 v2;
170 }
171 u;
172 };
174 #define KEY_SIZE (sizeof (struct key))
176 /* Header of a disk block. More precisely, header of a formatted leaf
177 or internal node, and not the header of an unformatted node. */
178 struct block_head
179 {
180 __u16 blk_level; /* Level of a block in the tree. */
181 __u16 blk_nr_item; /* Number of keys/items in a block. */
182 __u16 blk_free_space; /* Block free space in bytes. */
183 struct key blk_right_delim_key; /* Right delimiting key for this block (supported for leaf level nodes
184 only) */
185 };
186 #define BLKH_SIZE (sizeof (struct block_head))
187 #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */
189 struct item_head
190 {
191 struct key ih_key; /* Everything in the tree is found by searching for it based on its key.*/
193 union
194 {
195 __u16 ih_free_space; /* The free space in the last unformatted node of an indirect item if this
196 is an indirect item. This equals 0xFFFF iff this is a direct item or
197 stat data item. Note that the key, not this field, is used to determine
198 the item type, and thus which field this union contains. */
199 __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory
200 entries in the directory item. */
201 }
202 u;
203 __u16 ih_item_len; /* total size of the item body */
204 __u16 ih_item_location; /* an offset to the item body within the block */
205 __u16 ih_version; /* ITEM_VERSION_1 for all old items,
206 ITEM_VERSION_2 for new ones.
207 Highest bit is set by fsck
208 temporary, cleaned after all done */
209 };
210 /* size of item header */
211 #define IH_SIZE (sizeof (struct item_head))
213 #define ITEM_VERSION_1 0
214 #define ITEM_VERSION_2 1
215 #define IH_KEY_OFFSET(ih) ((ih)->ih_version == ITEM_VERSION_1 \
216 ? (ih)->ih_key.u.v1.k_offset \
217 : (ih)->ih_key.u.v2.k_offset)
219 #define IH_KEY_ISTYPE(ih, type) ((ih)->ih_version == ITEM_VERSION_1 \
220 ? (ih)->ih_key.u.v1.k_uniqueness == V1_##type \
221 : (ih)->ih_key.u.v2.k_type == V2_##type)
223 struct disk_child
224 {
225 unsigned long dc_block_number; /* Disk child's block number. */
226 unsigned short dc_size; /* Disk child's used space. */
227 };
229 #define DC_SIZE (sizeof (struct disk_child))
231 /* Stat Data on disk.
232 *
233 * Note that reiserfs has two different forms of stat data. Luckily
234 * the fields needed by grub are at the same position.
235 */
236 struct stat_data
237 {
238 __u16 sd_mode; /* file type, permissions */
239 __u16 sd_notused1[3]; /* fields not needed by reiserfs */
240 __u32 sd_size; /* file size */
241 __u32 sd_size_hi; /* file size high 32 bits (since version 2) */
242 };
244 struct reiserfs_de_head
245 {
246 __u32 deh_offset; /* third component of the directory entry key */
247 __u32 deh_dir_id; /* objectid of the parent directory of the
248 object, that is referenced by directory entry */
249 __u32 deh_objectid;/* objectid of the object, that is referenced by
250 directory entry */
251 __u16 deh_location;/* offset of name in the whole item */
252 __u16 deh_state; /* whether 1) entry contains stat data (for
253 future), and 2) whether entry is hidden
254 (unlinked) */
255 };
257 #define DEH_SIZE (sizeof (struct reiserfs_de_head))
259 #define DEH_Statdata (1 << 0) /* not used now */
260 #define DEH_Visible (1 << 2)
262 #define SD_OFFSET 0
263 #define SD_UNIQUENESS 0
264 #define DOT_OFFSET 1
265 #define DOT_DOT_OFFSET 2
266 #define DIRENTRY_UNIQUENESS 500
268 #define V1_TYPE_STAT_DATA 0x0
269 #define V1_TYPE_DIRECT 0xffffffff
270 #define V1_TYPE_INDIRECT 0xfffffffe
271 #define V1_TYPE_DIRECTORY_MAX 0xfffffffd
272 #define V2_TYPE_STAT_DATA 0
273 #define V2_TYPE_INDIRECT 1
274 #define V2_TYPE_DIRECT 2
275 #define V2_TYPE_DIRENTRY 3
277 #define REISERFS_ROOT_OBJECTID 2
278 #define REISERFS_ROOT_PARENT_OBJECTID 1
279 #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
280 /* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */
281 #define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
282 #define REISERFS_OLD_BLOCKSIZE 4096
284 #define S_ISREG(mode) (((mode) & 0170000) == 0100000)
285 #define S_ISDIR(mode) (((mode) & 0170000) == 0040000)
286 #define S_ISLNK(mode) (((mode) & 0170000) == 0120000)
288 #define PATH_MAX 1024 /* include/linux/limits.h */
289 #define MAX_LINK_COUNT 5 /* number of symbolic links to follow */
291 /* The size of the node cache */
292 #define FSYSREISER_CACHE_SIZE 24*1024
293 #define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE
294 #define FSYSREISER_MAX_BLOCKSIZE FSYSREISER_CACHE_SIZE / 3
296 /* Info about currently opened file */
297 struct fsys_reiser_fileinfo
298 {
299 __u32 k_dir_id;
300 __u32 k_objectid;
301 };
303 /* In memory info about the currently mounted filesystem */
304 struct fsys_reiser_info
305 {
306 /* The last read item head */
307 struct item_head *current_ih;
308 /* The last read item */
309 char *current_item;
310 /* The information for the currently opened file */
311 struct fsys_reiser_fileinfo fileinfo;
312 /* The start of the journal */
313 __u32 journal_block;
314 /* The size of the journal */
315 __u32 journal_block_count;
316 /* The first valid descriptor block in journal
317 (relative to journal_block) */
318 __u32 journal_first_desc;
320 /* The ReiserFS version. */
321 __u16 version;
322 /* The current depth of the reiser tree. */
323 __u16 tree_depth;
324 /* SECTOR_SIZE << blocksize_shift == blocksize. */
325 __u8 blocksize_shift;
326 /* 1 << full_blocksize_shift == blocksize. */
327 __u8 fullblocksize_shift;
328 /* The reiserfs block size (must be a power of 2) */
329 __u16 blocksize;
330 /* The number of cached tree nodes */
331 __u16 cached_slots;
332 /* The number of valid transactions in journal */
333 __u16 journal_transactions;
335 unsigned int blocks[MAX_HEIGHT];
336 unsigned int next_key_nr[MAX_HEIGHT];
337 };
339 /* The cached s+tree blocks in FSYS_BUF, see below
340 * for a more detailed description.
341 */
342 #define ROOT ((char *) FSYS_BUF)
343 #define CACHE(i) (ROOT + ((i) << INFO->fullblocksize_shift))
344 #define LEAF CACHE (DISK_LEAF_NODE_LEVEL)
346 #define BLOCKHEAD(cache) ((struct block_head *) cache)
347 #define ITEMHEAD ((struct item_head *) ((char *) LEAF + BLKH_SIZE))
348 #define KEY(cache) ((struct key *) ((char *) cache + BLKH_SIZE))
349 #define DC(cache) ((struct disk_child *) \
350 ((char *) cache + BLKH_SIZE + KEY_SIZE * nr_item))
351 /* The fsys_reiser_info block.
352 */
353 #define INFO \
354 ((struct fsys_reiser_info *) ((char *) FSYS_BUF + FSYSREISER_CACHE_SIZE))
355 /*
356 * The journal cache. For each transaction it contains the number of
357 * blocks followed by the real block numbers of this transaction.
358 *
359 * If the block numbers of some transaction won't fit in this space,
360 * this list is stopped with a 0xffffffff marker and the remaining
361 * uncommitted transactions aren't cached.
362 */
363 #define JOURNAL_START ((__u32 *) (INFO + 1))
364 #define JOURNAL_END ((__u32 *) (FSYS_BUF + FSYS_BUFLEN))
366 #define log2 grub_log2
368 static __inline__ int
369 is_power_of_two (unsigned long word)
370 {
371 return (word & -word) == word;
372 }
374 static int
375 journal_read (fsi_file_t *ffi, int block, int len, char *buffer)
376 {
377 return devread (ffi, (INFO->journal_block + block) << INFO->blocksize_shift,
378 0, len, buffer);
379 }
381 /* Read a block from ReiserFS file system, taking the journal into
382 * account. If the block nr is in the journal, the block from the
383 * journal taken.
384 */
385 static int
386 block_read (fsi_file_t *ffi, int blockNr, int start, int len, char *buffer)
387 {
388 int transactions = INFO->journal_transactions;
389 int desc_block = INFO->journal_first_desc;
390 int journal_mask = INFO->journal_block_count - 1;
391 int translatedNr = blockNr;
392 __u32 *journal_table = JOURNAL_START;
393 while (transactions-- > 0)
394 {
395 int i = 0;
396 int j_len;
397 if (*journal_table != 0xffffffff)
398 {
399 /* Search for the blockNr in cached journal */
400 j_len = *journal_table++;
401 while (i++ < j_len)
402 {
403 if (*journal_table++ == blockNr)
404 {
405 journal_table += j_len - i;
406 goto found;
407 }
408 }
409 }
410 else
411 {
412 /* This is the end of cached journal marker. The remaining
413 * transactions are still on disk.
414 */
415 struct reiserfs_journal_desc desc;
416 struct reiserfs_journal_commit commit;
418 if (! journal_read (ffi, desc_block, sizeof (desc), (char *) &desc))
419 return 0;
421 j_len = desc.j_len;
422 while (i < j_len && i < JOURNAL_TRANS_HALF)
423 if (desc.j_realblock[i++] == blockNr)
424 goto found;
426 if (j_len >= JOURNAL_TRANS_HALF)
427 {
428 int commit_block = (desc_block + 1 + j_len) & journal_mask;
429 if (! journal_read (ffi, commit_block,
430 sizeof (commit), (char *) &commit))
431 return 0;
432 while (i < j_len)
433 if (commit.j_realblock[i++ - JOURNAL_TRANS_HALF] == blockNr)
434 goto found;
435 }
436 }
437 goto not_found;
439 found:
440 translatedNr = INFO->journal_block + ((desc_block + i) & journal_mask);
441 #ifdef REISERDEBUG
442 printf ("block_read: block %d is mapped to journal block %d.\n",
443 blockNr, translatedNr - INFO->journal_block);
444 #endif
445 /* We must continue the search, as this block may be overwritten
446 * in later transactions.
447 */
448 not_found:
449 desc_block = (desc_block + 2 + j_len) & journal_mask;
450 }
451 return devread (ffi, translatedNr << INFO->blocksize_shift, start, len, buffer);
452 }
454 /* Init the journal data structure. We try to cache as much as
455 * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
456 * we can still read the rest from the disk on demand.
457 *
458 * The first number of valid transactions and the descriptor block of the
459 * first valid transaction are held in INFO. The transactions are all
460 * adjacent, but we must take care of the journal wrap around.
461 */
462 static int
463 journal_init (fsi_file_t *ffi)
464 {
465 unsigned int block_count = INFO->journal_block_count;
466 unsigned int desc_block;
467 unsigned int commit_block;
468 unsigned int next_trans_id;
469 struct reiserfs_journal_header header;
470 struct reiserfs_journal_desc desc;
471 struct reiserfs_journal_commit commit;
472 __u32 *journal_table = JOURNAL_START;
474 journal_read (ffi, block_count, sizeof (header), (char *) &header);
475 desc_block = header.j_first_unflushed_offset;
476 if (desc_block >= block_count)
477 return 0;
479 INFO->journal_first_desc = desc_block;
480 next_trans_id = header.j_last_flush_trans_id + 1;
482 #ifdef REISERDEBUG
483 printf ("journal_init: last flushed %d\n",
484 header.j_last_flush_trans_id);
485 #endif
487 while (1)
488 {
489 journal_read (ffi, desc_block, sizeof (desc), (char *) &desc);
490 if (substring (JOURNAL_DESC_MAGIC, desc.j_magic)
491 || desc.j_trans_id != next_trans_id
492 || desc.j_mount_id != header.j_mount_id)
493 /* no more valid transactions */
494 break;
496 commit_block = (desc_block + desc.j_len + 1) & (block_count - 1);
497 journal_read (ffi, commit_block, sizeof (commit), (char *) &commit);
498 if (desc.j_trans_id != commit.j_trans_id
499 || desc.j_len != commit.j_len)
500 /* no more valid transactions */
501 break;
503 #ifdef REISERDEBUG
504 printf ("Found valid transaction %d/%d at %d.\n",
505 desc.j_trans_id, desc.j_mount_id, desc_block);
506 #endif
508 next_trans_id++;
509 if (journal_table < JOURNAL_END)
510 {
511 if ((journal_table + 1 + desc.j_len) >= JOURNAL_END)
512 {
513 /* The table is almost full; mark the end of the cached
514 * journal.*/
515 *journal_table = 0xffffffff;
516 journal_table = JOURNAL_END;
517 }
518 else
519 {
520 int i;
521 /* Cache the length and the realblock numbers in the table.
522 * The block number of descriptor can easily be computed.
523 * and need not to be stored here.
524 */
525 *journal_table++ = desc.j_len;
526 for (i = 0; i < desc.j_len && i < JOURNAL_TRANS_HALF; i++)
527 {
528 *journal_table++ = desc.j_realblock[i];
529 #ifdef REISERDEBUG
530 printf ("block %d is in journal %d.\n",
531 desc.j_realblock[i], desc_block);
532 #endif
533 }
534 for ( ; i < desc.j_len; i++)
535 {
536 *journal_table++ = commit.j_realblock[i-JOURNAL_TRANS_HALF];
537 #ifdef REISERDEBUG
538 printf ("block %d is in journal %d.\n",
539 commit.j_realblock[i-JOURNAL_TRANS_HALF],
540 desc_block);
541 #endif
542 }
543 }
544 }
545 desc_block = (commit_block + 1) & (block_count - 1);
546 }
547 #ifdef REISERDEBUG
548 printf ("Transaction %d/%d at %d isn't valid.\n",
549 desc.j_trans_id, desc.j_mount_id, desc_block);
550 #endif
552 INFO->journal_transactions
553 = next_trans_id - header.j_last_flush_trans_id - 1;
554 return errnum == 0;
555 }
557 /* check filesystem types and read superblock into memory buffer */
558 static int
559 reiserfs_mount (fsi_file_t *ffi, const char *options)
560 {
561 struct reiserfs_super_block super;
562 int superblock = REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
564 if (/*part_length < superblock + (sizeof (super) >> SECTOR_BITS)
565 || */ !devread (ffi, superblock, 0, sizeof (struct reiserfs_super_block),
566 (char *) &super)
567 || (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0
568 && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
569 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
570 || (/* check that this is not a copy inside the journal log */
571 super.s_journal_block * super.s_blocksize
572 <= REISERFS_DISK_OFFSET_IN_BYTES))
573 {
574 /* Try old super block position */
575 superblock = REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
576 if (/*part_length < superblock + (sizeof (super) >> SECTOR_BITS)
577 || */ ! devread (ffi, superblock, 0, sizeof (struct reiserfs_super_block),
578 (char *) &super))
579 return 0;
581 if (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0
582 && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
583 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
584 {
585 /* pre journaling super block ? */
586 if (substring (REISERFS_SUPER_MAGIC_STRING,
587 (char*) ((char *) &super + 20)) > 0)
588 return 0;
590 super.s_blocksize = REISERFS_OLD_BLOCKSIZE;
591 super.s_journal_block = 0;
592 super.s_version = 0;
593 }
594 }
596 /* check the version number. */
597 if (super.s_version > REISERFS_MAX_SUPPORTED_VERSION)
598 return 0;
600 INFO->version = super.s_version;
601 INFO->blocksize = super.s_blocksize;
602 INFO->fullblocksize_shift = log2 (super.s_blocksize);
603 INFO->blocksize_shift = INFO->fullblocksize_shift - SECTOR_BITS;
604 INFO->cached_slots =
605 (FSYSREISER_CACHE_SIZE >> INFO->fullblocksize_shift) - 1;
607 #ifdef REISERDEBUG
608 printf ("reiserfs_mount: version=%d, blocksize=%d\n",
609 INFO->version, INFO->blocksize);
610 #endif /* REISERDEBUG */
612 /* Clear node cache. */
613 memset (INFO->blocks, 0, sizeof (INFO->blocks));
615 if (super.s_blocksize < FSYSREISER_MIN_BLOCKSIZE
616 || super.s_blocksize > FSYSREISER_MAX_BLOCKSIZE
617 || (SECTOR_SIZE << INFO->blocksize_shift) != super.s_blocksize)
618 return 0;
620 /* Initialize journal code. If something fails we end with zero
621 * journal_transactions, so we don't access the journal at all.
622 */
623 INFO->journal_transactions = 0;
624 if (super.s_journal_block != 0 && super.s_journal_dev == 0)
625 {
626 INFO->journal_block = super.s_journal_block;
627 INFO->journal_block_count = super.s_journal_size;
628 if (is_power_of_two (INFO->journal_block_count))
629 journal_init (ffi);
631 /* Read in super block again, maybe it is in the journal */
632 block_read (ffi, superblock >> INFO->blocksize_shift,
633 0, sizeof (struct reiserfs_super_block), (char *) &super);
634 }
636 if (! block_read (ffi, super.s_root_block, 0, INFO->blocksize, (char*) ROOT))
637 return 0;
639 INFO->tree_depth = BLOCKHEAD (ROOT)->blk_level;
641 #ifdef REISERDEBUG
642 printf ("root read_in: block=%d, depth=%d\n",
643 super.s_root_block, INFO->tree_depth);
644 #endif /* REISERDEBUG */
646 if (INFO->tree_depth >= MAX_HEIGHT)
647 return 0;
648 if (INFO->tree_depth == DISK_LEAF_NODE_LEVEL)
649 {
650 /* There is only one node in the whole filesystem,
651 * which is simultanously leaf and root */
652 memcpy (LEAF, ROOT, INFO->blocksize);
653 }
654 return 1;
655 }
657 /***************** TREE ACCESSING METHODS *****************************/
659 /* I assume you are familiar with the ReiserFS tree, if not go to
660 * http://www.namesys.com/content_table.html
661 *
662 * My tree node cache is organized as following
663 * 0 ROOT node
664 * 1 LEAF node (if the ROOT is also a LEAF it is copied here
665 * 2-n other nodes on current path from bottom to top.
666 * if there is not enough space in the cache, the top most are
667 * omitted.
668 *
669 * I have only two methods to find a key in the tree:
670 * search_stat(dir_id, objectid) searches for the stat entry (always
671 * the first entry) of an object.
672 * next_key() gets the next key in tree order.
673 *
674 * This means, that I can only sequential reads of files are
675 * efficient, but this really doesn't hurt for grub.
676 */
678 /* Read in the node at the current path and depth into the node cache.
679 * You must set INFO->blocks[depth] before.
680 */
681 static char *
682 read_tree_node (fsi_file_t *ffi, unsigned int blockNr, int depth)
683 {
684 char* cache = CACHE(depth);
685 int num_cached = INFO->cached_slots;
686 if (depth < num_cached)
687 {
688 /* This is the cached part of the path. Check if same block is
689 * needed.
690 */
691 if (blockNr == INFO->blocks[depth])
692 return cache;
693 }
694 else
695 cache = CACHE(num_cached);
697 #ifdef REISERDEBUG
698 printf (" next read_in: block=%d (depth=%d)\n",
699 blockNr, depth);
700 #endif /* REISERDEBUG */
701 if (! block_read (ffi, blockNr, 0, INFO->blocksize, cache))
702 return 0;
703 /* Make sure it has the right node level */
704 if (BLOCKHEAD (cache)->blk_level != depth)
705 {
706 errnum = ERR_FSYS_CORRUPT;
707 return 0;
708 }
710 INFO->blocks[depth] = blockNr;
711 return cache;
712 }
714 /* Get the next key, i.e. the key following the last retrieved key in
715 * tree order. INFO->current_ih and
716 * INFO->current_info are adapted accordingly. */
717 static int
718 next_key (fsi_file_t *ffi)
719 {
720 int depth;
721 struct item_head *ih = INFO->current_ih + 1;
722 char *cache;
724 #ifdef REISERDEBUG
725 printf ("next_key:\n old ih: key %d:%d:%d:%d version:%d\n",
726 INFO->current_ih->ih_key.k_dir_id,
727 INFO->current_ih->ih_key.k_objectid,
728 INFO->current_ih->ih_key.u.v1.k_offset,
729 INFO->current_ih->ih_key.u.v1.k_uniqueness,
730 INFO->current_ih->ih_version);
731 #endif /* REISERDEBUG */
733 if (ih == &ITEMHEAD[BLOCKHEAD (LEAF)->blk_nr_item])
734 {
735 depth = DISK_LEAF_NODE_LEVEL;
736 /* The last item, was the last in the leaf node.
737 * Read in the next block
738 */
739 do
740 {
741 if (depth == INFO->tree_depth)
742 {
743 /* There are no more keys at all.
744 * Return a dummy item with MAX_KEY */
745 ih = (struct item_head *) &BLOCKHEAD (LEAF)->blk_right_delim_key;
746 goto found;
747 }
748 depth++;
749 #ifdef REISERDEBUG
750 printf (" depth=%d, i=%d\n", depth, INFO->next_key_nr[depth]);
751 #endif /* REISERDEBUG */
752 }
753 while (INFO->next_key_nr[depth] == 0);
755 if (depth == INFO->tree_depth)
756 cache = ROOT;
757 else if (depth <= INFO->cached_slots)
758 cache = CACHE (depth);
759 else
760 {
761 cache = read_tree_node (ffi, INFO->blocks[depth], depth);
762 if (! cache)
763 return 0;
764 }
766 do
767 {
768 int nr_item = BLOCKHEAD (cache)->blk_nr_item;
769 int key_nr = INFO->next_key_nr[depth]++;
770 #ifdef REISERDEBUG
771 printf (" depth=%d, i=%d/%d\n", depth, key_nr, nr_item);
772 #endif /* REISERDEBUG */
773 if (key_nr == nr_item)
774 /* This is the last item in this block, set the next_key_nr to 0 */
775 INFO->next_key_nr[depth] = 0;
777 cache = read_tree_node (ffi, DC (cache)[key_nr].dc_block_number, --depth);
778 if (! cache)
779 return 0;
780 }
781 while (depth > DISK_LEAF_NODE_LEVEL);
783 ih = ITEMHEAD;
784 }
785 found:
786 INFO->current_ih = ih;
787 INFO->current_item = &LEAF[ih->ih_item_location];
788 #ifdef REISERDEBUG
789 printf (" new ih: key %d:%d:%d:%d version:%d\n",
790 INFO->current_ih->ih_key.k_dir_id,
791 INFO->current_ih->ih_key.k_objectid,
792 INFO->current_ih->ih_key.u.v1.k_offset,
793 INFO->current_ih->ih_key.u.v1.k_uniqueness,
794 INFO->current_ih->ih_version);
795 #endif /* REISERDEBUG */
796 return 1;
797 }
799 /* preconditions: reiserfs_mount already executed, therefore
800 * INFO block is valid
801 * returns: 0 if error (errnum is set),
802 * nonzero iff we were able to find the key successfully.
803 * postconditions: on a nonzero return, the current_ih and
804 * current_item fields describe the key that equals the
805 * searched key. INFO->next_key contains the next key after
806 * the searched key.
807 * side effects: messes around with the cache.
808 */
809 static int
810 search_stat (fsi_file_t *ffi, __u32 dir_id, __u32 objectid)
811 {
812 char *cache;
813 int depth;
814 int nr_item;
815 int i;
816 struct item_head *ih;
817 #ifdef REISERDEBUG
818 printf ("search_stat:\n key %d:%d:0:0\n", dir_id, objectid);
819 #endif /* REISERDEBUG */
821 depth = INFO->tree_depth;
822 cache = ROOT;
824 while (depth > DISK_LEAF_NODE_LEVEL)
825 {
826 struct key *key;
827 nr_item = BLOCKHEAD (cache)->blk_nr_item;
829 key = KEY (cache);
831 for (i = 0; i < nr_item; i++)
832 {
833 if (key->k_dir_id > dir_id
834 || (key->k_dir_id == dir_id
835 && (key->k_objectid > objectid
836 || (key->k_objectid == objectid
837 && (key->u.v1.k_offset
838 | key->u.v1.k_uniqueness) > 0))))
839 break;
840 key++;
841 }
843 #ifdef REISERDEBUG
844 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item);
845 #endif /* REISERDEBUG */
846 INFO->next_key_nr[depth] = (i == nr_item) ? 0 : i+1;
847 cache = read_tree_node (ffi, DC (cache)[i].dc_block_number, --depth);
848 if (! cache)
849 return 0;
850 }
852 /* cache == LEAF */
853 nr_item = BLOCKHEAD (LEAF)->blk_nr_item;
854 ih = ITEMHEAD;
855 for (i = 0; i < nr_item; i++)
856 {
857 if (ih->ih_key.k_dir_id == dir_id
858 && ih->ih_key.k_objectid == objectid
859 && ih->ih_key.u.v1.k_offset == 0
860 && ih->ih_key.u.v1.k_uniqueness == 0)
861 {
862 #ifdef REISERDEBUG
863 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item);
864 #endif /* REISERDEBUG */
865 INFO->current_ih = ih;
866 INFO->current_item = &LEAF[ih->ih_item_location];
867 return 1;
868 }
869 ih++;
870 }
871 errnum = ERR_FSYS_CORRUPT;
872 return 0;
873 }
875 static int
876 reiserfs_read (fsi_file_t *ffi, char *buf, int len)
877 {
878 unsigned int blocksize;
879 unsigned int offset;
880 unsigned int to_read;
881 char *prev_buf = buf;
883 #ifdef REISERDEBUG
884 printf ("reiserfs_read: filepos=%d len=%d, offset=%x:%x\n",
885 filepos, len, (__u64) IH_KEY_OFFSET (INFO->current_ih) - 1);
886 #endif /* REISERDEBUG */
888 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
889 || IH_KEY_OFFSET (INFO->current_ih) > filepos + 1)
890 {
891 search_stat (ffi, INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid);
892 goto get_next_key;
893 }
895 while (! errnum)
896 {
897 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid)
898 break;
900 offset = filepos - IH_KEY_OFFSET (INFO->current_ih) + 1;
901 blocksize = INFO->current_ih->ih_item_len;
903 #ifdef REISERDEBUG
904 printf (" loop: filepos=%d len=%d, offset=%d blocksize=%d\n",
905 filepos, len, offset, blocksize);
906 #endif /* REISERDEBUG */
908 if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_DIRECT)
909 && offset < blocksize)
910 {
911 #ifdef REISERDEBUG
912 printf ("direct_read: offset=%d, blocksize=%d\n",
913 offset, blocksize);
914 #endif /* REISERDEBUG */
915 to_read = blocksize - offset;
916 if (to_read > len)
917 to_read = len;
919 if (disk_read_hook != NULL)
920 {
921 disk_read_func = disk_read_hook;
923 block_read (ffi, INFO->blocks[DISK_LEAF_NODE_LEVEL],
924 (INFO->current_item - LEAF + offset), to_read, buf);
926 disk_read_func = NULL;
927 }
928 else
929 memcpy (buf, INFO->current_item + offset, to_read);
930 goto update_buf_len;
931 }
932 else if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_INDIRECT))
933 {
934 blocksize = (blocksize >> 2) << INFO->fullblocksize_shift;
935 #ifdef REISERDEBUG
936 printf ("indirect_read: offset=%d, blocksize=%d\n",
937 offset, blocksize);
938 #endif /* REISERDEBUG */
940 while (offset < blocksize)
941 {
942 __u32 blocknr = ((__u32 *) INFO->current_item)
943 [offset >> INFO->fullblocksize_shift];
944 int blk_offset = offset & (INFO->blocksize-1);
946 to_read = INFO->blocksize - blk_offset;
947 if (to_read > len)
948 to_read = len;
950 disk_read_func = disk_read_hook;
952 /* Journal is only for meta data. Data blocks can be read
953 * directly without using block_read
954 */
955 devread (ffi, blocknr << INFO->blocksize_shift,
956 blk_offset, to_read, buf);
958 disk_read_func = NULL;
959 update_buf_len:
960 len -= to_read;
961 buf += to_read;
962 offset += to_read;
963 filepos += to_read;
964 if (len == 0)
965 goto done;
966 }
967 }
968 get_next_key:
969 next_key (ffi);
970 }
971 done:
972 return errnum ? 0 : buf - prev_buf;
973 }
976 /* preconditions: reiserfs_mount already executed, therefore
977 * INFO block is valid
978 * returns: 0 if error, nonzero iff we were able to find the file successfully
979 * postconditions: on a nonzero return, INFO->fileinfo contains the info
980 * of the file we were trying to look up, filepos is 0 and filemax is
981 * the size of the file.
982 */
983 static int
984 reiserfs_dir (fsi_file_t *ffi, char *dirname)
985 {
986 struct reiserfs_de_head *de_head;
987 char *rest, ch;
988 __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
989 #ifndef STAGE1_5
990 int do_possibilities = 0;
991 #endif /* ! STAGE1_5 */
992 char linkbuf[PATH_MAX]; /* buffer for following symbolic links */
993 int link_count = 0;
994 int mode;
996 dir_id = REISERFS_ROOT_PARENT_OBJECTID;
997 objectid = REISERFS_ROOT_OBJECTID;
999 while (1)
1001 #ifdef REISERDEBUG
1002 printf ("dirname=%s\n", dirname);
1003 #endif /* REISERDEBUG */
1005 /* Search for the stat info first. */
1006 if (! search_stat (ffi, dir_id, objectid))
1007 return 0;
1009 #ifdef REISERDEBUG
1010 printf ("sd_mode=%x sd_size=%d\n",
1011 ((struct stat_data *) INFO->current_item)->sd_mode,
1012 ((struct stat_data *) INFO->current_item)->sd_size);
1013 #endif /* REISERDEBUG */
1015 mode = ((struct stat_data *) INFO->current_item)->sd_mode;
1017 /* If we've got a symbolic link, then chase it. */
1018 if (S_ISLNK (mode))
1020 int len;
1021 if (++link_count > MAX_LINK_COUNT)
1023 errnum = ERR_SYMLINK_LOOP;
1024 return 0;
1027 /* Get the symlink size. */
1028 filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1030 /* Find out how long our remaining name is. */
1031 len = 0;
1032 while (dirname[len] && !isspace ((uint8_t)dirname[len]))
1033 len++;
1035 if (filemax + len > sizeof (linkbuf) - 1)
1037 errnum = ERR_FILELENGTH;
1038 return 0;
1041 /* Copy the remaining name to the end of the symlink data.
1042 Note that DIRNAME and LINKBUF may overlap! */
1043 grub_memmove (linkbuf + filemax, dirname, len+1);
1045 INFO->fileinfo.k_dir_id = dir_id;
1046 INFO->fileinfo.k_objectid = objectid;
1047 filepos = 0;
1048 if (! next_key (ffi)
1049 || reiserfs_read (ffi, linkbuf, filemax) != filemax)
1051 if (! errnum)
1052 errnum = ERR_FSYS_CORRUPT;
1053 return 0;
1056 #ifdef REISERDEBUG
1057 printf ("symlink=%s\n", linkbuf);
1058 #endif /* REISERDEBUG */
1060 dirname = linkbuf;
1061 if (*dirname == '/')
1063 /* It's an absolute link, so look it up in root. */
1064 dir_id = REISERFS_ROOT_PARENT_OBJECTID;
1065 objectid = REISERFS_ROOT_OBJECTID;
1067 else
1069 /* Relative, so look it up in our parent directory. */
1070 dir_id = parent_dir_id;
1071 objectid = parent_objectid;
1074 /* Now lookup the new name. */
1075 continue;
1078 /* if we have a real file (and we're not just printing possibilities),
1079 then this is where we want to exit */
1081 if (! *dirname || isspace ((uint8_t)*dirname))
1083 if (! S_ISREG (mode))
1085 errnum = ERR_BAD_FILETYPE;
1086 return 0;
1089 filepos = 0;
1090 filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1092 /* If this is a new stat data and size is > 4GB set filemax to
1093 * maximum
1094 */
1095 if (INFO->current_ih->ih_version == ITEM_VERSION_2
1096 && ((struct stat_data *) INFO->current_item)->sd_size_hi > 0)
1097 filemax = 0xffffffff;
1099 INFO->fileinfo.k_dir_id = dir_id;
1100 INFO->fileinfo.k_objectid = objectid;
1101 return next_key (ffi);
1104 /* continue with the file/directory name interpretation */
1105 while (*dirname == '/')
1106 dirname++;
1107 if (! S_ISDIR (mode))
1109 errnum = ERR_BAD_FILETYPE;
1110 return 0;
1112 for (rest = dirname; (ch = *rest) && ! isspace ((uint8_t)ch) && ch != '/'; rest++);
1113 *rest = 0;
1115 # ifndef STAGE1_5
1116 if (print_possibilities && ch != '/')
1117 do_possibilities = 1;
1118 # endif /* ! STAGE1_5 */
1120 while (1)
1122 char *name_end;
1123 int num_entries;
1125 if (! next_key (ffi))
1126 return 0;
1127 #ifdef REISERDEBUG
1128 printf ("ih: key %d:%d:%d:%d version:%d\n",
1129 INFO->current_ih->ih_key.k_dir_id,
1130 INFO->current_ih->ih_key.k_objectid,
1131 INFO->current_ih->ih_key.u.v1.k_offset,
1132 INFO->current_ih->ih_key.u.v1.k_uniqueness,
1133 INFO->current_ih->ih_version);
1134 #endif /* REISERDEBUG */
1136 if (INFO->current_ih->ih_key.k_objectid != objectid)
1137 break;
1139 name_end = INFO->current_item + INFO->current_ih->ih_item_len;
1140 de_head = (struct reiserfs_de_head *) INFO->current_item;
1141 num_entries = INFO->current_ih->u.ih_entry_count;
1142 while (num_entries > 0)
1144 char *filename = INFO->current_item + de_head->deh_location;
1145 char tmp = *name_end;
1146 if ((de_head->deh_state & DEH_Visible))
1148 int cmp;
1149 /* Directory names in ReiserFS are not null
1150 * terminated. We write a temporary 0 behind it.
1151 * NOTE: that this may overwrite the first block in
1152 * the tree cache. That doesn't hurt as long as we
1153 * don't call next_key () in between.
1154 */
1155 *name_end = 0;
1156 cmp = substring (dirname, filename);
1157 *name_end = tmp;
1158 # ifndef STAGE1_5
1159 if (do_possibilities)
1161 if (cmp <= 0)
1163 if (print_possibilities > 0)
1164 print_possibilities = -print_possibilities;
1165 *name_end = 0;
1166 print_a_completion (filename);
1167 *name_end = tmp;
1170 else
1171 # endif /* ! STAGE1_5 */
1172 if (cmp == 0)
1173 goto found;
1175 /* The beginning of this name marks the end of the next name.
1176 */
1177 name_end = filename;
1178 de_head++;
1179 num_entries--;
1183 # ifndef STAGE1_5
1184 if (print_possibilities < 0)
1185 return 1;
1186 # endif /* ! STAGE1_5 */
1188 errnum = ERR_FILE_NOT_FOUND;
1189 *rest = ch;
1190 return 0;
1192 found:
1194 *rest = ch;
1195 dirname = rest;
1197 parent_dir_id = dir_id;
1198 parent_objectid = objectid;
1199 dir_id = de_head->deh_dir_id;
1200 objectid = de_head->deh_objectid;
1204 int
1205 reiserfs_embed (fsi_file_t *ffi, int *start_sector, int needed_sectors)
1207 struct reiserfs_super_block super;
1208 int num_sectors;
1210 if (! devread (ffi, REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS, 0,
1211 sizeof (struct reiserfs_super_block), (char *) &super))
1212 return 0;
1214 *start_sector = 1; /* reserve first sector for stage1 */
1215 if ((substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1216 || substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1217 || substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) <= 0)
1218 && (/* check that this is not a super block copy inside
1219 * the journal log */
1220 super.s_journal_block * super.s_blocksize
1221 > REISERFS_DISK_OFFSET_IN_BYTES))
1222 num_sectors = (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1223 else
1224 num_sectors = (REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1226 return (needed_sectors <= num_sectors);
1229 fsi_plugin_ops_t *
1230 fsi_init_plugin(int version, fsi_plugin_t *fp, const char **name)
1232 static fsig_plugin_ops_t ops = {
1233 FSIMAGE_PLUGIN_VERSION,
1234 .fpo_mount = reiserfs_mount,
1235 .fpo_dir = reiserfs_dir,
1236 .fpo_read = reiserfs_read
1237 };
1239 *name = "reiserfs";
1240 return (fsig_init(fp, &ops));