direct-io.hg

view tools/libfsimage/reiserfs/fsys_reiserfs.c @ 12511:60b60f75a221

[POWERPC][LIBFS] Fix build breakage in log2 assembly.
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
author kfraser@localhost.localdomain
date Wed Nov 22 10:10:29 2006 +0000 (2006-11-22)
parents 825be74657c3
children 89ca591a2c21
line source
1 /* fsys_reiserfs.c - an implementation for the ReiserFS filesystem */
2 /*
3 * GRUB -- GRand Unified Bootloader
4 * Copyright (C) 2000, 2001 Free Software Foundation, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
21 #include <fsimage_grub.h>
23 #undef REISERDEBUG
25 /* Some parts of this code (mainly the structures and defines) are
26 * from the original reiser fs code, as found in the linux kernel.
27 */
29 /* include/asm-i386/types.h */
30 typedef __signed__ char __s8;
31 typedef unsigned char __u8;
32 typedef __signed__ short __s16;
33 typedef unsigned short __u16;
34 typedef __signed__ int __s32;
35 typedef unsigned int __u32;
36 typedef unsigned long long __u64;
38 /* linux/posix_type.h */
39 typedef long linux_off_t;
41 /* linux/little_endian.h */
42 #define __cpu_to_le64(x) ((__u64) (x))
43 #define __le64_to_cpu(x) ((__u64) (x))
44 #define __cpu_to_le32(x) ((__u32) (x))
45 #define __le32_to_cpu(x) ((__u32) (x))
46 #define __cpu_to_le16(x) ((__u16) (x))
47 #define __le16_to_cpu(x) ((__u16) (x))
49 /* include/linux/reiser_fs.h */
50 /* This is the new super block of a journaling reiserfs system */
51 struct reiserfs_super_block
52 {
53 __u32 s_block_count; /* blocks count */
54 __u32 s_free_blocks; /* free blocks count */
55 __u32 s_root_block; /* root block number */
56 __u32 s_journal_block; /* journal block number */
57 __u32 s_journal_dev; /* journal device number */
58 __u32 s_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */
59 __u32 s_journal_trans_max; /* max number of blocks in a transaction. */
60 __u32 s_journal_magic; /* random value made on fs creation */
61 __u32 s_journal_max_batch; /* max number of blocks to batch into a trans */
62 __u32 s_journal_max_commit_age; /* in seconds, how old can an async commit be */
63 __u32 s_journal_max_trans_age; /* in seconds, how old can a transaction be */
64 __u16 s_blocksize; /* block size */
65 __u16 s_oid_maxsize; /* max size of object id array */
66 __u16 s_oid_cursize; /* current size of object id array */
67 __u16 s_state; /* valid or error */
68 char s_magic[16]; /* reiserfs magic string indicates that file system is reiserfs */
69 __u16 s_tree_height; /* height of disk tree */
70 __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */
71 __u16 s_version;
72 char s_unused[128]; /* zero filled by mkreiserfs */
73 };
75 #define REISERFS_MAX_SUPPORTED_VERSION 2
76 #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs"
77 #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
78 #define REISER3FS_SUPER_MAGIC_STRING "ReIsEr3Fs"
80 #define MAX_HEIGHT 7
82 /* must be correct to keep the desc and commit structs at 4k */
83 #define JOURNAL_TRANS_HALF 1018
85 /* first block written in a commit. */
86 struct reiserfs_journal_desc {
87 __u32 j_trans_id; /* id of commit */
88 __u32 j_len; /* length of commit. len +1 is the commit block */
89 __u32 j_mount_id; /* mount id of this trans*/
90 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the first blocks */
91 char j_magic[12];
92 };
94 /* last block written in a commit */
95 struct reiserfs_journal_commit {
96 __u32 j_trans_id; /* must match j_trans_id from the desc block */
97 __u32 j_len; /* ditto */
98 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the last blocks */
99 char j_digest[16]; /* md5 sum of all the blocks involved, including desc and commit. not used, kill it */
100 };
102 /* this header block gets written whenever a transaction is considered
103 fully flushed, and is more recent than the last fully flushed
104 transaction.
105 fully flushed means all the log blocks and all the real blocks are
106 on disk, and this transaction does not need to be replayed.
107 */
108 struct reiserfs_journal_header {
109 /* id of last fully flushed transaction */
110 __u32 j_last_flush_trans_id;
111 /* offset in the log of where to start replay after a crash */
112 __u32 j_first_unflushed_offset;
113 /* mount id to detect very old transactions */
114 __u32 j_mount_id;
115 };
117 /* magic string to find desc blocks in the journal */
118 #define JOURNAL_DESC_MAGIC "ReIsErLB"
121 /*
122 * directories use this key as well as old files
123 */
124 struct offset_v1
125 {
126 /*
127 * for regular files this is the offset to the first byte of the
128 * body, contained in the object-item, as measured from the start of
129 * the entire body of the object.
130 *
131 * for directory entries, k_offset consists of hash derived from
132 * hashing the name and using few bits (23 or more) of the resulting
133 * hash, and generation number that allows distinguishing names with
134 * hash collisions. If number of collisions overflows generation
135 * number, we return EEXIST. High order bit is 0 always
136 */
137 __u32 k_offset;
138 __u32 k_uniqueness;
139 };
141 struct offset_v2
142 {
143 /*
144 * for regular files this is the offset to the first byte of the
145 * body, contained in the object-item, as measured from the start of
146 * the entire body of the object.
147 *
148 * for directory entries, k_offset consists of hash derived from
149 * hashing the name and using few bits (23 or more) of the resulting
150 * hash, and generation number that allows distinguishing names with
151 * hash collisions. If number of collisions overflows generation
152 * number, we return EEXIST. High order bit is 0 always
153 */
154 __u64 k_offset:60;
155 __u64 k_type: 4;
156 };
159 struct key
160 {
161 /* packing locality: by default parent directory object id */
162 __u32 k_dir_id;
163 /* object identifier */
164 __u32 k_objectid;
165 /* the offset and node type (old and new form) */
166 union
167 {
168 struct offset_v1 v1;
169 struct offset_v2 v2;
170 }
171 u;
172 };
174 #define KEY_SIZE (sizeof (struct key))
176 /* Header of a disk block. More precisely, header of a formatted leaf
177 or internal node, and not the header of an unformatted node. */
178 struct block_head
179 {
180 __u16 blk_level; /* Level of a block in the tree. */
181 __u16 blk_nr_item; /* Number of keys/items in a block. */
182 __u16 blk_free_space; /* Block free space in bytes. */
183 struct key blk_right_delim_key; /* Right delimiting key for this block (supported for leaf level nodes
184 only) */
185 };
186 #define BLKH_SIZE (sizeof (struct block_head))
187 #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */
189 struct item_head
190 {
191 struct key ih_key; /* Everything in the tree is found by searching for it based on its key.*/
193 union
194 {
195 __u16 ih_free_space; /* The free space in the last unformatted node of an indirect item if this
196 is an indirect item. This equals 0xFFFF iff this is a direct item or
197 stat data item. Note that the key, not this field, is used to determine
198 the item type, and thus which field this union contains. */
199 __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory
200 entries in the directory item. */
201 }
202 u;
203 __u16 ih_item_len; /* total size of the item body */
204 __u16 ih_item_location; /* an offset to the item body within the block */
205 __u16 ih_version; /* ITEM_VERSION_1 for all old items,
206 ITEM_VERSION_2 for new ones.
207 Highest bit is set by fsck
208 temporary, cleaned after all done */
209 };
210 /* size of item header */
211 #define IH_SIZE (sizeof (struct item_head))
213 #define ITEM_VERSION_1 0
214 #define ITEM_VERSION_2 1
215 #define IH_KEY_OFFSET(ih) ((ih)->ih_version == ITEM_VERSION_1 \
216 ? (ih)->ih_key.u.v1.k_offset \
217 : (ih)->ih_key.u.v2.k_offset)
219 #define IH_KEY_ISTYPE(ih, type) ((ih)->ih_version == ITEM_VERSION_1 \
220 ? (ih)->ih_key.u.v1.k_uniqueness == V1_##type \
221 : (ih)->ih_key.u.v2.k_type == V2_##type)
223 struct disk_child
224 {
225 unsigned long dc_block_number; /* Disk child's block number. */
226 unsigned short dc_size; /* Disk child's used space. */
227 };
229 #define DC_SIZE (sizeof (struct disk_child))
231 /* Stat Data on disk.
232 *
233 * Note that reiserfs has two different forms of stat data. Luckily
234 * the fields needed by grub are at the same position.
235 */
236 struct stat_data
237 {
238 __u16 sd_mode; /* file type, permissions */
239 __u16 sd_notused1[3]; /* fields not needed by reiserfs */
240 __u32 sd_size; /* file size */
241 __u32 sd_size_hi; /* file size high 32 bits (since version 2) */
242 };
244 struct reiserfs_de_head
245 {
246 __u32 deh_offset; /* third component of the directory entry key */
247 __u32 deh_dir_id; /* objectid of the parent directory of the
248 object, that is referenced by directory entry */
249 __u32 deh_objectid;/* objectid of the object, that is referenced by
250 directory entry */
251 __u16 deh_location;/* offset of name in the whole item */
252 __u16 deh_state; /* whether 1) entry contains stat data (for
253 future), and 2) whether entry is hidden
254 (unlinked) */
255 };
257 #define DEH_SIZE (sizeof (struct reiserfs_de_head))
259 #define DEH_Statdata (1 << 0) /* not used now */
260 #define DEH_Visible (1 << 2)
262 #define SD_OFFSET 0
263 #define SD_UNIQUENESS 0
264 #define DOT_OFFSET 1
265 #define DOT_DOT_OFFSET 2
266 #define DIRENTRY_UNIQUENESS 500
268 #define V1_TYPE_STAT_DATA 0x0
269 #define V1_TYPE_DIRECT 0xffffffff
270 #define V1_TYPE_INDIRECT 0xfffffffe
271 #define V1_TYPE_DIRECTORY_MAX 0xfffffffd
272 #define V2_TYPE_STAT_DATA 0
273 #define V2_TYPE_INDIRECT 1
274 #define V2_TYPE_DIRECT 2
275 #define V2_TYPE_DIRENTRY 3
277 #define REISERFS_ROOT_OBJECTID 2
278 #define REISERFS_ROOT_PARENT_OBJECTID 1
279 #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
280 /* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */
281 #define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
282 #define REISERFS_OLD_BLOCKSIZE 4096
284 #define S_ISREG(mode) (((mode) & 0170000) == 0100000)
285 #define S_ISDIR(mode) (((mode) & 0170000) == 0040000)
286 #define S_ISLNK(mode) (((mode) & 0170000) == 0120000)
288 #define PATH_MAX 1024 /* include/linux/limits.h */
289 #define MAX_LINK_COUNT 5 /* number of symbolic links to follow */
291 /* The size of the node cache */
292 #define FSYSREISER_CACHE_SIZE 24*1024
293 #define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE
294 #define FSYSREISER_MAX_BLOCKSIZE FSYSREISER_CACHE_SIZE / 3
296 /* Info about currently opened file */
297 struct fsys_reiser_fileinfo
298 {
299 __u32 k_dir_id;
300 __u32 k_objectid;
301 };
303 /* In memory info about the currently mounted filesystem */
304 struct fsys_reiser_info
305 {
306 /* The last read item head */
307 struct item_head *current_ih;
308 /* The last read item */
309 char *current_item;
310 /* The information for the currently opened file */
311 struct fsys_reiser_fileinfo fileinfo;
312 /* The start of the journal */
313 __u32 journal_block;
314 /* The size of the journal */
315 __u32 journal_block_count;
316 /* The first valid descriptor block in journal
317 (relative to journal_block) */
318 __u32 journal_first_desc;
320 /* The ReiserFS version. */
321 __u16 version;
322 /* The current depth of the reiser tree. */
323 __u16 tree_depth;
324 /* SECTOR_SIZE << blocksize_shift == blocksize. */
325 __u8 blocksize_shift;
326 /* 1 << full_blocksize_shift == blocksize. */
327 __u8 fullblocksize_shift;
328 /* The reiserfs block size (must be a power of 2) */
329 __u16 blocksize;
330 /* The number of cached tree nodes */
331 __u16 cached_slots;
332 /* The number of valid transactions in journal */
333 __u16 journal_transactions;
335 unsigned int blocks[MAX_HEIGHT];
336 unsigned int next_key_nr[MAX_HEIGHT];
337 };
339 /* The cached s+tree blocks in FSYS_BUF, see below
340 * for a more detailed description.
341 */
342 #define ROOT ((char *) FSYS_BUF)
343 #define CACHE(i) (ROOT + ((i) << INFO->fullblocksize_shift))
344 #define LEAF CACHE (DISK_LEAF_NODE_LEVEL)
346 #define BLOCKHEAD(cache) ((struct block_head *) cache)
347 #define ITEMHEAD ((struct item_head *) ((char *) LEAF + BLKH_SIZE))
348 #define KEY(cache) ((struct key *) ((char *) cache + BLKH_SIZE))
349 #define DC(cache) ((struct disk_child *) \
350 ((char *) cache + BLKH_SIZE + KEY_SIZE * nr_item))
351 /* The fsys_reiser_info block.
352 */
353 #define INFO \
354 ((struct fsys_reiser_info *) ((char *) FSYS_BUF + FSYSREISER_CACHE_SIZE))
355 /*
356 * The journal cache. For each transaction it contains the number of
357 * blocks followed by the real block numbers of this transaction.
358 *
359 * If the block numbers of some transaction won't fit in this space,
360 * this list is stopped with a 0xffffffff marker and the remaining
361 * uncommitted transactions aren't cached.
362 */
363 #define JOURNAL_START ((__u32 *) (INFO + 1))
364 #define JOURNAL_END ((__u32 *) (FSYS_BUF + FSYS_BUFLEN))
366 #if defined(__i386__) || defined(__x86_64__)
368 #ifdef __amd64
369 #define BSF "bsfq"
370 #else
371 #define BSF "bsfl"
372 #endif
373 static __inline__ unsigned long
374 grub_log2 (unsigned long word)
375 {
376 __asm__ (BSF " %1,%0"
377 : "=r" (word)
378 : "r" (word));
379 return word;
380 }
382 #elif defined(__ia64__)
384 #if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
385 # define ia64_popcnt(x) __builtin_popcountl(x)
386 #else
387 # define ia64_popcnt(x) \
388 ({ \
389 __u64 ia64_intri_res; \
390 asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \
391 ia64_intri_res; \
392 })
393 #endif
395 static __inline__ unsigned long
396 grub_log2 (unsigned long word)
397 {
398 unsigned long result;
400 result = ia64_popcnt((word - 1) & ~word);
401 return result;
402 }
404 #elif defined(__powerpc__)
406 #ifdef __powerpc64__
407 #define PPC_CNTLZL "cntlzd"
408 #else
409 #define PPC_CNTLZL "cntlzw"
410 #endif
411 #define BITS_PER_LONG (sizeof(long) * 8)
413 static __inline__ int
414 __ilog2(unsigned long x)
415 {
416 int lz;
418 asm (PPC_CNTLZL " %0,%1" : "=r" (lz) : "r" (x));
419 return BITS_PER_LONG - 1 - lz;
420 }
422 static __inline__ unsigned long
423 grub_log2 (unsigned long word)
424 {
425 return __ilog2(word & -word);
426 }
428 #else /* Unoptimized */
430 static __inline__ unsigned long
431 grub_log2 (unsigned long word)
432 {
433 unsigned long result = 0;
435 while (!(word & 1UL))
436 {
437 result++;
438 word >>= 1;
439 }
440 return result;
441 }
442 #endif
443 #define log2 grub_log2
445 static __inline__ int
446 is_power_of_two (unsigned long word)
447 {
448 return (word & -word) == word;
449 }
451 static int
452 journal_read (fsi_file_t *ffi, int block, int len, char *buffer)
453 {
454 return devread (ffi, (INFO->journal_block + block) << INFO->blocksize_shift,
455 0, len, buffer);
456 }
458 /* Read a block from ReiserFS file system, taking the journal into
459 * account. If the block nr is in the journal, the block from the
460 * journal taken.
461 */
462 static int
463 block_read (fsi_file_t *ffi, int blockNr, int start, int len, char *buffer)
464 {
465 int transactions = INFO->journal_transactions;
466 int desc_block = INFO->journal_first_desc;
467 int journal_mask = INFO->journal_block_count - 1;
468 int translatedNr = blockNr;
469 __u32 *journal_table = JOURNAL_START;
470 while (transactions-- > 0)
471 {
472 int i = 0;
473 int j_len;
474 if (*journal_table != 0xffffffff)
475 {
476 /* Search for the blockNr in cached journal */
477 j_len = *journal_table++;
478 while (i++ < j_len)
479 {
480 if (*journal_table++ == blockNr)
481 {
482 journal_table += j_len - i;
483 goto found;
484 }
485 }
486 }
487 else
488 {
489 /* This is the end of cached journal marker. The remaining
490 * transactions are still on disk.
491 */
492 struct reiserfs_journal_desc desc;
493 struct reiserfs_journal_commit commit;
495 if (! journal_read (ffi, desc_block, sizeof (desc), (char *) &desc))
496 return 0;
498 j_len = desc.j_len;
499 while (i < j_len && i < JOURNAL_TRANS_HALF)
500 if (desc.j_realblock[i++] == blockNr)
501 goto found;
503 if (j_len >= JOURNAL_TRANS_HALF)
504 {
505 int commit_block = (desc_block + 1 + j_len) & journal_mask;
506 if (! journal_read (ffi, commit_block,
507 sizeof (commit), (char *) &commit))
508 return 0;
509 while (i < j_len)
510 if (commit.j_realblock[i++ - JOURNAL_TRANS_HALF] == blockNr)
511 goto found;
512 }
513 }
514 goto not_found;
516 found:
517 translatedNr = INFO->journal_block + ((desc_block + i) & journal_mask);
518 #ifdef REISERDEBUG
519 printf ("block_read: block %d is mapped to journal block %d.\n",
520 blockNr, translatedNr - INFO->journal_block);
521 #endif
522 /* We must continue the search, as this block may be overwritten
523 * in later transactions.
524 */
525 not_found:
526 desc_block = (desc_block + 2 + j_len) & journal_mask;
527 }
528 return devread (ffi, translatedNr << INFO->blocksize_shift, start, len, buffer);
529 }
531 /* Init the journal data structure. We try to cache as much as
532 * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
533 * we can still read the rest from the disk on demand.
534 *
535 * The first number of valid transactions and the descriptor block of the
536 * first valid transaction are held in INFO. The transactions are all
537 * adjacent, but we must take care of the journal wrap around.
538 */
539 static int
540 journal_init (fsi_file_t *ffi)
541 {
542 unsigned int block_count = INFO->journal_block_count;
543 unsigned int desc_block;
544 unsigned int commit_block;
545 unsigned int next_trans_id;
546 struct reiserfs_journal_header header;
547 struct reiserfs_journal_desc desc;
548 struct reiserfs_journal_commit commit;
549 __u32 *journal_table = JOURNAL_START;
551 journal_read (ffi, block_count, sizeof (header), (char *) &header);
552 desc_block = header.j_first_unflushed_offset;
553 if (desc_block >= block_count)
554 return 0;
556 INFO->journal_first_desc = desc_block;
557 next_trans_id = header.j_last_flush_trans_id + 1;
559 #ifdef REISERDEBUG
560 printf ("journal_init: last flushed %d\n",
561 header.j_last_flush_trans_id);
562 #endif
564 while (1)
565 {
566 journal_read (ffi, desc_block, sizeof (desc), (char *) &desc);
567 if (substring (JOURNAL_DESC_MAGIC, desc.j_magic)
568 || desc.j_trans_id != next_trans_id
569 || desc.j_mount_id != header.j_mount_id)
570 /* no more valid transactions */
571 break;
573 commit_block = (desc_block + desc.j_len + 1) & (block_count - 1);
574 journal_read (ffi, commit_block, sizeof (commit), (char *) &commit);
575 if (desc.j_trans_id != commit.j_trans_id
576 || desc.j_len != commit.j_len)
577 /* no more valid transactions */
578 break;
580 #ifdef REISERDEBUG
581 printf ("Found valid transaction %d/%d at %d.\n",
582 desc.j_trans_id, desc.j_mount_id, desc_block);
583 #endif
585 next_trans_id++;
586 if (journal_table < JOURNAL_END)
587 {
588 if ((journal_table + 1 + desc.j_len) >= JOURNAL_END)
589 {
590 /* The table is almost full; mark the end of the cached
591 * journal.*/
592 *journal_table = 0xffffffff;
593 journal_table = JOURNAL_END;
594 }
595 else
596 {
597 int i;
598 /* Cache the length and the realblock numbers in the table.
599 * The block number of descriptor can easily be computed.
600 * and need not to be stored here.
601 */
602 *journal_table++ = desc.j_len;
603 for (i = 0; i < desc.j_len && i < JOURNAL_TRANS_HALF; i++)
604 {
605 *journal_table++ = desc.j_realblock[i];
606 #ifdef REISERDEBUG
607 printf ("block %d is in journal %d.\n",
608 desc.j_realblock[i], desc_block);
609 #endif
610 }
611 for ( ; i < desc.j_len; i++)
612 {
613 *journal_table++ = commit.j_realblock[i-JOURNAL_TRANS_HALF];
614 #ifdef REISERDEBUG
615 printf ("block %d is in journal %d.\n",
616 commit.j_realblock[i-JOURNAL_TRANS_HALF],
617 desc_block);
618 #endif
619 }
620 }
621 }
622 desc_block = (commit_block + 1) & (block_count - 1);
623 }
624 #ifdef REISERDEBUG
625 printf ("Transaction %d/%d at %d isn't valid.\n",
626 desc.j_trans_id, desc.j_mount_id, desc_block);
627 #endif
629 INFO->journal_transactions
630 = next_trans_id - header.j_last_flush_trans_id - 1;
631 return errnum == 0;
632 }
634 /* check filesystem types and read superblock into memory buffer */
635 int
636 reiserfs_mount (fsi_file_t *ffi)
637 {
638 struct reiserfs_super_block super;
639 int superblock = REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
641 if (/*part_length < superblock + (sizeof (super) >> SECTOR_BITS)
642 || */ !devread (ffi, superblock, 0, sizeof (struct reiserfs_super_block),
643 (char *) &super)
644 || (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0
645 && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
646 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
647 || (/* check that this is not a copy inside the journal log */
648 super.s_journal_block * super.s_blocksize
649 <= REISERFS_DISK_OFFSET_IN_BYTES))
650 {
651 /* Try old super block position */
652 superblock = REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
653 if (/*part_length < superblock + (sizeof (super) >> SECTOR_BITS)
654 || */ ! devread (ffi, superblock, 0, sizeof (struct reiserfs_super_block),
655 (char *) &super))
656 return 0;
658 if (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0
659 && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
660 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
661 {
662 /* pre journaling super block ? */
663 if (substring (REISERFS_SUPER_MAGIC_STRING,
664 (char*) ((char *) &super + 20)) > 0)
665 return 0;
667 super.s_blocksize = REISERFS_OLD_BLOCKSIZE;
668 super.s_journal_block = 0;
669 super.s_version = 0;
670 }
671 }
673 /* check the version number. */
674 if (super.s_version > REISERFS_MAX_SUPPORTED_VERSION)
675 return 0;
677 INFO->version = super.s_version;
678 INFO->blocksize = super.s_blocksize;
679 INFO->fullblocksize_shift = log2 (super.s_blocksize);
680 INFO->blocksize_shift = INFO->fullblocksize_shift - SECTOR_BITS;
681 INFO->cached_slots =
682 (FSYSREISER_CACHE_SIZE >> INFO->fullblocksize_shift) - 1;
684 #ifdef REISERDEBUG
685 printf ("reiserfs_mount: version=%d, blocksize=%d\n",
686 INFO->version, INFO->blocksize);
687 #endif /* REISERDEBUG */
689 /* Clear node cache. */
690 memset (INFO->blocks, 0, sizeof (INFO->blocks));
692 if (super.s_blocksize < FSYSREISER_MIN_BLOCKSIZE
693 || super.s_blocksize > FSYSREISER_MAX_BLOCKSIZE
694 || (SECTOR_SIZE << INFO->blocksize_shift) != super.s_blocksize)
695 return 0;
697 /* Initialize journal code. If something fails we end with zero
698 * journal_transactions, so we don't access the journal at all.
699 */
700 INFO->journal_transactions = 0;
701 if (super.s_journal_block != 0 && super.s_journal_dev == 0)
702 {
703 INFO->journal_block = super.s_journal_block;
704 INFO->journal_block_count = super.s_journal_size;
705 if (is_power_of_two (INFO->journal_block_count))
706 journal_init (ffi);
708 /* Read in super block again, maybe it is in the journal */
709 block_read (ffi, superblock >> INFO->blocksize_shift,
710 0, sizeof (struct reiserfs_super_block), (char *) &super);
711 }
713 if (! block_read (ffi, super.s_root_block, 0, INFO->blocksize, (char*) ROOT))
714 return 0;
716 INFO->tree_depth = BLOCKHEAD (ROOT)->blk_level;
718 #ifdef REISERDEBUG
719 printf ("root read_in: block=%d, depth=%d\n",
720 super.s_root_block, INFO->tree_depth);
721 #endif /* REISERDEBUG */
723 if (INFO->tree_depth >= MAX_HEIGHT)
724 return 0;
725 if (INFO->tree_depth == DISK_LEAF_NODE_LEVEL)
726 {
727 /* There is only one node in the whole filesystem,
728 * which is simultanously leaf and root */
729 memcpy (LEAF, ROOT, INFO->blocksize);
730 }
731 return 1;
732 }
734 /***************** TREE ACCESSING METHODS *****************************/
736 /* I assume you are familiar with the ReiserFS tree, if not go to
737 * http://www.namesys.com/content_table.html
738 *
739 * My tree node cache is organized as following
740 * 0 ROOT node
741 * 1 LEAF node (if the ROOT is also a LEAF it is copied here
742 * 2-n other nodes on current path from bottom to top.
743 * if there is not enough space in the cache, the top most are
744 * omitted.
745 *
746 * I have only two methods to find a key in the tree:
747 * search_stat(dir_id, objectid) searches for the stat entry (always
748 * the first entry) of an object.
749 * next_key() gets the next key in tree order.
750 *
751 * This means, that I can only sequential reads of files are
752 * efficient, but this really doesn't hurt for grub.
753 */
755 /* Read in the node at the current path and depth into the node cache.
756 * You must set INFO->blocks[depth] before.
757 */
758 static char *
759 read_tree_node (fsi_file_t *ffi, unsigned int blockNr, int depth)
760 {
761 char* cache = CACHE(depth);
762 int num_cached = INFO->cached_slots;
763 if (depth < num_cached)
764 {
765 /* This is the cached part of the path. Check if same block is
766 * needed.
767 */
768 if (blockNr == INFO->blocks[depth])
769 return cache;
770 }
771 else
772 cache = CACHE(num_cached);
774 #ifdef REISERDEBUG
775 printf (" next read_in: block=%d (depth=%d)\n",
776 blockNr, depth);
777 #endif /* REISERDEBUG */
778 if (! block_read (ffi, blockNr, 0, INFO->blocksize, cache))
779 return 0;
780 /* Make sure it has the right node level */
781 if (BLOCKHEAD (cache)->blk_level != depth)
782 {
783 errnum = ERR_FSYS_CORRUPT;
784 return 0;
785 }
787 INFO->blocks[depth] = blockNr;
788 return cache;
789 }
791 /* Get the next key, i.e. the key following the last retrieved key in
792 * tree order. INFO->current_ih and
793 * INFO->current_info are adapted accordingly. */
794 static int
795 next_key (fsi_file_t *ffi)
796 {
797 int depth;
798 struct item_head *ih = INFO->current_ih + 1;
799 char *cache;
801 #ifdef REISERDEBUG
802 printf ("next_key:\n old ih: key %d:%d:%d:%d version:%d\n",
803 INFO->current_ih->ih_key.k_dir_id,
804 INFO->current_ih->ih_key.k_objectid,
805 INFO->current_ih->ih_key.u.v1.k_offset,
806 INFO->current_ih->ih_key.u.v1.k_uniqueness,
807 INFO->current_ih->ih_version);
808 #endif /* REISERDEBUG */
810 if (ih == &ITEMHEAD[BLOCKHEAD (LEAF)->blk_nr_item])
811 {
812 depth = DISK_LEAF_NODE_LEVEL;
813 /* The last item, was the last in the leaf node.
814 * Read in the next block
815 */
816 do
817 {
818 if (depth == INFO->tree_depth)
819 {
820 /* There are no more keys at all.
821 * Return a dummy item with MAX_KEY */
822 ih = (struct item_head *) &BLOCKHEAD (LEAF)->blk_right_delim_key;
823 goto found;
824 }
825 depth++;
826 #ifdef REISERDEBUG
827 printf (" depth=%d, i=%d\n", depth, INFO->next_key_nr[depth]);
828 #endif /* REISERDEBUG */
829 }
830 while (INFO->next_key_nr[depth] == 0);
832 if (depth == INFO->tree_depth)
833 cache = ROOT;
834 else if (depth <= INFO->cached_slots)
835 cache = CACHE (depth);
836 else
837 {
838 cache = read_tree_node (ffi, INFO->blocks[depth], depth);
839 if (! cache)
840 return 0;
841 }
843 do
844 {
845 int nr_item = BLOCKHEAD (cache)->blk_nr_item;
846 int key_nr = INFO->next_key_nr[depth]++;
847 #ifdef REISERDEBUG
848 printf (" depth=%d, i=%d/%d\n", depth, key_nr, nr_item);
849 #endif /* REISERDEBUG */
850 if (key_nr == nr_item)
851 /* This is the last item in this block, set the next_key_nr to 0 */
852 INFO->next_key_nr[depth] = 0;
854 cache = read_tree_node (ffi, DC (cache)[key_nr].dc_block_number, --depth);
855 if (! cache)
856 return 0;
857 }
858 while (depth > DISK_LEAF_NODE_LEVEL);
860 ih = ITEMHEAD;
861 }
862 found:
863 INFO->current_ih = ih;
864 INFO->current_item = &LEAF[ih->ih_item_location];
865 #ifdef REISERDEBUG
866 printf (" new ih: key %d:%d:%d:%d version:%d\n",
867 INFO->current_ih->ih_key.k_dir_id,
868 INFO->current_ih->ih_key.k_objectid,
869 INFO->current_ih->ih_key.u.v1.k_offset,
870 INFO->current_ih->ih_key.u.v1.k_uniqueness,
871 INFO->current_ih->ih_version);
872 #endif /* REISERDEBUG */
873 return 1;
874 }
876 /* preconditions: reiserfs_mount already executed, therefore
877 * INFO block is valid
878 * returns: 0 if error (errnum is set),
879 * nonzero iff we were able to find the key successfully.
880 * postconditions: on a nonzero return, the current_ih and
881 * current_item fields describe the key that equals the
882 * searched key. INFO->next_key contains the next key after
883 * the searched key.
884 * side effects: messes around with the cache.
885 */
886 static int
887 search_stat (fsi_file_t *ffi, __u32 dir_id, __u32 objectid)
888 {
889 char *cache;
890 int depth;
891 int nr_item;
892 int i;
893 struct item_head *ih;
894 #ifdef REISERDEBUG
895 printf ("search_stat:\n key %d:%d:0:0\n", dir_id, objectid);
896 #endif /* REISERDEBUG */
898 depth = INFO->tree_depth;
899 cache = ROOT;
901 while (depth > DISK_LEAF_NODE_LEVEL)
902 {
903 struct key *key;
904 nr_item = BLOCKHEAD (cache)->blk_nr_item;
906 key = KEY (cache);
908 for (i = 0; i < nr_item; i++)
909 {
910 if (key->k_dir_id > dir_id
911 || (key->k_dir_id == dir_id
912 && (key->k_objectid > objectid
913 || (key->k_objectid == objectid
914 && (key->u.v1.k_offset
915 | key->u.v1.k_uniqueness) > 0))))
916 break;
917 key++;
918 }
920 #ifdef REISERDEBUG
921 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item);
922 #endif /* REISERDEBUG */
923 INFO->next_key_nr[depth] = (i == nr_item) ? 0 : i+1;
924 cache = read_tree_node (ffi, DC (cache)[i].dc_block_number, --depth);
925 if (! cache)
926 return 0;
927 }
929 /* cache == LEAF */
930 nr_item = BLOCKHEAD (LEAF)->blk_nr_item;
931 ih = ITEMHEAD;
932 for (i = 0; i < nr_item; i++)
933 {
934 if (ih->ih_key.k_dir_id == dir_id
935 && ih->ih_key.k_objectid == objectid
936 && ih->ih_key.u.v1.k_offset == 0
937 && ih->ih_key.u.v1.k_uniqueness == 0)
938 {
939 #ifdef REISERDEBUG
940 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item);
941 #endif /* REISERDEBUG */
942 INFO->current_ih = ih;
943 INFO->current_item = &LEAF[ih->ih_item_location];
944 return 1;
945 }
946 ih++;
947 }
948 errnum = ERR_FSYS_CORRUPT;
949 return 0;
950 }
952 int
953 reiserfs_read (fsi_file_t *ffi, char *buf, int len)
954 {
955 unsigned int blocksize;
956 unsigned int offset;
957 unsigned int to_read;
958 char *prev_buf = buf;
960 #ifdef REISERDEBUG
961 printf ("reiserfs_read: filepos=%d len=%d, offset=%x:%x\n",
962 filepos, len, (__u64) IH_KEY_OFFSET (INFO->current_ih) - 1);
963 #endif /* REISERDEBUG */
965 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
966 || IH_KEY_OFFSET (INFO->current_ih) > filepos + 1)
967 {
968 search_stat (ffi, INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid);
969 goto get_next_key;
970 }
972 while (! errnum)
973 {
974 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid)
975 break;
977 offset = filepos - IH_KEY_OFFSET (INFO->current_ih) + 1;
978 blocksize = INFO->current_ih->ih_item_len;
980 #ifdef REISERDEBUG
981 printf (" loop: filepos=%d len=%d, offset=%d blocksize=%d\n",
982 filepos, len, offset, blocksize);
983 #endif /* REISERDEBUG */
985 if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_DIRECT)
986 && offset < blocksize)
987 {
988 #ifdef REISERDEBUG
989 printf ("direct_read: offset=%d, blocksize=%d\n",
990 offset, blocksize);
991 #endif /* REISERDEBUG */
992 to_read = blocksize - offset;
993 if (to_read > len)
994 to_read = len;
996 if (disk_read_hook != NULL)
997 {
998 disk_read_func = disk_read_hook;
1000 block_read (ffi, INFO->blocks[DISK_LEAF_NODE_LEVEL],
1001 (INFO->current_item - LEAF + offset), to_read, buf);
1003 disk_read_func = NULL;
1005 else
1006 memcpy (buf, INFO->current_item + offset, to_read);
1007 goto update_buf_len;
1009 else if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_INDIRECT))
1011 blocksize = (blocksize >> 2) << INFO->fullblocksize_shift;
1012 #ifdef REISERDEBUG
1013 printf ("indirect_read: offset=%d, blocksize=%d\n",
1014 offset, blocksize);
1015 #endif /* REISERDEBUG */
1017 while (offset < blocksize)
1019 __u32 blocknr = ((__u32 *) INFO->current_item)
1020 [offset >> INFO->fullblocksize_shift];
1021 int blk_offset = offset & (INFO->blocksize-1);
1023 to_read = INFO->blocksize - blk_offset;
1024 if (to_read > len)
1025 to_read = len;
1027 disk_read_func = disk_read_hook;
1029 /* Journal is only for meta data. Data blocks can be read
1030 * directly without using block_read
1031 */
1032 devread (ffi, blocknr << INFO->blocksize_shift,
1033 blk_offset, to_read, buf);
1035 disk_read_func = NULL;
1036 update_buf_len:
1037 len -= to_read;
1038 buf += to_read;
1039 offset += to_read;
1040 filepos += to_read;
1041 if (len == 0)
1042 goto done;
1045 get_next_key:
1046 next_key (ffi);
1048 done:
1049 return errnum ? 0 : buf - prev_buf;
1053 /* preconditions: reiserfs_mount already executed, therefore
1054 * INFO block is valid
1055 * returns: 0 if error, nonzero iff we were able to find the file successfully
1056 * postconditions: on a nonzero return, INFO->fileinfo contains the info
1057 * of the file we were trying to look up, filepos is 0 and filemax is
1058 * the size of the file.
1059 */
1060 int
1061 reiserfs_dir (fsi_file_t *ffi, char *dirname)
1063 struct reiserfs_de_head *de_head;
1064 char *rest, ch;
1065 __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
1066 #ifndef STAGE1_5
1067 int do_possibilities = 0;
1068 #endif /* ! STAGE1_5 */
1069 char linkbuf[PATH_MAX]; /* buffer for following symbolic links */
1070 int link_count = 0;
1071 int mode;
1073 dir_id = REISERFS_ROOT_PARENT_OBJECTID;
1074 objectid = REISERFS_ROOT_OBJECTID;
1076 while (1)
1078 #ifdef REISERDEBUG
1079 printf ("dirname=%s\n", dirname);
1080 #endif /* REISERDEBUG */
1082 /* Search for the stat info first. */
1083 if (! search_stat (ffi, dir_id, objectid))
1084 return 0;
1086 #ifdef REISERDEBUG
1087 printf ("sd_mode=%x sd_size=%d\n",
1088 ((struct stat_data *) INFO->current_item)->sd_mode,
1089 ((struct stat_data *) INFO->current_item)->sd_size);
1090 #endif /* REISERDEBUG */
1092 mode = ((struct stat_data *) INFO->current_item)->sd_mode;
1094 /* If we've got a symbolic link, then chase it. */
1095 if (S_ISLNK (mode))
1097 int len;
1098 if (++link_count > MAX_LINK_COUNT)
1100 errnum = ERR_SYMLINK_LOOP;
1101 return 0;
1104 /* Get the symlink size. */
1105 filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1107 /* Find out how long our remaining name is. */
1108 len = 0;
1109 while (dirname[len] && !isspace (dirname[len]))
1110 len++;
1112 if (filemax + len > sizeof (linkbuf) - 1)
1114 errnum = ERR_FILELENGTH;
1115 return 0;
1118 /* Copy the remaining name to the end of the symlink data.
1119 Note that DIRNAME and LINKBUF may overlap! */
1120 grub_memmove (linkbuf + filemax, dirname, len+1);
1122 INFO->fileinfo.k_dir_id = dir_id;
1123 INFO->fileinfo.k_objectid = objectid;
1124 filepos = 0;
1125 if (! next_key (ffi)
1126 || reiserfs_read (ffi, linkbuf, filemax) != filemax)
1128 if (! errnum)
1129 errnum = ERR_FSYS_CORRUPT;
1130 return 0;
1133 #ifdef REISERDEBUG
1134 printf ("symlink=%s\n", linkbuf);
1135 #endif /* REISERDEBUG */
1137 dirname = linkbuf;
1138 if (*dirname == '/')
1140 /* It's an absolute link, so look it up in root. */
1141 dir_id = REISERFS_ROOT_PARENT_OBJECTID;
1142 objectid = REISERFS_ROOT_OBJECTID;
1144 else
1146 /* Relative, so look it up in our parent directory. */
1147 dir_id = parent_dir_id;
1148 objectid = parent_objectid;
1151 /* Now lookup the new name. */
1152 continue;
1155 /* if we have a real file (and we're not just printing possibilities),
1156 then this is where we want to exit */
1158 if (! *dirname || isspace (*dirname))
1160 if (! S_ISREG (mode))
1162 errnum = ERR_BAD_FILETYPE;
1163 return 0;
1166 filepos = 0;
1167 filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1169 /* If this is a new stat data and size is > 4GB set filemax to
1170 * maximum
1171 */
1172 if (INFO->current_ih->ih_version == ITEM_VERSION_2
1173 && ((struct stat_data *) INFO->current_item)->sd_size_hi > 0)
1174 filemax = 0xffffffff;
1176 INFO->fileinfo.k_dir_id = dir_id;
1177 INFO->fileinfo.k_objectid = objectid;
1178 return next_key (ffi);
1181 /* continue with the file/directory name interpretation */
1182 while (*dirname == '/')
1183 dirname++;
1184 if (! S_ISDIR (mode))
1186 errnum = ERR_BAD_FILETYPE;
1187 return 0;
1189 for (rest = dirname; (ch = *rest) && ! isspace (ch) && ch != '/'; rest++);
1190 *rest = 0;
1192 # ifndef STAGE1_5
1193 if (print_possibilities && ch != '/')
1194 do_possibilities = 1;
1195 # endif /* ! STAGE1_5 */
1197 while (1)
1199 char *name_end;
1200 int num_entries;
1202 if (! next_key (ffi))
1203 return 0;
1204 #ifdef REISERDEBUG
1205 printf ("ih: key %d:%d:%d:%d version:%d\n",
1206 INFO->current_ih->ih_key.k_dir_id,
1207 INFO->current_ih->ih_key.k_objectid,
1208 INFO->current_ih->ih_key.u.v1.k_offset,
1209 INFO->current_ih->ih_key.u.v1.k_uniqueness,
1210 INFO->current_ih->ih_version);
1211 #endif /* REISERDEBUG */
1213 if (INFO->current_ih->ih_key.k_objectid != objectid)
1214 break;
1216 name_end = INFO->current_item + INFO->current_ih->ih_item_len;
1217 de_head = (struct reiserfs_de_head *) INFO->current_item;
1218 num_entries = INFO->current_ih->u.ih_entry_count;
1219 while (num_entries > 0)
1221 char *filename = INFO->current_item + de_head->deh_location;
1222 char tmp = *name_end;
1223 if ((de_head->deh_state & DEH_Visible))
1225 int cmp;
1226 /* Directory names in ReiserFS are not null
1227 * terminated. We write a temporary 0 behind it.
1228 * NOTE: that this may overwrite the first block in
1229 * the tree cache. That doesn't hurt as long as we
1230 * don't call next_key () in between.
1231 */
1232 *name_end = 0;
1233 cmp = substring (dirname, filename);
1234 *name_end = tmp;
1235 # ifndef STAGE1_5
1236 if (do_possibilities)
1238 if (cmp <= 0)
1240 if (print_possibilities > 0)
1241 print_possibilities = -print_possibilities;
1242 *name_end = 0;
1243 print_a_completion (filename);
1244 *name_end = tmp;
1247 else
1248 # endif /* ! STAGE1_5 */
1249 if (cmp == 0)
1250 goto found;
1252 /* The beginning of this name marks the end of the next name.
1253 */
1254 name_end = filename;
1255 de_head++;
1256 num_entries--;
1260 # ifndef STAGE1_5
1261 if (print_possibilities < 0)
1262 return 1;
1263 # endif /* ! STAGE1_5 */
1265 errnum = ERR_FILE_NOT_FOUND;
1266 *rest = ch;
1267 return 0;
1269 found:
1271 *rest = ch;
1272 dirname = rest;
1274 parent_dir_id = dir_id;
1275 parent_objectid = objectid;
1276 dir_id = de_head->deh_dir_id;
1277 objectid = de_head->deh_objectid;
1281 int
1282 reiserfs_embed (fsi_file_t *ffi, int *start_sector, int needed_sectors)
1284 struct reiserfs_super_block super;
1285 int num_sectors;
1287 if (! devread (ffi, REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS, 0,
1288 sizeof (struct reiserfs_super_block), (char *) &super))
1289 return 0;
1291 *start_sector = 1; /* reserve first sector for stage1 */
1292 if ((substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1293 || substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1294 || substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) <= 0)
1295 && (/* check that this is not a super block copy inside
1296 * the journal log */
1297 super.s_journal_block * super.s_blocksize
1298 > REISERFS_DISK_OFFSET_IN_BYTES))
1299 num_sectors = (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1300 else
1301 num_sectors = (REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1303 return (needed_sectors <= num_sectors);
1306 fsi_plugin_ops_t *
1307 fsi_init_plugin(int version, fsi_plugin_t *fp, const char **name)
1309 static fsig_plugin_ops_t ops = {
1310 FSIMAGE_PLUGIN_VERSION,
1311 .fpo_mount = reiserfs_mount,
1312 .fpo_dir = reiserfs_dir,
1313 .fpo_read = reiserfs_read
1314 };
1316 *name = "reiserfs";
1317 return (fsig_init(fp, &ops));