From ed4e8a4bcc0370deeffeae3b5d8fc9feece54ae2 Mon Sep 17 00:00:00 2001 | |
From: Alex Page <[email protected]> | |
Date: Fri, 31 Jan 2014 21:47:22 -0500 | |
Subject: [PATCH] Implemented F2FS for use on /data | |
Based on commit 4776c688c705b46925f9c9e1e6daf09692519fcf of Google's | |
Tegra 3 kernel for the 2012 Nexus 7. | |
Patches thanks to Now Computing's f2fs-backports repo: | |
https://github.com/nowcomputing/f2fs-backports/ | |
--- | |
Documentation/filesystems/00-INDEX | 2 + | |
Documentation/filesystems/f2fs.txt | 502 ++++++++ | |
arch/arm/configs/tegra3_android_defconfig | 3 + | |
fs/Kconfig | 1 + | |
fs/Makefile | 1 + | |
fs/dcache.c | 4 +- | |
fs/f2fs/Kconfig | 65 + | |
fs/f2fs/Makefile | 7 + | |
fs/f2fs/acl.c | 423 +++++++ | |
fs/f2fs/acl.h | 57 + | |
fs/f2fs/checkpoint.c | 860 +++++++++++++ | |
fs/f2fs/data.c | 790 ++++++++++++ | |
fs/f2fs/debug.c | 353 ++++++ | |
fs/f2fs/dir.c | 714 +++++++++++ | |
fs/f2fs/f2fs.h | 1290 ++++++++++++++++++++ | |
fs/f2fs/file.c | 725 +++++++++++ | |
fs/f2fs/gc.c | 738 ++++++++++++ | |
fs/f2fs/gc.h | 110 ++ | |
fs/f2fs/hash.c | 101 ++ | |
fs/f2fs/inode.c | 273 +++++ | |
fs/f2fs/namei.c | 557 +++++++++ | |
fs/f2fs/node.c | 1859 +++++++++++++++++++++++++++++ | |
fs/f2fs/node.h | 345 ++++++ | |
fs/f2fs/recovery.c | 502 ++++++++ | |
fs/f2fs/segment.c | 1787 +++++++++++++++++++++++++++ | |
fs/f2fs/segment.h | 637 ++++++++++ | |
fs/f2fs/super.c | 1154 ++++++++++++++++++ | |
fs/f2fs/xattr.c | 600 ++++++++++ | |
fs/f2fs/xattr.h | 152 +++ | |
include/linux/dcache.h | 1 + | |
include/linux/f2fs_fs.h | 424 +++++++ | |
include/linux/fs.h | 13 + | |
include/linux/magic.h | 1 + | |
include/linux/security.h | 19 +- | |
include/linux/xattr.h | 6 + | |
include/trace/events/f2fs.h | 682 +++++++++++ | |
security/security.c | 33 + | |
37 files changed, 15788 insertions(+), 3 deletions(-) | |
create mode 100644 Documentation/filesystems/f2fs.txt | |
create mode 100644 fs/f2fs/Kconfig | |
create mode 100644 fs/f2fs/Makefile | |
create mode 100644 fs/f2fs/acl.c | |
create mode 100644 fs/f2fs/acl.h | |
create mode 100644 fs/f2fs/checkpoint.c | |
create mode 100644 fs/f2fs/data.c | |
create mode 100644 fs/f2fs/debug.c | |
create mode 100644 fs/f2fs/dir.c | |
create mode 100644 fs/f2fs/f2fs.h | |
create mode 100644 fs/f2fs/file.c | |
create mode 100644 fs/f2fs/gc.c | |
create mode 100644 fs/f2fs/gc.h | |
create mode 100644 fs/f2fs/hash.c | |
create mode 100644 fs/f2fs/inode.c | |
create mode 100644 fs/f2fs/namei.c | |
create mode 100644 fs/f2fs/node.c | |
create mode 100644 fs/f2fs/node.h | |
create mode 100644 fs/f2fs/recovery.c | |
create mode 100644 fs/f2fs/segment.c | |
create mode 100644 fs/f2fs/segment.h | |
create mode 100644 fs/f2fs/super.c | |
create mode 100644 fs/f2fs/xattr.c | |
create mode 100644 fs/f2fs/xattr.h | |
create mode 100644 include/linux/f2fs_fs.h | |
create mode 100644 include/trace/events/f2fs.h | |
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX | |
index 8c624a1..ce5fd46 100644 | |
--- a/Documentation/filesystems/00-INDEX | |
+++ b/Documentation/filesystems/00-INDEX | |
@@ -48,6 +48,8 @@ ext4.txt | |
- info, mount options and specifications for the Ext4 filesystem. | |
files.txt | |
- info on file management in the Linux kernel. | |
+f2fs.txt | |
+ - info and mount options for the F2FS filesystem. | |
fuse.txt | |
- info on the Filesystem in User SpacE including mount options. | |
gfs2.txt | |
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt | |
new file mode 100644 | |
index 0000000..d225139 | |
--- /dev/null | |
+++ b/Documentation/filesystems/f2fs.txt | |
@@ -0,0 +1,502 @@ | |
+================================================================================ | |
+WHAT IS Flash-Friendly File System (F2FS)? | |
+================================================================================ | |
+ | |
+NAND flash memory-based storage devices, such as SSDs, eMMC, and SD cards, are | |
+now used in a variety of systems ranging from mobile devices to servers. Since | |
+their characteristics differ from those of conventional rotating disks, a file | |
+system, as the layer above the storage device, should be designed from scratch | |
+to account for these characteristics. | |
+ | |
+F2FS is a file system exploiting NAND flash memory-based storage devices, which | |
+is based on Log-structured File System (LFS). The design has been focused on | |
+addressing the fundamental issues in LFS, which are snowball effect of wandering | |
+tree and high cleaning overhead. | |
+ | |
+Since a NAND flash memory-based storage device shows different characteristics | |
+depending on its internal geometry or flash memory management scheme, namely the | |
+FTL, F2FS and its tools support various parameters not only for configuring the | |
+on-disk layout, but also for selecting allocation and cleaning algorithms. | |
+ | |
+The following git tree provides the file system formatting tool (mkfs.f2fs), | |
+a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). | |
+>> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git | |
+ | |
+For reporting bugs and sending patches, please use the following mailing list: | |
+>> [email protected] | |
+ | |
+================================================================================ | |
+BACKGROUND AND DESIGN ISSUES | |
+================================================================================ | |
+ | |
+Log-structured File System (LFS) | |
+-------------------------------- | |
+"A log-structured file system writes all modifications to disk sequentially in | |
+a log-like structure, thereby speeding up both file writing and crash recovery. | |
+The log is the only structure on disk; it contains indexing information so that | |
+files can be read back from the log efficiently. In order to maintain large free | |
+areas on disk for fast writing, we divide the log into segments and use a | |
+segment cleaner to compress the live information from heavily fragmented | |
+segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and | |
+implementation of a log-structured file system", ACM Trans. Computer Systems | |
+10, 1, 26–52. | |
+ | |
+Wandering Tree Problem | |
+---------------------- | |
+In LFS, when file data is updated and written to the end of the log, its direct | |
+pointer block is updated because the data's location changed. The indirect | |
+pointer block is then also updated due to the direct pointer block update. In | |
+this manner, the upper index structures such as the inode, inode map, and | |
+checkpoint block are updated recursively. This is called the wandering tree | |
+problem [1], and to improve performance, the file system should eliminate or | |
+relax this update propagation as much as possible. | |
+ | |
+[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/ | |
+ | |
+Cleaning Overhead | |
+----------------- | |
+Since LFS is based on out-of-place writes, it produces many obsolete blocks | |
+scattered across the whole storage. In order to provide new empty log space, | |
+it needs to reclaim these obsolete blocks transparently to users. This job is | |
+called the cleaning process. | |
+ | |
+The process consists of four operations as follows. | |
+1. A victim segment is selected by referencing the segment usage table. | |
+2. It loads parent index structures of all the data in the victim identified by | |
+ segment summary blocks. | |
+3. It checks the cross-reference between the data and its parent index structure. | |
+4. It moves valid data selectively. | |
+ | |
+This cleaning job may cause unexpectedly long delays, so the most important | |
+goal is to hide these latencies from users. It should also reduce the amount | |
+of valid data to be moved, and move that data quickly. | |
+ | |
+================================================================================ | |
+KEY FEATURES | |
+================================================================================ | |
+ | |
+Flash Awareness | |
+--------------- | |
+- Enlarge the random write area for better performance, but provide high | |
+ spatial locality | |
+- Align FS data structures to the operational units in the FTL on a best-effort basis | |
+ | |
+Wandering Tree Problem | |
+---------------------- | |
+- Use a term, “node”, that represents inodes as well as various pointer blocks | |
+- Introduce Node Address Table (NAT) containing the locations of all the “node” | |
+ blocks; this will cut off the update propagation. | |
+ | |
+Cleaning Overhead | |
+----------------- | |
+- Support a background cleaning process | |
+- Support greedy and cost-benefit algorithms for victim selection policies | |
+- Support multi-head logs for static/dynamic hot and cold data separation | |
+- Introduce adaptive logging for efficient block allocation | |
+ | |
+================================================================================ | |
+MOUNT OPTIONS | |
+================================================================================ | |
+ | |
+background_gc=%s Turn on/off cleaning operations, namely garbage | |
+ collection, triggered in the background when the I/O | |
+ subsystem is idle. If background_gc=on, garbage | |
+ collection is turned on; if background_gc=off, garbage | |
+ collection is turned off. | |
+ The default value is on, so garbage collection is | |
+ enabled by default. | |
+disable_roll_forward Disable the roll-forward recovery routine | |
+discard Issue discard/TRIM commands when a segment is cleaned. | |
+no_heap Disable heap-style segment allocation, which finds free | |
+ segments for data from the beginning of the main area, | |
+ and for nodes from the end of the main area. | |
+nouser_xattr Disable Extended User Attributes. Note: xattr is enabled | |
+ by default if CONFIG_F2FS_FS_XATTR is selected. | |
+noacl Disable POSIX Access Control List. Note: acl is enabled | |
+ by default if CONFIG_F2FS_FS_POSIX_ACL is selected. | |
+active_logs=%u Support configuring the number of active logs. In the | |
+ current design, f2fs supports only 2, 4, and 6 logs. | |
+ Default number is 6. | |
+disable_ext_identify Disable the extension list configured by mkfs, so f2fs | |
+ is not aware of cold files such as media files. | |
+inline_xattr Enable the inline xattrs feature. | |
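+ | |
+For example, several of these options can be combined in a single mount | |
+command (the device path below is illustrative): | |
+ | |
+ # mount -t f2fs -o background_gc=on,discard,active_logs=6 /dev/sdb1 /mnt/f2fs | |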
+ | |
+================================================================================ | |
+DEBUGFS ENTRIES | |
+================================================================================ | |
+ | |
+/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as | |
+f2fs. Each file shows the whole f2fs information. | |
+ | |
+/sys/kernel/debug/f2fs/status includes: | |
+ - major file system information managed by f2fs currently | |
+ - average SIT information about whole segments | |
+ - current memory footprint consumed by f2fs. | |
+ | |
+================================================================================ | |
+SYSFS ENTRIES | |
+================================================================================ | |
+ | |
+Information about mounted f2fs file systems can be found in | |
+/sys/fs/f2fs. Each mounted filesystem will have a directory in | |
+/sys/fs/f2fs based on its device name (e.g., /sys/fs/f2fs/sda). | |
+The files in each per-device directory are shown in the table below. | |
+ | |
+Files in /sys/fs/f2fs/<devname> | |
+(see also Documentation/ABI/testing/sysfs-fs-f2fs) | |
+.............................................................................. | |
+ File Content | |
+ | |
+ gc_max_sleep_time This tuning parameter controls the maximum sleep | |
+ time for the garbage collection thread. Time is | |
+ in milliseconds. | |
+ | |
+ gc_min_sleep_time This tuning parameter controls the minimum sleep | |
+ time for the garbage collection thread. Time is | |
+ in milliseconds. | |
+ | |
+ gc_no_gc_sleep_time This tuning parameter controls the default sleep | |
+ time for the garbage collection thread. Time is | |
+ in milliseconds. | |
+ | |
+ gc_idle This parameter controls the selection of the victim | |
+ policy for garbage collection. Setting gc_idle = 0 | |
+ (default) will disable this option. Setting | |
+ gc_idle = 1 will select the cost-benefit approach, | |
+ and setting gc_idle = 2 will select the greedy approach. | |
+ | |
+ reclaim_segments This parameter controls the number of prefree | |
+ segments to be reclaimed. If the number of prefree | |
+ segments is larger than this number, f2fs triggers a | |
+ checkpoint to reclaim the prefree segments and turn | |
+ them into free segments. Default: 100 segments (200MB). | |
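+ | |
+ For example, to make the garbage collection thread sleep at least | |
+ 500 ms between passes (the device name is illustrative): | |
+ | |
+ # echo 500 > /sys/fs/f2fs/sda/gc_min_sleep_time | |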
+ | |
+================================================================================ | |
+USAGE | |
+================================================================================ | |
+ | |
+1. Download userland tools and compile them. | |
+ | |
+2. Skip this step if f2fs was built statically into the kernel. | |
+ Otherwise, insert the f2fs.ko module. | |
+ # insmod f2fs.ko | |
+ | |
+3. Create a mount point directory | |
+ # mkdir /mnt/f2fs | |
+ | |
+4. Format the block device, and then mount as f2fs | |
+ # mkfs.f2fs -l label /dev/block_device | |
+ # mount -t f2fs /dev/block_device /mnt/f2fs | |
+ | |
+mkfs.f2fs | |
+--------- | |
+The mkfs.f2fs tool formats a partition as an f2fs filesystem, which builds a | |
+basic on-disk layout. | |
+ | |
+The options consist of: | |
+-l [label] : Give a volume label of up to 512 Unicode characters. | |
+-a [0 or 1] : Split the start location of each area for heap-based allocation. | |
+ 1 is set by default, which enables this behavior. | |
+-o [int] : Set overprovision ratio in percent over volume size. | |
+ 5 is set by default. | |
+-s [int] : Set the number of segments per section. | |
+ 1 is set by default. | |
+-z [int] : Set the number of sections per zone. | |
+ 1 is set by default. | |
+-e [str] : Set basic extension list. e.g. "mp3,gif,mov" | |
+-t [0 or 1] : Whether to issue the discard command. | |
+ 1 is set by default, which issues discard. | |
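+ | |
+For example, to format a device with a label, 5% overprovisioning, and | |
+discard enabled (the device path is illustrative): | |
+ | |
+ # mkfs.f2fs -l DATA -o 5 -t 1 /dev/sdb1 | |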
+ | |
+fsck.f2fs | |
+--------- | |
+The fsck.f2fs tool checks the consistency of an f2fs-formatted partition, | |
+examining whether the filesystem metadata and user data are cross-referenced | |
+correctly or not. | |
+Note that the initial version of the tool does not fix any inconsistency. | |
+ | |
+The options consist of: | |
+ -d debug level [default:0] | |
+ | |
+dump.f2fs | |
+--------- | |
+The dump.f2fs tool shows the information of a specific inode and dumps the SSA | |
+and SIT to files named dump_ssa and dump_sit. | |
+ | |
+The dump.f2fs tool is used to debug the on-disk data structures of the f2fs | |
+filesystem. It shows the on-disk inode information identified by a given inode | |
+number, and can dump all the SSA and SIT entries into the predefined files | |
+./dump_ssa and ./dump_sit respectively. | |
+ | |
+The options consist of: | |
+ -d debug level [default:0] | |
+ -i inode no (hex) | |
+ -s [SIT dump segno from #1~#2 (decimal), for all 0~-1] | |
+ -a [SSA dump segno from #1~#2 (decimal), for all 0~-1] | |
+ | |
+Examples: | |
+# dump.f2fs -i [ino] /dev/sdx | |
+# dump.f2fs -s 0~-1 /dev/sdx (SIT dump) | |
+# dump.f2fs -a 0~-1 /dev/sdx (SSA dump) | |
+ | |
+================================================================================ | |
+DESIGN | |
+================================================================================ | |
+ | |
+On-disk Layout | |
+-------------- | |
+ | |
+F2FS divides the whole volume into a number of segments, each of which is fixed | |
+at 2MB in size. A section is composed of consecutive segments, and a zone | |
+consists of a set of sections. By default, section and zone sizes are both set | |
+to one segment, but users can easily modify the sizes via mkfs options. | |
+ | |
+F2FS splits the entire volume into six areas, and all areas except the | |
+superblock consist of multiple segments as described below. | |
+ | |
+ align with the zone size <-| | |
+ |-> align with the segment size | |
+ _________________________________________________________________________ | |
+ | | | Segment | Node | Segment | | | |
+ | Superblock | Checkpoint | Info. | Address | Summary | Main | | |
+ | (SB) | (CP) | Table (SIT) | Table (NAT) | Area (SSA) | | | |
+ |____________|_____2______|______N______|______N______|______N_____|__N___| | |
+ . . | |
+ . . | |
+ . . | |
+ ._________________________________________. | |
+ |_Segment_|_..._|_Segment_|_..._|_Segment_| | |
+ . . | |
+ ._________._________ | |
+ |_section_|__...__|_ | |
+ . . | |
+ .________. | |
+ |__zone__| | |
+ | |
+- Superblock (SB) | |
+ : It is located at the beginning of the partition, and two copies exist to | |
+ guard against file system corruption. It contains basic partition information | |
+ and some default parameters of f2fs. | |
+ | |
+- Checkpoint (CP) | |
+ : It contains file system information, bitmaps for valid NAT/SIT sets, orphan | |
+ inode lists, and summary entries of current active segments. | |
+ | |
+- Segment Information Table (SIT) | |
+ : It contains segment information such as valid block count and bitmap for the | |
+ validity of all the blocks. | |
+ | |
+- Node Address Table (NAT) | |
+ : It is composed of a block address table for all the node blocks stored in | |
+ Main area. | |
+ | |
+- Segment Summary Area (SSA) | |
+ : It contains summary entries which describe the owner information of all the | |
+ data and node blocks stored in Main area. | |
+ | |
+- Main Area | |
+ : It contains file and directory data including their indices. | |
+ | |
+In order to avoid misalignment between file system and flash-based storage, F2FS | |
+aligns the start block address of CP with the segment size. Also, it aligns the | |
+start block address of Main area with the zone size by reserving some segments | |
+in SSA area. | |
+ | |
+See the following survey for additional technical details. | |
+https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey | |
+ | |
+File System Metadata Structure | |
+------------------------------ | |
+ | |
+F2FS adopts a checkpointing scheme to maintain file system consistency. At | |
+mount time, F2FS first tries to find the last valid checkpoint data by scanning | |
+the CP area. To reduce scanning time, F2FS uses only two copies of the CP. One | |
+of them always holds the last valid data; this is called the shadow copy | |
+mechanism. In addition to the CP, the NAT and SIT also adopt this mechanism. | |
+ | |
+For file system consistency, each CP records which NAT and SIT copies are | |
+valid, as shown below. | |
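+For example, if the current checkpoint marks NAT set #0 as valid, the next | |
+checkpoint writes updated NAT blocks into set #1 and flips the bitmap. | |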
+ | |
+ +--------+----------+---------+ | |
+ | CP | SIT | NAT | | |
+ +--------+----------+---------+ | |
+ . . . . | |
+ . . . . | |
+ . . . . | |
+ +-------+-------+--------+--------+--------+--------+ | |
+ | CP #0 | CP #1 | SIT #0 | SIT #1 | NAT #0 | NAT #1 | | |
+ +-------+-------+--------+--------+--------+--------+ | |
+ | ^ ^ | |
+ | | | | |
+ `----------------------------------------' | |
+ | |
+Index Structure | |
+--------------- | |
+ | |
+The key data structure for managing data locations is the "node". Similar to | |
+traditional file structures, F2FS has three types of node: inode, direct node, | |
+and indirect node. F2FS assigns 4KB to an inode block, which contains 923 data | |
+block indices, two direct node pointers, two indirect node pointers, and one | |
+double indirect node pointer, as described below. One direct node block points | |
+to 1018 data blocks, and one indirect node block likewise points to 1018 node | |
+blocks. Thus, one inode block (i.e., a file) covers: | |
+ | |
+ 4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB. | |
+ | |
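+Expanding the terms: 923 + 2,036 + 2,072,648 + 1,054,977,832 = 1,057,053,439 | |
+blocks; at 4KB per block this is roughly 4,032GB, or about 3.94TB. | |
+ | |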
+ Inode block (4KB) | |
+ |- data (923) | |
+ |- direct node (2) | |
+ | `- data (1018) | |
+ |- indirect node (2) | |
+ | `- direct node (1018) | |
+ | `- data (1018) | |
+ `- double indirect node (1) | |
+ `- indirect node (1018) | |
+ `- direct node (1018) | |
+ `- data (1018) | |
+ | |
+Note that all the node blocks are mapped by the NAT, which means the location | |
+of each node is translated through the NAT. With this indirection, F2FS cuts | |
+off the propagation of node updates caused by leaf data writes, avoiding the | |
+wandering tree problem. | |
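+For example, when a leaf data block is relocated, only its direct node block | |
+and the corresponding NAT entry need to be rewritten; the inode and indirect | |
+node blocks are untouched. | |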
+ | |
+Directory Structure | |
+------------------- | |
+ | |
+A directory entry occupies 11 bytes, which consists of the following attributes. | |
+ | |
+- hash hash value of the file name | |
+- ino inode number | |
+- len the length of file name | |
+- type file type such as directory, symlink, etc | |
+ | |
+A dentry block consists of 214 dentry slots and file names. Therein a bitmap is | |
+used to represent whether each dentry is valid or not. A dentry block occupies | |
+4KB with the following composition. | |
+ | |
+ Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) + | |
+ dentries(11 * 214 bytes) + file name (8 * 214 bytes) | |
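+As a check, these fields exactly fill one block: 27 + 3 + 11 * 214 + | |
+8 * 214 = 4,096 bytes. | |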
+ | |
+ [Bucket] | |
+ +--------------------------------+ | |
+ |dentry block 1 | dentry block 2 | | |
+ +--------------------------------+ | |
+ . . | |
+ . . | |
+ . [Dentry Block Structure: 4KB] . | |
+ +--------+----------+----------+------------+ | |
+ | bitmap | reserved | dentries | file names | | |
+ +--------+----------+----------+------------+ | |
+ [Dentry Block: 4KB] . . | |
+ . . | |
+ . . | |
+ +------+------+-----+------+ | |
+ | hash | ino | len | type | | |
+ +------+------+-----+------+ | |
+ [Dentry Structure: 11 bytes] | |
+ | |
+F2FS implements multi-level hash tables for the directory structure. Each level | |
+has a hash table with a dedicated number of hash buckets as shown below. Note | |
+that "A(2B)" means a bucket includes 2 data blocks. | |
+ | |
+---------------------- | |
+A : bucket | |
+B : block | |
+N : MAX_DIR_HASH_DEPTH | |
+---------------------- | |
+ | |
+level #0 | A(2B) | |
+ | | |
+level #1 | A(2B) - A(2B) | |
+ | | |
+level #2 | A(2B) - A(2B) - A(2B) - A(2B) | |
+ . | . . . . | |
+level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) | |
+ . | . . . . | |
+level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) | |
+ | |
+The numbers of blocks and buckets in each level are determined as follows: | |
+ | |
+ ,- 2, if n < MAX_DIR_HASH_DEPTH / 2, | |
+ # of blocks in level #n = | | |
+ `- 4, Otherwise | |
+ | |
+ ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2, | |
+ # of buckets in level #n = | | |
+ `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise | |
+ | |
+When F2FS looks up a file name in a directory, the hash value of the file name | |
+is calculated first. Then, F2FS scans the hash table in level #0 to find the | |
+dentry consisting of the file name and its inode number. If not found, F2FS | |
+scans the next hash table in level #1. In this way, F2FS scans hash tables in | |
+each level incrementally from 0 to N. In each level F2FS needs to scan only | |
+one bucket, determined by the following equation, which gives O(log(# of | |
+files)) complexity. | |
+ | |
+ bucket number to scan in level #n = (hash value) % (# of buckets in level #n) | |
+ | |
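+As a rough illustration, the bucket selection can be sketched in C (this is | |
+a simplified model, not the kernel's actual implementation): | |
+ | |
+ /* buckets in level #n: 2^n below MAX_DIR_HASH_DEPTH / 2, then constant */ | |
+ static unsigned int buckets_in_level(unsigned int n, unsigned int max_depth) | |
+ { | |
+ return (n < max_depth / 2) ? 1U << n : 1U << (max_depth / 2 - 1); | |
+ } | |
+ | |
+ /* bucket number to scan in level #n for a given name hash */ | |
+ static unsigned int bucket_to_scan(unsigned int n, unsigned int hash, | |
+ unsigned int max_depth) | |
+ { | |
+ return hash % buckets_in_level(n, max_depth); | |
+ } | |
+ | |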
+In the case of file creation, F2FS finds empty consecutive slots that cover the | |
+file name. F2FS searches for the empty slots in the hash tables of all levels | |
+from 0 to N in the same way as the lookup operation. | |
+ | |
+The following figure shows two example cases of how a directory holds its children. | |
+ --------------> Dir <-------------- | |
+ | | | |
+ child child | |
+ | |
+ child - child [hole] - child | |
+ | |
+ child - child - child [hole] - [hole] - child | |
+ | |
+ Case 1: Case 2: | |
+ Number of children = 6, Number of children = 3, | |
+ File size = 7 File size = 7 | |
+ | |
+Default Block Allocation | |
+------------------------ | |
+ | |
+At runtime, F2FS manages six active logs inside "Main" area: Hot/Warm/Cold node | |
+and Hot/Warm/Cold data. | |
+ | |
+- Hot node contains direct node blocks of directories. | |
+- Warm node contains direct node blocks except hot node blocks. | |
+- Cold node contains indirect node blocks. | |
+- Hot data contains dentry blocks. | |
+- Warm data contains data blocks except hot and cold data blocks. | |
+- Cold data contains multimedia data or migrated data blocks. | |
+ | |
+LFS has two schemes for free space management: threaded log and | |
+copy-and-compaction. The copy-and-compaction scheme, known as cleaning, is | |
+well-suited for devices showing very good sequential write performance, since | |
+free segments are always available for writing new data. However, it suffers | |
+from cleaning overhead under high utilization. Conversely, the threaded log | |
+scheme suffers from random writes, but needs no cleaning process. F2FS adopts | |
+a hybrid scheme: copy-and-compaction is used by default, but the policy is | |
+dynamically changed to the threaded log scheme according to the file system | |
+status. | |
+ | |
+In order to align F2FS with underlying flash-based storage, F2FS allocates a | |
+segment in a unit of section. F2FS expects that the section size would be the | |
+same as the unit size of garbage collection in FTL. Furthermore, with respect | |
+to the mapping granularity in FTL, F2FS allocates each section of the active | |
+logs from different zones as much as possible, since FTL can write the data in | |
+the active logs into one allocation unit according to its mapping granularity. | |
+ | |
+Cleaning process | |
+---------------- | |
+ | |
+F2FS does cleaning both on demand and in the background. On-demand cleaning is | |
+triggered when there are not enough free segments to serve VFS calls. The | |
+background cleaner runs as a kernel thread and triggers the cleaning job when | |
+the system is idle. | |
+ | |
+F2FS supports two victim selection policies: the greedy and cost-benefit | |
+algorithms. In the greedy algorithm, F2FS selects the victim segment with the | |
+smallest number of valid blocks. In the cost-benefit algorithm, F2FS selects a | |
+victim segment according to the segment age and the number of valid blocks, to | |
+address the log block thrashing problem of the greedy algorithm. F2FS adopts | |
+the greedy algorithm for the on-demand cleaner, while the background cleaner | |
+adopts the cost-benefit algorithm. | |
+ | |
+In order to identify whether the data in the victim segment are valid or not, | |
+F2FS manages a bitmap. Each bit represents the validity of a block, and the | |
+bitmap is composed of a bit stream covering all blocks in the Main area. | |
\ No newline at end of file | |
diff --git a/arch/arm/configs/tegra3_android_defconfig b/arch/arm/configs/tegra3_android_defconfig | |
index ff5dde7..6d12133 100644 | |
--- a/arch/arm/configs/tegra3_android_defconfig | |
+++ b/arch/arm/configs/tegra3_android_defconfig | |
@@ -484,6 +484,9 @@ CONFIG_FUSE_FS=y | |
CONFIG_VFAT_FS=y | |
CONFIG_NTFS_FS=y | |
CONFIG_TMPFS=y | |
+CONFIG_F2FS_FS=y | |
+CONFIG_F2FS_FS_XATTR=y | |
+CONFIG_F2FS_FS_SECURITY=y | |
CONFIG_NFS_FS=y | |
CONFIG_ROOT_NFS=y | |
CONFIG_PARTITION_ADVANCED=y | |
diff --git a/fs/Kconfig b/fs/Kconfig | |
index 99453ba..47fe939 100644 | |
--- a/fs/Kconfig | |
+++ b/fs/Kconfig | |
@@ -219,6 +219,7 @@ source "fs/pstore/Kconfig" | |
source "fs/sysv/Kconfig" | |
source "fs/ufs/Kconfig" | |
source "fs/exofs/Kconfig" | |
+source "fs/f2fs/Kconfig" | |
endif # MISC_FILESYSTEMS | |
diff --git a/fs/Makefile b/fs/Makefile | |
index a8bbb32..974efc2 100644 | |
--- a/fs/Makefile | |
+++ b/fs/Makefile | |
@@ -120,6 +120,7 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/ | |
obj-$(CONFIG_OCFS2_FS) += ocfs2/ | |
obj-$(CONFIG_BTRFS_FS) += btrfs/ | |
obj-$(CONFIG_GFS2_FS) += gfs2/ | |
+obj-$(CONFIG_F2FS_FS) += f2fs/ | |
obj-$(CONFIG_EXOFS_FS) += exofs/ | |
obj-$(CONFIG_CEPH_FS) += ceph/ | |
obj-$(CONFIG_PSTORE) += pstore/ | |
diff --git a/fs/dcache.c b/fs/dcache.c | |
index 8b732a2..239f5e6 100644 | |
--- a/fs/dcache.c | |
+++ b/fs/dcache.c | |
@@ -1438,7 +1438,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode) | |
return alias; | |
} | |
-static struct dentry * d_find_any_alias(struct inode *inode) | |
+struct dentry * d_find_any_alias(struct inode *inode) | |
{ | |
struct dentry *de; | |
@@ -1447,7 +1447,7 @@ static struct dentry * d_find_any_alias(struct inode *inode) | |
spin_unlock(&inode->i_lock); | |
return de; | |
} | |
- | |
+EXPORT_SYMBOL(d_find_any_alias); | |
/** | |
* d_obtain_alias - find or allocate a dentry for a given inode | |
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig | |
new file mode 100644 | |
index 0000000..e06e099 | |
--- /dev/null | |
+++ b/fs/f2fs/Kconfig | |
@@ -0,0 +1,65 @@ | |
+config F2FS_FS | |
+ tristate "F2FS filesystem support (EXPERIMENTAL)" | |
+ depends on BLOCK | |
+ help | |
+ F2FS is based on Log-structured File System (LFS), which supports | |
+ versatile "flash-friendly" features. The design has been focused on | |
+ addressing the fundamental issues in LFS, which are snowball effect | |
+ of wandering tree and high cleaning overhead. | |
+ | |
+ Since flash-based storage devices show different characteristics | |
+ according to the internal geometry or flash memory management scheme, | |
+ aka FTL, F2FS and its tools support various parameters for configuring | |
+ the on-disk layout and selecting allocation and cleaning algorithms. | |
+ | |
+ If unsure, say N. | |
+ | |
+config F2FS_STAT_FS | |
+ bool "F2FS Status Information" | |
+ depends on F2FS_FS && DEBUG_FS | |
+ default y | |
+ help | |
+ /sys/kernel/debug/f2fs/ contains information about all the partitions | |
+ mounted as f2fs. Each file shows the whole f2fs information. | |
+ | |
+ /sys/kernel/debug/f2fs/status includes: | |
+ - major file system information managed by f2fs currently | |
+ - average SIT information about whole segments | |
+ - current memory footprint consumed by f2fs. | |
+ | |
+config F2FS_FS_XATTR | |
+ bool "F2FS extended attributes" | |
+ depends on F2FS_FS | |
+ default y | |
+ help | |
+ Extended attributes are name:value pairs associated with inodes by | |
+ the kernel or by users (see the attr(5) manual page, or visit | |
+ <http://acl.bestbits.at/> for details). | |
+ | |
+ If unsure, say N. | |
+ | |
+config F2FS_FS_POSIX_ACL | |
+ bool "F2FS Access Control Lists" | |
+ depends on F2FS_FS_XATTR | |
+ select FS_POSIX_ACL | |
+ default y | |
+ help | |
+ Posix Access Control Lists (ACLs) support permissions for users and | |
+ groups beyond the owner/group/world scheme. | |
+ | |
+ To learn more about Access Control Lists, visit the POSIX ACLs for | |
+ Linux website <http://acl.bestbits.at/>. | |
+ | |
+ If you don't know what Access Control Lists are, say N. | |
+ | |
+config F2FS_FS_SECURITY | |
+ bool "F2FS Security Labels" | |
+ depends on F2FS_FS_XATTR | |
+ help | |
+ Security labels provide an access control facility for Linux | |
+ Security Modules (LSMs) such as AppArmor, SELinux, Smack and TOMOYO | |
+ Linux. This option enables an extended attribute handler for file | |
+ security labels in the f2fs filesystem, and therefore requires | |
+ extended attribute support to be enabled in advance. | |
+ | |
+ If you are not using a security module, say N. | |
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile | |
new file mode 100644 | |
index 0000000..27a0820 | |
--- /dev/null | |
+++ b/fs/f2fs/Makefile | |
@@ -0,0 +1,7 @@ | |
+obj-$(CONFIG_F2FS_FS) += f2fs.o | |
+ | |
+f2fs-y := dir.o file.o inode.o namei.o hash.o super.o | |
+f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o | |
+f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o | |
+f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o | |
+f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o | |
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c | |
new file mode 100644 | |
index 0000000..b20ced3 | |
--- /dev/null | |
+++ b/fs/f2fs/acl.c | |
@@ -0,0 +1,423 @@ | |
+/* | |
+ * fs/f2fs/acl.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * Portions of this code from linux/fs/ext2/acl.c | |
+ * | |
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <[email protected]> | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/f2fs_fs.h> | |
+#include "f2fs.h" | |
+#include "xattr.h" | |
+#include "acl.h" | |
+ | |
+#define get_inode_mode(i) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ | |
+ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) | |
+ | |
+static inline size_t f2fs_acl_size(int count) | |
+{ | |
+ if (count <= 4) { | |
+ return sizeof(struct f2fs_acl_header) + | |
+ count * sizeof(struct f2fs_acl_entry_short); | |
+ } else { | |
+ return sizeof(struct f2fs_acl_header) + | |
+ 4 * sizeof(struct f2fs_acl_entry_short) + | |
+ (count - 4) * sizeof(struct f2fs_acl_entry); | |
+ } | |
+} | |
+ | |
+static inline int f2fs_acl_count(size_t size) | |
+{ | |
+ ssize_t s; | |
+ size -= sizeof(struct f2fs_acl_header); | |
+ s = size - 4 * sizeof(struct f2fs_acl_entry_short); | |
+ if (s < 0) { | |
+ if (size % sizeof(struct f2fs_acl_entry_short)) | |
+ return -1; | |
+ return size / sizeof(struct f2fs_acl_entry_short); | |
+ } else { | |
+ if (s % sizeof(struct f2fs_acl_entry)) | |
+ return -1; | |
+ return s / sizeof(struct f2fs_acl_entry) + 4; | |
+ } | |
+} | |
+ | |
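+/* | |
+ * Worked example (illustrative): the header is 4 bytes, a short entry is | |
+ * 4 bytes and a full entry is 8 bytes, so six ACL entries occupy | |
+ * 4 + 4 * 4 + 2 * 8 = 36 bytes, and f2fs_acl_count(36) returns 6. | |
+ */ | |
+ | |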
+static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) | |
+{ | |
+ int i, count; | |
+ struct posix_acl *acl; | |
+ struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value; | |
+ struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); | |
+ const char *end = value + size; | |
+ | |
+ if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION)) | |
+ return ERR_PTR(-EINVAL); | |
+ | |
+ count = f2fs_acl_count(size); | |
+ if (count < 0) | |
+ return ERR_PTR(-EINVAL); | |
+ if (count == 0) | |
+ return NULL; | |
+ | |
+ acl = posix_acl_alloc(count, GFP_KERNEL); | |
+ if (!acl) | |
+ return ERR_PTR(-ENOMEM); | |
+ | |
+ for (i = 0; i < count; i++) { | |
+ | |
+ if ((char *)entry > end) | |
+ goto fail; | |
+ | |
+ acl->a_entries[i].e_tag = le16_to_cpu(entry->e_tag); | |
+ acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm); | |
+ | |
+ switch (acl->a_entries[i].e_tag) { | |
+ case ACL_USER_OBJ: | |
+ case ACL_GROUP_OBJ: | |
+ case ACL_MASK: | |
+ case ACL_OTHER: | |
+ entry = (struct f2fs_acl_entry *)((char *)entry + | |
+ sizeof(struct f2fs_acl_entry_short)); | |
+ break; | |
+ | |
+ case ACL_USER: | |
+ case ACL_GROUP: | |
+ acl->a_entries[i].e_id = le32_to_cpu(entry->e_id); | |
+ entry = (struct f2fs_acl_entry *)((char *)entry + | |
+ sizeof(struct f2fs_acl_entry)); | |
+ break; | |
+ default: | |
+ goto fail; | |
+ } | |
+ } | |
+ if ((char *)entry != end) | |
+ goto fail; | |
+ return acl; | |
+fail: | |
+ posix_acl_release(acl); | |
+ return ERR_PTR(-EINVAL); | |
+} | |
+ | |
+static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) | |
+{ | |
+ struct f2fs_acl_header *f2fs_acl; | |
+ struct f2fs_acl_entry *entry; | |
+ int i; | |
+ | |
+ f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * | |
+ sizeof(struct f2fs_acl_entry), GFP_KERNEL); | |
+ if (!f2fs_acl) | |
+ return ERR_PTR(-ENOMEM); | |
+ | |
+ f2fs_acl->a_version = cpu_to_le32(F2FS_ACL_VERSION); | |
+ entry = (struct f2fs_acl_entry *)(f2fs_acl + 1); | |
+ | |
+ for (i = 0; i < acl->a_count; i++) { | |
+ | |
+ entry->e_tag = cpu_to_le16(acl->a_entries[i].e_tag); | |
+ entry->e_perm = cpu_to_le16(acl->a_entries[i].e_perm); | |
+ | |
+ switch (acl->a_entries[i].e_tag) { | |
+ case ACL_USER: | |
+ case ACL_GROUP: | |
+ entry->e_id = cpu_to_le32(acl->a_entries[i].e_id); | |
+ entry = (struct f2fs_acl_entry *)((char *)entry + | |
+ sizeof(struct f2fs_acl_entry)); | |
+ break; | |
+ case ACL_USER_OBJ: | |
+ case ACL_GROUP_OBJ: | |
+ case ACL_MASK: | |
+ case ACL_OTHER: | |
+ entry = (struct f2fs_acl_entry *)((char *)entry + | |
+ sizeof(struct f2fs_acl_entry_short)); | |
+ break; | |
+ default: | |
+ goto fail; | |
+ } | |
+ } | |
+ *size = f2fs_acl_size(acl->a_count); | |
+ return (void *)f2fs_acl; | |
+ | |
+fail: | |
+ kfree(f2fs_acl); | |
+ return ERR_PTR(-EINVAL); | |
+} | |
+ | |
+struct posix_acl *f2fs_get_acl(struct inode *inode, int type) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; | |
+ void *value = NULL; | |
+ struct posix_acl *acl; | |
+ int retval; | |
+ | |
+ if (!test_opt(sbi, POSIX_ACL)) | |
+ return NULL; | |
+ | |
+ acl = get_cached_acl(inode, type); | |
+ if (acl != ACL_NOT_CACHED) | |
+ return acl; | |
+ | |
+ if (type == ACL_TYPE_ACCESS) | |
+ name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; | |
+ | |
+ retval = f2fs_getxattr(inode, name_index, "", NULL, 0); | |
+ if (retval > 0) { | |
+ value = kmalloc(retval, GFP_KERNEL); | |
+ if (!value) | |
+ return ERR_PTR(-ENOMEM); | |
+ retval = f2fs_getxattr(inode, name_index, "", value, retval); | |
+ } | |
+ | |
+ if (retval > 0) | |
+ acl = f2fs_acl_from_disk(value, retval); | |
+ else if (retval == -ENODATA) | |
+ acl = NULL; | |
+ else | |
+ acl = ERR_PTR(retval); | |
+ kfree(value); | |
+ | |
+ if (!IS_ERR(acl)) | |
+ set_cached_acl(inode, type, acl); | |
+ | |
+ return acl; | |
+} | |
+ | |
+static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct f2fs_inode_info *fi = F2FS_I(inode); | |
+ int name_index; | |
+ void *value = NULL; | |
+ size_t size = 0; | |
+ int error; | |
+ | |
+ if (!test_opt(sbi, POSIX_ACL)) | |
+ return 0; | |
+ if (S_ISLNK(inode->i_mode)) | |
+ return -EOPNOTSUPP; | |
+ | |
+ switch (type) { | |
+ case ACL_TYPE_ACCESS: | |
+ name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; | |
+ if (acl) { | |
+ error = posix_acl_equiv_mode(acl, &inode->i_mode); | |
+ if (error < 0) | |
+ return error; | |
+ set_acl_inode(fi, inode->i_mode); | |
+ if (error == 0) | |
+ acl = NULL; | |
+ } | |
+ break; | |
+ | |
+ case ACL_TYPE_DEFAULT: | |
+ name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; | |
+ if (!S_ISDIR(inode->i_mode)) | |
+ return acl ? -EACCES : 0; | |
+ break; | |
+ | |
+ default: | |
+ return -EINVAL; | |
+ } | |
+ | |
+ if (acl) { | |
+ value = f2fs_acl_to_disk(acl, &size); | |
+ if (IS_ERR(value)) { | |
+ cond_clear_inode_flag(fi, FI_ACL_MODE); | |
+ return (int)PTR_ERR(value); | |
+ } | |
+ } | |
+ | |
+ error = f2fs_setxattr(inode, name_index, "", value, size, NULL); | |
+ | |
+ kfree(value); | |
+ if (!error) | |
+ set_cached_acl(inode, type, acl); | |
+ | |
+ cond_clear_inode_flag(fi, FI_ACL_MODE); | |
+ return error; | |
+} | |
+ | |
+int f2fs_init_acl(struct inode *inode, struct inode *dir) | |
+{ | |
+ struct posix_acl *acl = NULL; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | |
+ int error = 0; | |
+ | |
+ if (!S_ISLNK(inode->i_mode)) { | |
+ if (test_opt(sbi, POSIX_ACL)) { | |
+ acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT); | |
+ if (IS_ERR(acl)) | |
+ return PTR_ERR(acl); | |
+ } | |
+ if (!acl && !(test_opt(sbi, ANDROID_EMU) && | |
+ F2FS_I(inode)->i_advise & FADVISE_ANDROID_EMU)) | |
+ inode->i_mode &= ~current_umask(); | |
+ } | |
+ | |
+ if (test_opt(sbi, POSIX_ACL) && acl) { | |
+ | |
+ if (S_ISDIR(inode->i_mode)) { | |
+ error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl); | |
+ if (error) | |
+ goto cleanup; | |
+ } | |
+ error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); | |
+ if (error < 0) | |
+ return error; | |
+ if (error > 0) | |
+ error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); | |
+ } | |
+cleanup: | |
+ posix_acl_release(acl); | |
+ return error; | |
+} | |
+ | |
+int f2fs_acl_chmod(struct inode *inode) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct posix_acl *acl; | |
+ int error; | |
+ mode_t mode = get_inode_mode(inode); | |
+ | |
+ if (!test_opt(sbi, POSIX_ACL)) | |
+ return 0; | |
+ if (S_ISLNK(mode)) | |
+ return -EOPNOTSUPP; | |
+ | |
+ acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS); | |
+ if (IS_ERR(acl) || !acl) | |
+ return PTR_ERR(acl); | |
+ | |
+ error = posix_acl_chmod(&acl, GFP_KERNEL, mode); | |
+ if (error) | |
+ return error; | |
+ error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); | |
+ posix_acl_release(acl); | |
+ return error; | |
+} | |
+ | |
+int f2fs_android_emu(struct f2fs_sb_info *sbi, struct inode *inode, | |
+ u32 *uid, u32 *gid, umode_t *mode) | |
+{ | |
+ F2FS_I(inode)->i_advise |= FADVISE_ANDROID_EMU; | |
+ | |
+ if (uid) | |
+ *uid = sbi->android_emu_uid; | |
+ if (gid) | |
+ *gid = sbi->android_emu_gid; | |
+ if (mode) { | |
+ *mode = (*mode & ~S_IRWXUGO) | sbi->android_emu_mode; | |
+ if (F2FS_I(inode)->i_advise & FADVISE_ANDROID_EMU_ROOT) | |
+ *mode &= ~S_IRWXO; | |
+ if (S_ISDIR(*mode)) { | |
+ if (*mode & S_IRUSR) | |
+ *mode |= S_IXUSR; | |
+ if (*mode & S_IRGRP) | |
+ *mode |= S_IXGRP; | |
+ if (*mode & S_IROTH) | |
+ *mode |= S_IXOTH; | |
+ } | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list, | |
+ size_t list_size, const char *name, size_t name_len, int type) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); | |
+ const char *xname = POSIX_ACL_XATTR_DEFAULT; | |
+ size_t size; | |
+ | |
+ if (!test_opt(sbi, POSIX_ACL)) | |
+ return 0; | |
+ | |
+ if (type == ACL_TYPE_ACCESS) | |
+ xname = POSIX_ACL_XATTR_ACCESS; | |
+ | |
+ size = strlen(xname) + 1; | |
+ if (list && size <= list_size) | |
+ memcpy(list, xname, size); | |
+ return size; | |
+} | |
+ | |
+static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name, | |
+ void *buffer, size_t size, int type) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); | |
+ struct posix_acl *acl; | |
+ int error; | |
+ | |
+ if (strcmp(name, "") != 0) | |
+ return -EINVAL; | |
+ if (!test_opt(sbi, POSIX_ACL)) | |
+ return -EOPNOTSUPP; | |
+ | |
+ acl = f2fs_get_acl(dentry->d_inode, type); | |
+ if (IS_ERR(acl)) | |
+ return PTR_ERR(acl); | |
+ if (!acl) | |
+ return -ENODATA; | |
+ error = posix_acl_to_xattr(acl, buffer, size); | |
+ posix_acl_release(acl); | |
+ | |
+ return error; | |
+} | |
+ | |
+static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name, | |
+ const void *value, size_t size, int flags, int type) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); | |
+ struct inode *inode = dentry->d_inode; | |
+ struct posix_acl *acl = NULL; | |
+ int error; | |
+ | |
+ if (strcmp(name, "") != 0) | |
+ return -EINVAL; | |
+ if (!test_opt(sbi, POSIX_ACL)) | |
+ return -EOPNOTSUPP; | |
+ if (!inode_owner_or_capable(inode)) | |
+ return -EPERM; | |
+ | |
+ if (value) { | |
+ acl = posix_acl_from_xattr(value, size); | |
+ if (IS_ERR(acl)) | |
+ return PTR_ERR(acl); | |
+ if (acl) { | |
+ error = posix_acl_valid(acl); | |
+ if (error) | |
+ goto release_and_out; | |
+ } | |
+ } else { | |
+ acl = NULL; | |
+ } | |
+ | |
+ error = f2fs_set_acl(inode, type, acl); | |
+ | |
+release_and_out: | |
+ posix_acl_release(acl); | |
+ return error; | |
+} | |
+ | |
+const struct xattr_handler f2fs_xattr_acl_default_handler = { | |
+ .prefix = POSIX_ACL_XATTR_DEFAULT, | |
+ .flags = ACL_TYPE_DEFAULT, | |
+ .list = f2fs_xattr_list_acl, | |
+ .get = f2fs_xattr_get_acl, | |
+ .set = f2fs_xattr_set_acl, | |
+}; | |
+ | |
+const struct xattr_handler f2fs_xattr_acl_access_handler = { | |
+ .prefix = POSIX_ACL_XATTR_ACCESS, | |
+ .flags = ACL_TYPE_ACCESS, | |
+ .list = f2fs_xattr_list_acl, | |
+ .get = f2fs_xattr_get_acl, | |
+ .set = f2fs_xattr_set_acl, | |
+}; | |
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h | |
new file mode 100644 | |
index 0000000..80f4306 | |
--- /dev/null | |
+++ b/fs/f2fs/acl.h | |
@@ -0,0 +1,57 @@ | |
+/* | |
+ * fs/f2fs/acl.h | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * Portions of this code from linux/fs/ext2/acl.h | |
+ * | |
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <[email protected]> | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#ifndef __F2FS_ACL_H__ | |
+#define __F2FS_ACL_H__ | |
+ | |
+#include <linux/posix_acl_xattr.h> | |
+ | |
+#define F2FS_ACL_VERSION 0x0001 | |
+ | |
+struct f2fs_acl_entry { | |
+ __le16 e_tag; | |
+ __le16 e_perm; | |
+ __le32 e_id; | |
+}; | |
+ | |
+struct f2fs_acl_entry_short { | |
+ __le16 e_tag; | |
+ __le16 e_perm; | |
+}; | |
+ | |
+struct f2fs_acl_header { | |
+ __le32 a_version; | |
+}; | |
+ | |
+#ifdef CONFIG_F2FS_FS_POSIX_ACL | |
+ | |
+extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type); | |
+extern int f2fs_acl_chmod(struct inode *inode); | |
+extern int f2fs_init_acl(struct inode *inode, struct inode *dir); | |
+#else | |
+#define f2fs_check_acl NULL | |
+#define f2fs_get_acl NULL | |
+#define f2fs_set_acl NULL | |
+ | |
+static inline int f2fs_acl_chmod(struct inode *inode) | |
+{ | |
+ return 0; | |
+} | |
+ | |
+static inline int f2fs_init_acl(struct inode *inode, struct inode *dir) | |
+{ | |
+ return 0; | |
+} | |
+#endif | |
+#endif /* __F2FS_ACL_H__ */ | |
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c | |
new file mode 100644 | |
index 0000000..db6a633 | |
--- /dev/null | |
+++ b/fs/f2fs/checkpoint.c | |
@@ -0,0 +1,860 @@ | |
+/* | |
+ * fs/f2fs/checkpoint.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/fs.h> | |
+#include <linux/bio.h> | |
+#include <linux/mpage.h> | |
+#include <linux/writeback.h> | |
+#include <linux/blkdev.h> | |
+#include <linux/f2fs_fs.h> | |
+#include <linux/pagevec.h> | |
+#include <linux/swap.h> | |
+ | |
+#include "f2fs.h" | |
+#include "node.h" | |
+#include "segment.h" | |
+#include <trace/events/f2fs.h> | |
+ | |
+static struct kmem_cache *orphan_entry_slab; | |
+static struct kmem_cache *inode_entry_slab; | |
+ | |
+/* | |
+ * We guarantee no failure on the returned page. | |
+ */ | |
+struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) | |
+{ | |
+ struct address_space *mapping = sbi->meta_inode->i_mapping; | |
+ struct page *page = NULL; | |
+repeat: | |
+ page = grab_cache_page(mapping, index); | |
+ if (!page) { | |
+ cond_resched(); | |
+ goto repeat; | |
+ } | |
+ | |
+ /* We wait writeback only inside grab_meta_page() */ | |
+ wait_on_page_writeback(page); | |
+ SetPageUptodate(page); | |
+ return page; | |
+} | |
+ | |
+/* | |
+ * We guarantee no failure on the returned page. | |
+ */ | |
+struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) | |
+{ | |
+ struct address_space *mapping = sbi->meta_inode->i_mapping; | |
+ struct page *page; | |
+repeat: | |
+ page = grab_cache_page(mapping, index); | |
+ if (!page) { | |
+ cond_resched(); | |
+ goto repeat; | |
+ } | |
+ if (PageUptodate(page)) | |
+ goto out; | |
+ | |
+ if (f2fs_readpage(sbi, page, index, READ_SYNC)) | |
+ goto repeat; | |
+ | |
+ lock_page(page); | |
+ if (page->mapping != mapping) { | |
+ f2fs_put_page(page, 1); | |
+ goto repeat; | |
+ } | |
+out: | |
+ mark_page_accessed(page); | |
+ return page; | |
+} | |
+ | |
+static int f2fs_write_meta_page(struct page *page, | |
+ struct writeback_control *wbc) | |
+{ | |
+ struct inode *inode = page->mapping->host; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ | |
+ /* Should not write any meta pages, if any IO error occurred */ | |
+ if (wbc->for_reclaim || sbi->por_doing || | |
+ is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) { | |
+ dec_page_count(sbi, F2FS_DIRTY_META); | |
+ wbc->pages_skipped++; | |
+ set_page_dirty(page); | |
+ return AOP_WRITEPAGE_ACTIVATE; | |
+ } | |
+ | |
+ wait_on_page_writeback(page); | |
+ | |
+ write_meta_page(sbi, page); | |
+ dec_page_count(sbi, F2FS_DIRTY_META); | |
+ unlock_page(page); | |
+ return 0; | |
+} | |
+ | |
+static int f2fs_write_meta_pages(struct address_space *mapping, | |
+ struct writeback_control *wbc) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | |
+ struct block_device *bdev = sbi->sb->s_bdev; | |
+ long written; | |
+ | |
+ if (wbc->for_kupdate) | |
+ return 0; | |
+ | |
+ if (get_pages(sbi, F2FS_DIRTY_META) == 0) | |
+ return 0; | |
+ | |
+ /* if mounting failed, skip writing meta pages */ | |
+ mutex_lock(&sbi->cp_mutex); | |
+ written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev)); | |
+ mutex_unlock(&sbi->cp_mutex); | |
+ wbc->nr_to_write -= written; | |
+ return 0; | |
+} | |
+ | |
+long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, | |
+ long nr_to_write) | |
+{ | |
+ struct address_space *mapping = sbi->meta_inode->i_mapping; | |
+ pgoff_t index = 0, end = LONG_MAX; | |
+ struct pagevec pvec; | |
+ long nwritten = 0; | |
+ struct writeback_control wbc = { | |
+ .for_reclaim = 0, | |
+ }; | |
+ | |
+ pagevec_init(&pvec, 0); | |
+ | |
+ while (index <= end) { | |
+ int i, nr_pages; | |
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | |
+ PAGECACHE_TAG_DIRTY, | |
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | |
+ if (nr_pages == 0) | |
+ break; | |
+ | |
+ for (i = 0; i < nr_pages; i++) { | |
+ struct page *page = pvec.pages[i]; | |
+ lock_page(page); | |
+ BUG_ON(page->mapping != mapping); | |
+ BUG_ON(!PageDirty(page)); | |
+ clear_page_dirty_for_io(page); | |
+ if (f2fs_write_meta_page(page, &wbc)) { | |
+ unlock_page(page); | |
+ break; | |
+ } | |
+ if (nwritten++ >= nr_to_write) | |
+ break; | |
+ } | |
+ pagevec_release(&pvec); | |
+ cond_resched(); | |
+ } | |
+ | |
+ if (nwritten) | |
+ f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX); | |
+ | |
+ return nwritten; | |
+} | |
+ | |
+static int f2fs_set_meta_page_dirty(struct page *page) | |
+{ | |
+ struct address_space *mapping = page->mapping; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | |
+ | |
+ SetPageUptodate(page); | |
+ if (!PageDirty(page)) { | |
+ __set_page_dirty_nobuffers(page); | |
+ inc_page_count(sbi, F2FS_DIRTY_META); | |
+ return 1; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+const struct address_space_operations f2fs_meta_aops = { | |
+ .writepage = f2fs_write_meta_page, | |
+ .writepages = f2fs_write_meta_pages, | |
+ .set_page_dirty = f2fs_set_meta_page_dirty, | |
+}; | |
+ | |
+int acquire_orphan_inode(struct f2fs_sb_info *sbi) | |
+{ | |
+ unsigned int max_orphans; | |
+ int err = 0; | |
+ | |
+ /* | |
+ * considering 512 blocks in a segment, 5 blocks are needed for the cp | |
+ * and log segment summaries. The remaining blocks are used to keep | |
+ * orphan entries. With one reserved segment for the cp pack, we can | |
+ * have at most 1020 * 507 orphan entries. | |
+ */ | |
+ max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK; | |
+ mutex_lock(&sbi->orphan_inode_mutex); | |
+ if (sbi->n_orphans >= max_orphans) | |
+ err = -ENOSPC; | |
+ else | |
+ sbi->n_orphans++; | |
+ mutex_unlock(&sbi->orphan_inode_mutex); | |
+ return err; | |
+} | |
+ | |
+void release_orphan_inode(struct f2fs_sb_info *sbi) | |
+{ | |
+ mutex_lock(&sbi->orphan_inode_mutex); | |
+ if (sbi->n_orphans == 0) { | |
+ f2fs_msg(sbi->sb, KERN_ERR, "releasing " | |
+ "unacquired orphan inode"); | |
+ f2fs_handle_error(sbi); | |
+ } else | |
+ sbi->n_orphans--; | |
+ mutex_unlock(&sbi->orphan_inode_mutex); | |
+} | |
+ | |
+void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | |
+{ | |
+ struct list_head *head, *this; | |
+ struct orphan_inode_entry *new = NULL, *orphan = NULL; | |
+ | |
+ mutex_lock(&sbi->orphan_inode_mutex); | |
+ head = &sbi->orphan_inode_list; | |
+ list_for_each(this, head) { | |
+ orphan = list_entry(this, struct orphan_inode_entry, list); | |
+ if (orphan->ino == ino) | |
+ goto out; | |
+ if (orphan->ino > ino) | |
+ break; | |
+ orphan = NULL; | |
+ } | |
+retry: | |
+ new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); | |
+ if (!new) { | |
+ cond_resched(); | |
+ goto retry; | |
+ } | |
+ new->ino = ino; | |
+ | |
+ /* add new_oentry into list which is sorted by inode number */ | |
+ if (orphan) | |
+ list_add(&new->list, this->prev); | |
+ else | |
+ list_add_tail(&new->list, head); | |
+out: | |
+ mutex_unlock(&sbi->orphan_inode_mutex); | |
+} | |
+ | |
+void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | |
+{ | |
+ struct list_head *head; | |
+ struct orphan_inode_entry *orphan; | |
+ | |
+ mutex_lock(&sbi->orphan_inode_mutex); | |
+ head = &sbi->orphan_inode_list; | |
+ list_for_each_entry(orphan, head, list) { | |
+ if (orphan->ino == ino) { | |
+ list_del(&orphan->list); | |
+ kmem_cache_free(orphan_entry_slab, orphan); | |
+ if (sbi->n_orphans == 0) { | |
+ f2fs_msg(sbi->sb, KERN_ERR, "removing " | |
+ "unacquired orphan inode %d", | |
+ ino); | |
+ f2fs_handle_error(sbi); | |
+ } else | |
+ sbi->n_orphans--; | |
+ break; | |
+ } | |
+ } | |
+ mutex_unlock(&sbi->orphan_inode_mutex); | |
+} | |
+ | |
+static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | |
+{ | |
+ struct inode *inode = f2fs_iget(sbi->sb, ino); | |
+ if (IS_ERR(inode)) { | |
+ f2fs_msg(sbi->sb, KERN_ERR, "unable to recover orphan inode %d", | |
+ ino); | |
+ f2fs_handle_error(sbi); | |
+ return; | |
+ } | |
+ clear_nlink(inode); | |
+ | |
+ /* truncate all the data during iput */ | |
+ iput(inode); | |
+} | |
+ | |
+int recover_orphan_inodes(struct f2fs_sb_info *sbi) | |
+{ | |
+ block_t start_blk, orphan_blkaddr, i, j; | |
+ | |
+ if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) | |
+ return 0; | |
+ | |
+ sbi->por_doing = 1; | |
+ start_blk = __start_cp_addr(sbi) + 1; | |
+ orphan_blkaddr = __start_sum_addr(sbi) - 1; | |
+ | |
+ for (i = 0; i < orphan_blkaddr; i++) { | |
+ struct page *page = get_meta_page(sbi, start_blk + i); | |
+ struct f2fs_orphan_block *orphan_blk; | |
+ | |
+ orphan_blk = (struct f2fs_orphan_block *)page_address(page); | |
+ for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { | |
+ nid_t ino = le32_to_cpu(orphan_blk->ino[j]); | |
+ recover_orphan_inode(sbi, ino); | |
+ } | |
+ f2fs_put_page(page, 1); | |
+ } | |
+ /* clear Orphan Flag */ | |
+ clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); | |
+ sbi->por_doing = 0; | |
+ return 0; | |
+} | |
+ | |
+static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) | |
+{ | |
+ struct list_head *head, *this, *next; | |
+ struct f2fs_orphan_block *orphan_blk = NULL; | |
+ struct page *page = NULL; | |
+ unsigned int nentries = 0; | |
+ unsigned short index = 1; | |
+ unsigned short orphan_blocks; | |
+ | |
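+ /* journal blocks needed: ceiling of n_orphans / F2FS_ORPHANS_PER_BLOCK */ | |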
+ orphan_blocks = (unsigned short)((sbi->n_orphans + | |
+ (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); | |
+ | |
+ mutex_lock(&sbi->orphan_inode_mutex); | |
+ head = &sbi->orphan_inode_list; | |
+ | |
+ /* loop over each orphan inode entry and write them to the journal block */ | |
+ list_for_each_safe(this, next, head) { | |
+ struct orphan_inode_entry *orphan; | |
+ | |
+ orphan = list_entry(this, struct orphan_inode_entry, list); | |
+ | |
+ if (nentries == F2FS_ORPHANS_PER_BLOCK) { | |
+ /* | |
+ * an orphan block is full of 1020 entries, | |
+ * then we need to flush current orphan blocks | |
+ * and bring another one in memory | |
+ */ | |
+ orphan_blk->blk_addr = cpu_to_le16(index); | |
+ orphan_blk->blk_count = cpu_to_le16(orphan_blocks); | |
+ orphan_blk->entry_count = cpu_to_le32(nentries); | |
+ set_page_dirty(page); | |
+ f2fs_put_page(page, 1); | |
+ index++; | |
+ start_blk++; | |
+ nentries = 0; | |
+ page = NULL; | |
+ } | |
+ if (page) | |
+ goto page_exist; | |
+ | |
+ page = grab_meta_page(sbi, start_blk); | |
+ orphan_blk = (struct f2fs_orphan_block *)page_address(page); | |
+ memset(orphan_blk, 0, sizeof(*orphan_blk)); | |
+page_exist: | |
+ orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); | |
+ } | |
+ if (!page) | |
+ goto end; | |
+ | |
+ orphan_blk->blk_addr = cpu_to_le16(index); | |
+ orphan_blk->blk_count = cpu_to_le16(orphan_blocks); | |
+ orphan_blk->entry_count = cpu_to_le32(nentries); | |
+ set_page_dirty(page); | |
+ f2fs_put_page(page, 1); | |
+end: | |
+ mutex_unlock(&sbi->orphan_inode_mutex); | |
+} | |
+ | |
+static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, | |
+ block_t cp_addr, unsigned long long *version) | |
+{ | |
+ struct page *cp_page_1, *cp_page_2 = NULL; | |
+ unsigned long blk_size = sbi->blocksize; | |
+ struct f2fs_checkpoint *cp_block; | |
+ unsigned long long cur_version = 0, pre_version = 0; | |
+ size_t crc_offset; | |
+ __u32 crc = 0; | |
+ | |
+ /* Read the 1st cp block in this CP pack */ | |
+ cp_page_1 = get_meta_page(sbi, cp_addr); | |
+ | |
+ /* get the version number */ | |
+ cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1); | |
+ crc_offset = le32_to_cpu(cp_block->checksum_offset); | |
+ if (crc_offset >= blk_size) | |
+ goto invalid_cp1; | |
+ | |
+ crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); | |
+ if (!f2fs_crc_valid(crc, cp_block, crc_offset)) | |
+ goto invalid_cp1; | |
+ | |
+ pre_version = cur_cp_version(cp_block); | |
+ | |
+ /* Read the 2nd cp block in this CP pack */ | |
+ cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; | |
+ cp_page_2 = get_meta_page(sbi, cp_addr); | |
+ | |
+ cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2); | |
+ crc_offset = le32_to_cpu(cp_block->checksum_offset); | |
+ if (crc_offset >= blk_size) | |
+ goto invalid_cp2; | |
+ | |
+ crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); | |
+ if (!f2fs_crc_valid(crc, cp_block, crc_offset)) | |
+ goto invalid_cp2; | |
+ | |
+ cur_version = cur_cp_version(cp_block); | |
+ | |
+ if (cur_version == pre_version) { | |
+ *version = cur_version; | |
+ f2fs_put_page(cp_page_2, 1); | |
+ return cp_page_1; | |
+ } | |
+invalid_cp2: | |
+ f2fs_put_page(cp_page_2, 1); | |
+invalid_cp1: | |
+ f2fs_put_page(cp_page_1, 1); | |
+ return NULL; | |
+} | |
+ | |
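+/* | |
+ * Validate both checkpoint packs and keep the one with the newer | |
+ * version, so that mount always resumes from the latest consistent | |
+ * checkpoint. | |
+ */ | |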
+int get_valid_checkpoint(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_checkpoint *cp_block; | |
+ struct f2fs_super_block *fsb = sbi->raw_super; | |
+ struct page *cp1, *cp2, *cur_page; | |
+ unsigned long blk_size = sbi->blocksize; | |
+ unsigned long long cp1_version = 0, cp2_version = 0; | |
+ unsigned long long cp_start_blk_no; | |
+ | |
+ sbi->ckpt = kzalloc(blk_size, GFP_KERNEL); | |
+ if (!sbi->ckpt) | |
+ return -ENOMEM; | |
+ /* | |
+	 * Finding the valid cp block involves reading both | |
+	 * sets (cp pack 1 and cp pack 2) | |
+ */ | |
+ cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr); | |
+ cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); | |
+ | |
+ /* The second checkpoint pack should start at the next segment */ | |
+ cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); | |
+ cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version); | |
+ | |
+ if (cp1 && cp2) { | |
+ if (ver_after(cp2_version, cp1_version)) | |
+ cur_page = cp2; | |
+ else | |
+ cur_page = cp1; | |
+ } else if (cp1) { | |
+ cur_page = cp1; | |
+ } else if (cp2) { | |
+ cur_page = cp2; | |
+ } else { | |
+ goto fail_no_cp; | |
+ } | |
+ | |
+ cp_block = (struct f2fs_checkpoint *)page_address(cur_page); | |
+ memcpy(sbi->ckpt, cp_block, blk_size); | |
+ | |
+ f2fs_put_page(cp1, 1); | |
+ f2fs_put_page(cp2, 1); | |
+ return 0; | |
+ | |
+fail_no_cp: | |
+ kfree(sbi->ckpt); | |
+ return -EINVAL; | |
+} | |
+ | |
+static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct list_head *head = &sbi->dir_inode_list; | |
+ struct list_head *this; | |
+ | |
+ list_for_each(this, head) { | |
+ struct dir_inode_entry *entry; | |
+ entry = list_entry(this, struct dir_inode_entry, list); | |
+ if (entry->inode == inode) | |
+ return -EEXIST; | |
+ } | |
+ list_add_tail(&new->list, head); | |
+#ifdef CONFIG_F2FS_STAT_FS | |
+ sbi->n_dirty_dirs++; | |
+#endif | |
+ return 0; | |
+} | |
+ | |
+void set_dirty_dir_page(struct inode *inode, struct page *page) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct dir_inode_entry *new; | |
+ | |
+ if (!S_ISDIR(inode->i_mode)) | |
+ return; | |
+retry: | |
+ new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); | |
+ if (!new) { | |
+ cond_resched(); | |
+ goto retry; | |
+ } | |
+ new->inode = inode; | |
+ INIT_LIST_HEAD(&new->list); | |
+ | |
+ spin_lock(&sbi->dir_inode_lock); | |
+ if (__add_dirty_inode(inode, new)) | |
+ kmem_cache_free(inode_entry_slab, new); | |
+ | |
+ inc_page_count(sbi, F2FS_DIRTY_DENTS); | |
+ inode_inc_dirty_dents(inode); | |
+ SetPagePrivate(page); | |
+ spin_unlock(&sbi->dir_inode_lock); | |
+} | |
+ | |
+void add_dirty_dir_inode(struct inode *inode) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct dir_inode_entry *new; | |
+retry: | |
+ new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); | |
+ if (!new) { | |
+ cond_resched(); | |
+ goto retry; | |
+ } | |
+ new->inode = inode; | |
+ INIT_LIST_HEAD(&new->list); | |
+ | |
+ spin_lock(&sbi->dir_inode_lock); | |
+ if (__add_dirty_inode(inode, new)) | |
+ kmem_cache_free(inode_entry_slab, new); | |
+ spin_unlock(&sbi->dir_inode_lock); | |
+} | |
+ | |
+void remove_dirty_dir_inode(struct inode *inode) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct list_head *head = &sbi->dir_inode_list; | |
+ struct list_head *this; | |
+ | |
+ if (!S_ISDIR(inode->i_mode)) | |
+ return; | |
+ | |
+ spin_lock(&sbi->dir_inode_lock); | |
+ if (atomic_read(&F2FS_I(inode)->dirty_dents)) { | |
+ spin_unlock(&sbi->dir_inode_lock); | |
+ return; | |
+ } | |
+ | |
+ list_for_each(this, head) { | |
+ struct dir_inode_entry *entry; | |
+ entry = list_entry(this, struct dir_inode_entry, list); | |
+ if (entry->inode == inode) { | |
+ list_del(&entry->list); | |
+ kmem_cache_free(inode_entry_slab, entry); | |
+#ifdef CONFIG_F2FS_STAT_FS | |
+ sbi->n_dirty_dirs--; | |
+#endif | |
+ break; | |
+ } | |
+ } | |
+ spin_unlock(&sbi->dir_inode_lock); | |
+ | |
+ /* Only from the recovery routine */ | |
+ if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { | |
+ clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); | |
+ iput(inode); | |
+ } | |
+} | |
+ | |
+struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) | |
+{ | |
+ struct list_head *head = &sbi->dir_inode_list; | |
+ struct list_head *this; | |
+ struct inode *inode = NULL; | |
+ | |
+ spin_lock(&sbi->dir_inode_lock); | |
+ list_for_each(this, head) { | |
+ struct dir_inode_entry *entry; | |
+ entry = list_entry(this, struct dir_inode_entry, list); | |
+ if (entry->inode->i_ino == ino) { | |
+ inode = entry->inode; | |
+ break; | |
+ } | |
+ } | |
+ spin_unlock(&sbi->dir_inode_lock); | |
+ return inode; | |
+} | |
+ | |
+void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct list_head *head = &sbi->dir_inode_list; | |
+ struct dir_inode_entry *entry; | |
+ struct inode *inode; | |
+retry: | |
+ spin_lock(&sbi->dir_inode_lock); | |
+ if (list_empty(head)) { | |
+ spin_unlock(&sbi->dir_inode_lock); | |
+ return; | |
+ } | |
+ entry = list_entry(head->next, struct dir_inode_entry, list); | |
+ inode = igrab(entry->inode); | |
+ spin_unlock(&sbi->dir_inode_lock); | |
+ if (inode) { | |
+ filemap_flush(inode->i_mapping); | |
+ iput(inode); | |
+ } else { | |
+ /* | |
+		 * We should submit the bio, since several dentry pages | |
+		 * in the freeing inode are still under writeback. | |
+ */ | |
+ f2fs_submit_bio(sbi, DATA, true); | |
+ } | |
+ goto retry; | |
+} | |
+ | |
+/* | |
+ * Freeze all the FS-operations for checkpoint. | |
+ */ | |
+static void block_operations(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct writeback_control wbc = { | |
+ .sync_mode = WB_SYNC_ALL, | |
+ .nr_to_write = LONG_MAX, | |
+ .for_reclaim = 0, | |
+ }; | |
+ struct blk_plug plug; | |
+ | |
+ blk_start_plug(&plug); | |
+ | |
+retry_flush_dents: | |
+ mutex_lock_all(sbi); | |
+ | |
+ /* write all the dirty dentry pages */ | |
+ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { | |
+ mutex_unlock_all(sbi); | |
+ sync_dirty_dir_inodes(sbi); | |
+ goto retry_flush_dents; | |
+ } | |
+ | |
+ /* | |
+	 * POR: we should ensure that there are no dirty node pages | |
+	 * until the nat/sit flush is finished. | |
+ */ | |
+retry_flush_nodes: | |
+ mutex_lock(&sbi->node_write); | |
+ | |
+ if (get_pages(sbi, F2FS_DIRTY_NODES)) { | |
+ mutex_unlock(&sbi->node_write); | |
+ sync_node_pages(sbi, 0, &wbc); | |
+ goto retry_flush_nodes; | |
+ } | |
+ blk_finish_plug(&plug); | |
+} | |
+ | |
+static void unblock_operations(struct f2fs_sb_info *sbi) | |
+{ | |
+ mutex_unlock(&sbi->node_write); | |
+ mutex_unlock_all(sbi); | |
+} | |
+ | |
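+/* | |
+ * Write one checkpoint pack: the checkpoint block itself, any orphan | |
+ * inode blocks, the data segment summaries, the node summaries on | |
+ * umount, and finally a second copy of the checkpoint block that | |
+ * validates the whole pack. | |
+ */ | |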
+static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |
+{ | |
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | |
+ nid_t last_nid = 0; | |
+ block_t start_blk; | |
+ struct page *cp_page; | |
+ unsigned int data_sum_blocks, orphan_blocks; | |
+ __u32 crc32 = 0; | |
+ void *kaddr; | |
+ int i; | |
+ | |
+ /* Flush all the NAT/SIT pages */ | |
+ while (get_pages(sbi, F2FS_DIRTY_META)) | |
+ sync_meta_pages(sbi, META, LONG_MAX); | |
+ | |
+ next_free_nid(sbi, &last_nid); | |
+ | |
+ /* | |
+ * modify checkpoint | |
+ * version number is already updated | |
+ */ | |
+ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); | |
+ ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); | |
+ ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); | |
+ for (i = 0; i < 3; i++) { | |
+ ckpt->cur_node_segno[i] = | |
+ cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); | |
+ ckpt->cur_node_blkoff[i] = | |
+ cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE)); | |
+ ckpt->alloc_type[i + CURSEG_HOT_NODE] = | |
+ curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); | |
+ } | |
+ for (i = 0; i < 3; i++) { | |
+ ckpt->cur_data_segno[i] = | |
+ cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); | |
+ ckpt->cur_data_blkoff[i] = | |
+ cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA)); | |
+ ckpt->alloc_type[i + CURSEG_HOT_DATA] = | |
+ curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); | |
+ } | |
+ | |
+ ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); | |
+ ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); | |
+ ckpt->next_free_nid = cpu_to_le32(last_nid); | |
+ | |
+ /* 2 cp + n data seg summary + orphan inode blocks */ | |
+ data_sum_blocks = npages_for_summary_flush(sbi); | |
+ if (data_sum_blocks < 3) | |
+ set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); | |
+ else | |
+ clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); | |
+ | |
+ orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) | |
+ / F2FS_ORPHANS_PER_BLOCK; | |
+ ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); | |
+ | |
+ if (is_umount) { | |
+ set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); | |
+ ckpt->cp_pack_total_block_count = cpu_to_le32(2 + | |
+ data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); | |
+ } else { | |
+ clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); | |
+ ckpt->cp_pack_total_block_count = cpu_to_le32(2 + | |
+ data_sum_blocks + orphan_blocks); | |
+ } | |
+ | |
+ if (sbi->n_orphans) | |
+ set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); | |
+ else | |
+ clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); | |
+ | |
+ /* update SIT/NAT bitmap */ | |
+ get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); | |
+ get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); | |
+ | |
+ crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); | |
+ *((__le32 *)((unsigned char *)ckpt + | |
+ le32_to_cpu(ckpt->checksum_offset))) | |
+ = cpu_to_le32(crc32); | |
+ | |
+ start_blk = __start_cp_addr(sbi); | |
+ | |
+ /* write out checkpoint buffer at block 0 */ | |
+ cp_page = grab_meta_page(sbi, start_blk++); | |
+ kaddr = page_address(cp_page); | |
+ memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); | |
+ set_page_dirty(cp_page); | |
+ f2fs_put_page(cp_page, 1); | |
+ | |
+ if (sbi->n_orphans) { | |
+ write_orphan_inodes(sbi, start_blk); | |
+ start_blk += orphan_blocks; | |
+ } | |
+ | |
+ write_data_summaries(sbi, start_blk); | |
+ start_blk += data_sum_blocks; | |
+ if (is_umount) { | |
+ write_node_summaries(sbi, start_blk); | |
+ start_blk += NR_CURSEG_NODE_TYPE; | |
+ } | |
+ | |
+ /* writeout checkpoint block */ | |
+ cp_page = grab_meta_page(sbi, start_blk); | |
+ kaddr = page_address(cp_page); | |
+ memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); | |
+ set_page_dirty(cp_page); | |
+ f2fs_put_page(cp_page, 1); | |
+ | |
+	/* wait for writeback of previously submitted node/meta pages */ | |
+ while (get_pages(sbi, F2FS_WRITEBACK)) | |
+ congestion_wait(BLK_RW_ASYNC, HZ / 50); | |
+ | |
+ filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); | |
+ filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); | |
+ | |
+ /* update user_block_counts */ | |
+ sbi->last_valid_block_count = sbi->total_valid_block_count; | |
+ sbi->alloc_valid_block_count = 0; | |
+ | |
+	/* Here, we have only one bio containing the CP pack */ | |
+ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); | |
+ | |
+ if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { | |
+ clear_prefree_segments(sbi); | |
+ F2FS_RESET_SB_DIRT(sbi); | |
+ } | |
+} | |
+ | |
+/* | |
+ * We guarantee that this checkpoint procedure will not fail. | |
+ */ | |
+void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |
+{ | |
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | |
+ unsigned long long ckpt_ver; | |
+ | |
+ trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); | |
+ | |
+ mutex_lock(&sbi->cp_mutex); | |
+ block_operations(sbi); | |
+ | |
+ trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); | |
+ | |
+ f2fs_submit_bio(sbi, DATA, true); | |
+ f2fs_submit_bio(sbi, NODE, true); | |
+ f2fs_submit_bio(sbi, META, true); | |
+ | |
+ /* | |
+	 * update the checkpoint pack index | |
+	 * Increase the version number so that | |
+	 * SIT entries and seg summaries are written to the correct place | |
+ */ | |
+ ckpt_ver = cur_cp_version(ckpt); | |
+ ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); | |
+ | |
+ /* write cached NAT/SIT entries to NAT/SIT area */ | |
+ flush_nat_entries(sbi); | |
+ flush_sit_entries(sbi); | |
+ | |
+ /* unlock all the fs_lock[] in do_checkpoint() */ | |
+ do_checkpoint(sbi, is_umount); | |
+ | |
+ unblock_operations(sbi); | |
+ mutex_unlock(&sbi->cp_mutex); | |
+ | |
+ trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); | |
+} | |
+ | |
+void init_orphan_info(struct f2fs_sb_info *sbi) | |
+{ | |
+ mutex_init(&sbi->orphan_inode_mutex); | |
+ INIT_LIST_HEAD(&sbi->orphan_inode_list); | |
+ sbi->n_orphans = 0; | |
+} | |
+ | |
+int __init create_checkpoint_caches(void) | |
+{ | |
+ orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", | |
+ sizeof(struct orphan_inode_entry), NULL); | |
+ if (unlikely(!orphan_entry_slab)) | |
+ return -ENOMEM; | |
+ inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", | |
+ sizeof(struct dir_inode_entry), NULL); | |
+ if (unlikely(!inode_entry_slab)) { | |
+ kmem_cache_destroy(orphan_entry_slab); | |
+ return -ENOMEM; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+void destroy_checkpoint_caches(void) | |
+{ | |
+ kmem_cache_destroy(orphan_entry_slab); | |
+ kmem_cache_destroy(inode_entry_slab); | |
+} | |
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c | |
new file mode 100644 | |
index 0000000..550adc3 | |
--- /dev/null | |
+++ b/fs/f2fs/data.c | |
@@ -0,0 +1,790 @@ | |
+/* | |
+ * fs/f2fs/data.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/fs.h> | |
+#include <linux/f2fs_fs.h> | |
+#include <linux/buffer_head.h> | |
+#include <linux/mpage.h> | |
+#include <linux/writeback.h> | |
+#include <linux/backing-dev.h> | |
+#include <linux/blkdev.h> | |
+#include <linux/bio.h> | |
+#include <linux/prefetch.h> | |
+ | |
+#include "f2fs.h" | |
+#include "node.h" | |
+#include "segment.h" | |
+#include <trace/events/f2fs.h> | |
+ | |
+/* | |
+ * Lock ordering for the change of data block address: | |
+ * ->data_page | |
+ * ->node_page | |
+ * update block addresses in the node page | |
+ */ | |
+static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) | |
+{ | |
+ struct f2fs_node *rn; | |
+ __le32 *addr_array; | |
+ struct page *node_page = dn->node_page; | |
+ unsigned int ofs_in_node = dn->ofs_in_node; | |
+ | |
+ f2fs_wait_on_page_writeback(node_page, NODE, false); | |
+ | |
+ rn = F2FS_NODE(node_page); | |
+ | |
+ /* Get physical address of data block */ | |
+ addr_array = blkaddr_in_node(rn); | |
+ addr_array[ofs_in_node] = cpu_to_le32(new_addr); | |
+ set_page_dirty(node_page); | |
+} | |
+ | |
+int reserve_new_block(struct dnode_of_data *dn) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | |
+ | |
+ if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) | |
+ return -EPERM; | |
+ if (!inc_valid_block_count(sbi, dn->inode, 1)) | |
+ return -ENOSPC; | |
+ | |
+ trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); | |
+ | |
+ __set_data_blkaddr(dn, NEW_ADDR); | |
+ dn->data_blkaddr = NEW_ADDR; | |
+ sync_inode_page(dn); | |
+ return 0; | |
+} | |
+ | |
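+/* | |
+ * Each inode caches a single contiguous extent of on-disk blocks. | |
+ * If the requested page offset falls inside that extent, the block | |
+ * address can be mapped without reading the node page. | |
+ */ | |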
+static int check_extent_cache(struct inode *inode, pgoff_t pgofs, | |
+ struct buffer_head *bh_result) | |
+{ | |
+ struct f2fs_inode_info *fi = F2FS_I(inode); | |
+#ifdef CONFIG_F2FS_STAT_FS | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+#endif | |
+ pgoff_t start_fofs, end_fofs; | |
+ block_t start_blkaddr; | |
+ | |
+ read_lock(&fi->ext.ext_lock); | |
+ if (fi->ext.len == 0) { | |
+ read_unlock(&fi->ext.ext_lock); | |
+ return 0; | |
+ } | |
+ | |
+#ifdef CONFIG_F2FS_STAT_FS | |
+ sbi->total_hit_ext++; | |
+#endif | |
+ start_fofs = fi->ext.fofs; | |
+ end_fofs = fi->ext.fofs + fi->ext.len - 1; | |
+ start_blkaddr = fi->ext.blk_addr; | |
+ | |
+ if (pgofs >= start_fofs && pgofs <= end_fofs) { | |
+ unsigned int blkbits = inode->i_sb->s_blocksize_bits; | |
+ size_t count; | |
+ | |
+ clear_buffer_new(bh_result); | |
+ map_bh(bh_result, inode->i_sb, | |
+ start_blkaddr + pgofs - start_fofs); | |
+ count = end_fofs - pgofs + 1; | |
+ if (count < (UINT_MAX >> blkbits)) | |
+ bh_result->b_size = (count << blkbits); | |
+ else | |
+ bh_result->b_size = UINT_MAX; | |
+ | |
+#ifdef CONFIG_F2FS_STAT_FS | |
+ sbi->read_hit_ext++; | |
+#endif | |
+ read_unlock(&fi->ext.ext_lock); | |
+ return 1; | |
+ } | |
+ read_unlock(&fi->ext.ext_lock); | |
+ return 0; | |
+} | |
+ | |
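+/* | |
+ * Keep the one-extent cache consistent with a block address update: | |
+ * grow it by front/back merges when the new block is adjacent, or | |
+ * shrink/split it when an existing block inside it is remapped. | |
+ */ | |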
+void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) | |
+{ | |
+ struct f2fs_inode_info *fi = F2FS_I(dn->inode); | |
+ pgoff_t fofs, start_fofs, end_fofs; | |
+ block_t start_blkaddr, end_blkaddr; | |
+ | |
+ BUG_ON(blk_addr == NEW_ADDR); | |
+ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + | |
+ dn->ofs_in_node; | |
+ | |
+ /* Update the page address in the parent node */ | |
+ __set_data_blkaddr(dn, blk_addr); | |
+ | |
+ write_lock(&fi->ext.ext_lock); | |
+ | |
+ start_fofs = fi->ext.fofs; | |
+ end_fofs = fi->ext.fofs + fi->ext.len - 1; | |
+ start_blkaddr = fi->ext.blk_addr; | |
+ end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; | |
+ | |
+ /* Drop and initialize the matched extent */ | |
+ if (fi->ext.len == 1 && fofs == start_fofs) | |
+ fi->ext.len = 0; | |
+ | |
+ /* Initial extent */ | |
+ if (fi->ext.len == 0) { | |
+ if (blk_addr != NULL_ADDR) { | |
+ fi->ext.fofs = fofs; | |
+ fi->ext.blk_addr = blk_addr; | |
+ fi->ext.len = 1; | |
+ } | |
+ goto end_update; | |
+ } | |
+ | |
+ /* Front merge */ | |
+ if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { | |
+ fi->ext.fofs--; | |
+ fi->ext.blk_addr--; | |
+ fi->ext.len++; | |
+ goto end_update; | |
+ } | |
+ | |
+ /* Back merge */ | |
+ if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { | |
+ fi->ext.len++; | |
+ goto end_update; | |
+ } | |
+ | |
+ /* Split the existing extent */ | |
+ if (fi->ext.len > 1 && | |
+ fofs >= start_fofs && fofs <= end_fofs) { | |
+ if ((end_fofs - fofs) < (fi->ext.len >> 1)) { | |
+ fi->ext.len = fofs - start_fofs; | |
+ } else { | |
+ fi->ext.fofs = fofs + 1; | |
+ fi->ext.blk_addr = start_blkaddr + | |
+ fofs - start_fofs + 1; | |
+ fi->ext.len -= fofs - start_fofs + 1; | |
+ } | |
+ goto end_update; | |
+ } | |
+ write_unlock(&fi->ext.ext_lock); | |
+ return; | |
+ | |
+end_update: | |
+ write_unlock(&fi->ext.ext_lock); | |
+ sync_inode_page(dn); | |
+} | |
+ | |
+struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct address_space *mapping = inode->i_mapping; | |
+ struct dnode_of_data dn; | |
+ struct page *page; | |
+ int err; | |
+ | |
+ page = find_get_page(mapping, index); | |
+ if (page && PageUptodate(page)) | |
+ return page; | |
+ f2fs_put_page(page, 0); | |
+ | |
+ set_new_dnode(&dn, inode, NULL, NULL, 0); | |
+ err = get_dnode_of_data(&dn, index, LOOKUP_NODE); | |
+ if (err) | |
+ return ERR_PTR(err); | |
+ f2fs_put_dnode(&dn); | |
+ | |
+ if (dn.data_blkaddr == NULL_ADDR) | |
+ return ERR_PTR(-ENOENT); | |
+ | |
+ /* By fallocate(), there is no cached page, but with NEW_ADDR */ | |
+ if (dn.data_blkaddr == NEW_ADDR) | |
+ return ERR_PTR(-EINVAL); | |
+ | |
+ page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); | |
+ if (!page) | |
+ return ERR_PTR(-ENOMEM); | |
+ | |
+ if (PageUptodate(page)) { | |
+ unlock_page(page); | |
+ return page; | |
+ } | |
+ | |
+ err = f2fs_readpage(sbi, page, dn.data_blkaddr, | |
+ sync ? READ_SYNC : READA); | |
+ if (sync) { | |
+ wait_on_page_locked(page); | |
+ if (!PageUptodate(page)) { | |
+ f2fs_put_page(page, 0); | |
+ return ERR_PTR(-EIO); | |
+ } | |
+ } | |
+ return page; | |
+} | |
+ | |
+/* | |
+ * If it tries to access a hole, return an error, because the callers | |
+ * (functions in dir.c and GC) should be able to know | |
+ * whether this page exists or not. | |
+ */ | |
+struct page *get_lock_data_page(struct inode *inode, pgoff_t index) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct address_space *mapping = inode->i_mapping; | |
+ struct dnode_of_data dn; | |
+ struct page *page; | |
+ int err; | |
+ | |
+repeat: | |
+ page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); | |
+ if (!page) | |
+ return ERR_PTR(-ENOMEM); | |
+ | |
+ set_new_dnode(&dn, inode, NULL, NULL, 0); | |
+ err = get_dnode_of_data(&dn, index, LOOKUP_NODE); | |
+ if (err) { | |
+ f2fs_put_page(page, 1); | |
+ return ERR_PTR(err); | |
+ } | |
+ f2fs_put_dnode(&dn); | |
+ | |
+ if (dn.data_blkaddr == NULL_ADDR) { | |
+ f2fs_put_page(page, 1); | |
+ return ERR_PTR(-ENOENT); | |
+ } | |
+ | |
+ if (PageUptodate(page)) | |
+ return page; | |
+ | |
+ /* | |
+	 * A new dentry page was allocated but could not be written, since its | |
+	 * new inode page couldn't be allocated due to -ENOSPC. | |
+	 * In such a case, its blkaddr remains NEW_ADDR. | |
+	 * See f2fs_add_link -> get_new_data_page -> init_inode_metadata. | |
+ */ | |
+ if (dn.data_blkaddr == NEW_ADDR) { | |
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE); | |
+ SetPageUptodate(page); | |
+ return page; | |
+ } | |
+ | |
+ err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); | |
+ if (err) | |
+ return ERR_PTR(err); | |
+ | |
+ lock_page(page); | |
+ if (!PageUptodate(page)) { | |
+ f2fs_put_page(page, 1); | |
+ return ERR_PTR(-EIO); | |
+ } | |
+ if (page->mapping != mapping) { | |
+ f2fs_put_page(page, 1); | |
+ goto repeat; | |
+ } | |
+ return page; | |
+} | |
+ | |
+/* | |
+ * Caller ensures that this data page is never allocated. | |
+ * A new zero-filled data page is allocated in the page cache. | |
+ * | |
+ * The caller should also grab and release a mutex by calling mutex_lock_op() | |
+ * and mutex_unlock_op(). | |
+ * Note that npage is set only by make_empty_dir. | |
+ */ | |
+struct page *get_new_data_page(struct inode *inode, | |
+ struct page *npage, pgoff_t index, bool new_i_size) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct address_space *mapping = inode->i_mapping; | |
+ struct page *page; | |
+ struct dnode_of_data dn; | |
+ int err; | |
+ | |
+ set_new_dnode(&dn, inode, npage, npage, 0); | |
+ err = get_dnode_of_data(&dn, index, ALLOC_NODE); | |
+ if (err) | |
+ return ERR_PTR(err); | |
+ | |
+ if (dn.data_blkaddr == NULL_ADDR) { | |
+ if (reserve_new_block(&dn)) { | |
+ if (!npage) | |
+ f2fs_put_dnode(&dn); | |
+ return ERR_PTR(-ENOSPC); | |
+ } | |
+ } | |
+ if (!npage) | |
+ f2fs_put_dnode(&dn); | |
+repeat: | |
+ page = grab_cache_page(mapping, index); | |
+ if (!page) | |
+ return ERR_PTR(-ENOMEM); | |
+ | |
+ if (PageUptodate(page)) | |
+ return page; | |
+ | |
+ if (dn.data_blkaddr == NEW_ADDR) { | |
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE); | |
+ SetPageUptodate(page); | |
+ } else { | |
+ err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); | |
+ if (err) | |
+ return ERR_PTR(err); | |
+ lock_page(page); | |
+ if (!PageUptodate(page)) { | |
+ f2fs_put_page(page, 1); | |
+ return ERR_PTR(-EIO); | |
+ } | |
+ if (page->mapping != mapping) { | |
+ f2fs_put_page(page, 1); | |
+ goto repeat; | |
+ } | |
+ } | |
+ | |
+ if (new_i_size && | |
+ i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { | |
+ i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); | |
+ /* Only the directory inode sets new_i_size */ | |
+ set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); | |
+ mark_inode_dirty_sync(inode); | |
+ } | |
+ return page; | |
+} | |
+ | |
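+/* | |
+ * bio completion handler for reads: walk the bio_vec array backwards, | |
+ * marking each page up-to-date on success or as an error on failure, | |
+ * and unlock it so waiters can proceed. | |
+ */ | |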
+static void read_end_io(struct bio *bio, int err) | |
+{ | |
+ const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | |
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | |
+ | |
+ do { | |
+ struct page *page = bvec->bv_page; | |
+ | |
+ if (--bvec >= bio->bi_io_vec) | |
+ prefetchw(&bvec->bv_page->flags); | |
+ | |
+ if (uptodate) { | |
+ SetPageUptodate(page); | |
+ } else { | |
+ ClearPageUptodate(page); | |
+ SetPageError(page); | |
+ } | |
+ unlock_page(page); | |
+ } while (bvec >= bio->bi_io_vec); | |
+ bio_put(bio); | |
+} | |
+ | |
+/* | |
+ * Fill the locked page with data located in the block address. | |
+ * Return the unlocked page. | |
+ */ | |
+int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, | |
+ block_t blk_addr, int type) | |
+{ | |
+ struct block_device *bdev = sbi->sb->s_bdev; | |
+ struct bio *bio; | |
+ | |
+ trace_f2fs_readpage(page, blk_addr, type); | |
+ | |
+ down_read(&sbi->bio_sem); | |
+ | |
+ /* Allocate a new bio */ | |
+ bio = f2fs_bio_alloc(bdev, 1); | |
+ | |
+ /* Initialize the bio */ | |
+ bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); | |
+ bio->bi_end_io = read_end_io; | |
+ | |
+ if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { | |
+ bio_put(bio); | |
+ up_read(&sbi->bio_sem); | |
+ f2fs_put_page(page, 1); | |
+ return -EFAULT; | |
+ } | |
+ | |
+ submit_bio(type, bio); | |
+ up_read(&sbi->bio_sem); | |
+ return 0; | |
+} | |
+ | |
+/* | |
+ * This function should be used by the data read flow only, so it | |
+ * does not check the "create" flag that indicates block allocation. | |
+ * The reason for this special functionality is to exploit the VFS readahead | |
+ * mechanism. | |
+ */ | |
+static int get_data_block_ro(struct inode *inode, sector_t iblock, | |
+ struct buffer_head *bh_result, int create) | |
+{ | |
+ unsigned int blkbits = inode->i_sb->s_blocksize_bits; | |
+ unsigned maxblocks = bh_result->b_size >> blkbits; | |
+ struct dnode_of_data dn; | |
+ pgoff_t pgofs; | |
+ int err; | |
+ | |
+	/* Get the page offset from the block offset (iblock) */ | |
+ pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); | |
+ | |
+ if (check_extent_cache(inode, pgofs, bh_result)) { | |
+ trace_f2fs_get_data_block(inode, iblock, bh_result, 0); | |
+ return 0; | |
+ } | |
+ | |
+ /* When reading holes, we need its node page */ | |
+ set_new_dnode(&dn, inode, NULL, NULL, 0); | |
+ err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); | |
+ if (err) { | |
+ trace_f2fs_get_data_block(inode, iblock, bh_result, err); | |
+ return (err == -ENOENT) ? 0 : err; | |
+ } | |
+ | |
+ /* It does not support data allocation */ | |
+ BUG_ON(create); | |
+ | |
+ if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { | |
+ int i; | |
+ unsigned int end_offset; | |
+ | |
+ end_offset = IS_INODE(dn.node_page) ? | |
+ ADDRS_PER_INODE(F2FS_I(inode)) : | |
+ ADDRS_PER_BLOCK; | |
+ | |
+ clear_buffer_new(bh_result); | |
+ | |
+ /* Give more consecutive addresses for the read ahead */ | |
+ for (i = 0; i < end_offset - dn.ofs_in_node; i++) | |
+ if (((datablock_addr(dn.node_page, | |
+ dn.ofs_in_node + i)) | |
+ != (dn.data_blkaddr + i)) || maxblocks == i) | |
+ break; | |
+ map_bh(bh_result, inode->i_sb, dn.data_blkaddr); | |
+ bh_result->b_size = (i << blkbits); | |
+ } | |
+ f2fs_put_dnode(&dn); | |
+ trace_f2fs_get_data_block(inode, iblock, bh_result, 0); | |
+ return 0; | |
+} | |
+ | |
+static int f2fs_read_data_page(struct file *file, struct page *page) | |
+{ | |
+ return mpage_readpage(page, get_data_block_ro); | |
+} | |
+ | |
+static int f2fs_read_data_pages(struct file *file, | |
+ struct address_space *mapping, | |
+ struct list_head *pages, unsigned nr_pages) | |
+{ | |
+ return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); | |
+} | |
+ | |
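+/* | |
+ * Write one data page: look up its current block address and either | |
+ * rewrite it in place (when SSR is needed and the data is not cold) | |
+ * or allocate a new block and update the extent cache. | |
+ */ | |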
+int do_write_data_page(struct page *page) | |
+{ | |
+ struct inode *inode = page->mapping->host; | |
+ block_t old_blk_addr, new_blk_addr; | |
+ struct dnode_of_data dn; | |
+ int err = 0; | |
+ | |
+ set_new_dnode(&dn, inode, NULL, NULL, 0); | |
+ err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); | |
+ if (err) | |
+ return err; | |
+ | |
+ old_blk_addr = dn.data_blkaddr; | |
+ | |
+ /* This page is already truncated */ | |
+ if (old_blk_addr == NULL_ADDR) | |
+ goto out_writepage; | |
+ | |
+ set_page_writeback(page); | |
+ | |
+ /* | |
+	 * If the current allocation needs SSR, | |
+	 * in-place writes are preferable for the updated data. | |
+ */ | |
+ if (unlikely(old_blk_addr != NEW_ADDR && | |
+ !is_cold_data(page) && | |
+ need_inplace_update(inode))) { | |
+ rewrite_data_page(F2FS_SB(inode->i_sb), page, | |
+ old_blk_addr); | |
+ } else { | |
+ write_data_page(inode, page, &dn, | |
+ old_blk_addr, &new_blk_addr); | |
+ update_extent_cache(new_blk_addr, &dn); | |
+ } | |
+out_writepage: | |
+ f2fs_put_dnode(&dn); | |
+ return err; | |
+} | |
+ | |
+static int f2fs_write_data_page(struct page *page, | |
+ struct writeback_control *wbc) | |
+{ | |
+ struct inode *inode = page->mapping->host; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ loff_t i_size = i_size_read(inode); | |
+ const pgoff_t end_index = ((unsigned long long) i_size) | |
+ >> PAGE_CACHE_SHIFT; | |
+ unsigned offset; | |
+ bool need_balance_fs = false; | |
+ int err = 0; | |
+ | |
+ if (page->index < end_index) | |
+ goto write; | |
+ | |
+ /* | |
+	 * If the offset is beyond the file size, | |
+	 * this page does not have to be written to disk. | |
+ */ | |
+ offset = i_size & (PAGE_CACHE_SIZE - 1); | |
+ if ((page->index >= end_index + 1) || !offset) { | |
+ if (S_ISDIR(inode->i_mode)) { | |
+ dec_page_count(sbi, F2FS_DIRTY_DENTS); | |
+ inode_dec_dirty_dents(inode); | |
+ } | |
+ goto out; | |
+ } | |
+ | |
+ zero_user_segment(page, offset, PAGE_CACHE_SIZE); | |
+write: | |
+ if (sbi->por_doing) { | |
+ err = AOP_WRITEPAGE_ACTIVATE; | |
+ goto redirty_out; | |
+ } | |
+ | |
+ /* Dentry blocks are controlled by checkpoint */ | |
+ if (S_ISDIR(inode->i_mode)) { | |
+ dec_page_count(sbi, F2FS_DIRTY_DENTS); | |
+ inode_dec_dirty_dents(inode); | |
+ err = do_write_data_page(page); | |
+ } else { | |
+ int ilock = mutex_lock_op(sbi); | |
+ err = do_write_data_page(page); | |
+ mutex_unlock_op(sbi, ilock); | |
+ need_balance_fs = true; | |
+ } | |
+ if (err == -ENOENT) | |
+ goto out; | |
+ else if (err) | |
+ goto redirty_out; | |
+ | |
+ if (wbc->for_reclaim) | |
+ f2fs_submit_bio(sbi, DATA, true); | |
+ | |
+ clear_cold_data(page); | |
+out: | |
+ unlock_page(page); | |
+ if (need_balance_fs) | |
+ f2fs_balance_fs(sbi); | |
+ return 0; | |
+ | |
+redirty_out: | |
+ wbc->pages_skipped++; | |
+ set_page_dirty(page); | |
+ return err; | |
+} | |
+ | |
+#define MAX_DESIRED_PAGES_WP 4096 | |
+ | |
+static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, | |
+ void *data) | |
+{ | |
+ struct address_space *mapping = data; | |
+ int ret = mapping->a_ops->writepage(page, wbc); | |
+ mapping_set_error(mapping, ret); | |
+ return ret; | |
+} | |
+ | |
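+/* | |
+ * Batch writeback: temporarily raise nr_to_write to | |
+ * MAX_DESIRED_PAGES_WP so that one pass writes a larger chunk, then | |
+ * subtract the excess afterwards so the caller's accounting still | |
+ * holds. | |
+ */ | |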
+static int f2fs_write_data_pages(struct address_space *mapping, | |
+ struct writeback_control *wbc) | |
+{ | |
+ struct inode *inode = mapping->host; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ bool locked = false; | |
+ int ret; | |
+ long excess_nrtw = 0, desired_nrtw; | |
+ | |
+	/* deal with chardevs and other special files */ | |
+ if (!mapping->a_ops->writepage) | |
+ return 0; | |
+ | |
+ if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { | |
+ desired_nrtw = MAX_DESIRED_PAGES_WP; | |
+ excess_nrtw = desired_nrtw - wbc->nr_to_write; | |
+ wbc->nr_to_write = desired_nrtw; | |
+ } | |
+ | |
+ if (!S_ISDIR(inode->i_mode)) { | |
+ mutex_lock(&sbi->writepages); | |
+ locked = true; | |
+ } | |
+ ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); | |
+ if (locked) | |
+ mutex_unlock(&sbi->writepages); | |
+ f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); | |
+ | |
+ remove_dirty_dir_inode(inode); | |
+ | |
+ wbc->nr_to_write -= excess_nrtw; | |
+ return ret; | |
+} | |
+ | |
+static int f2fs_write_begin(struct file *file, struct address_space *mapping, | |
+ loff_t pos, unsigned len, unsigned flags, | |
+ struct page **pagep, void **fsdata) | |
+{ | |
+ struct inode *inode = mapping->host; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct page *page; | |
+ pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; | |
+ struct dnode_of_data dn; | |
+ int err = 0; | |
+ int ilock; | |
+ | |
+ f2fs_balance_fs(sbi); | |
+repeat: | |
+ page = grab_cache_page_write_begin(mapping, index, flags); | |
+ if (!page) | |
+ return -ENOMEM; | |
+ *pagep = page; | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ | |
+ set_new_dnode(&dn, inode, NULL, NULL, 0); | |
+ err = get_dnode_of_data(&dn, index, ALLOC_NODE); | |
+ if (err) | |
+ goto err; | |
+ | |
+ if (dn.data_blkaddr == NULL_ADDR) | |
+ err = reserve_new_block(&dn); | |
+ | |
+ f2fs_put_dnode(&dn); | |
+ if (err) | |
+ goto err; | |
+ | |
+ mutex_unlock_op(sbi, ilock); | |
+ | |
+ if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) | |
+ return 0; | |
+ | |
+ if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { | |
+ unsigned start = pos & (PAGE_CACHE_SIZE - 1); | |
+ unsigned end = start + len; | |
+ | |
+ /* Reading beyond i_size is simple: memset to zero */ | |
+ zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); | |
+ goto out; | |
+ } | |
+ | |
+ if (dn.data_blkaddr == NEW_ADDR) { | |
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE); | |
+ } else { | |
+ err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); | |
+ if (err) | |
+ return err; | |
+ lock_page(page); | |
+ if (!PageUptodate(page)) { | |
+ f2fs_put_page(page, 1); | |
+ return -EIO; | |
+ } | |
+ if (page->mapping != mapping) { | |
+ f2fs_put_page(page, 1); | |
+ goto repeat; | |
+ } | |
+ } | |
+out: | |
+ SetPageUptodate(page); | |
+ clear_cold_data(page); | |
+ return 0; | |
+ | |
+err: | |
+ mutex_unlock_op(sbi, ilock); | |
+ f2fs_put_page(page, 1); | |
+ return err; | |
+} | |
+ | |
+static int f2fs_write_end(struct file *file, | |
+ struct address_space *mapping, | |
+ loff_t pos, unsigned len, unsigned copied, | |
+ struct page *page, void *fsdata) | |
+{ | |
+ struct inode *inode = page->mapping->host; | |
+ | |
+ SetPageUptodate(page); | |
+ set_page_dirty(page); | |
+ | |
+ if (pos + copied > i_size_read(inode)) { | |
+ i_size_write(inode, pos + copied); | |
+ mark_inode_dirty(inode); | |
+ update_inode_page(inode); | |
+ } | |
+ | |
+ unlock_page(page); | |
+ page_cache_release(page); | |
+ return copied; | |
+} | |
+ | |
+static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, | |
+ const struct iovec *iov, loff_t offset, unsigned long nr_segs) | |
+{ | |
+ struct file *file = iocb->ki_filp; | |
+ struct inode *inode = file->f_mapping->host; | |
+ | |
+ if (rw == WRITE) | |
+ return 0; | |
+ | |
+ /* Needs synchronization with the cleaner */ | |
+ return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | |
+ get_data_block_ro); | |
+} | |
+ | |
+static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) | |
+{ | |
+ struct inode *inode = page->mapping->host; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ if (S_ISDIR(inode->i_mode) && PageDirty(page)) { | |
+ dec_page_count(sbi, F2FS_DIRTY_DENTS); | |
+ inode_dec_dirty_dents(inode); | |
+ } | |
+ ClearPagePrivate(page); | |
+} | |
+ | |
+static int f2fs_release_data_page(struct page *page, gfp_t wait) | |
+{ | |
+ ClearPagePrivate(page); | |
+ return 1; | |
+} | |
+ | |
+static int f2fs_set_data_page_dirty(struct page *page) | |
+{ | |
+ struct address_space *mapping = page->mapping; | |
+ struct inode *inode = mapping->host; | |
+ | |
+ SetPageUptodate(page); | |
+ if (!PageDirty(page)) { | |
+ __set_page_dirty_nobuffers(page); | |
+ set_dirty_dir_page(inode, page); | |
+ return 1; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) | |
+{ | |
+ return generic_block_bmap(mapping, block, get_data_block_ro); | |
+} | |
+ | |
+const struct address_space_operations f2fs_dblock_aops = { | |
+ .readpage = f2fs_read_data_page, | |
+ .readpages = f2fs_read_data_pages, | |
+ .writepage = f2fs_write_data_page, | |
+ .writepages = f2fs_write_data_pages, | |
+ .write_begin = f2fs_write_begin, | |
+ .write_end = f2fs_write_end, | |
+ .set_page_dirty = f2fs_set_data_page_dirty, | |
+ .invalidatepage = f2fs_invalidate_data_page, | |
+ .releasepage = f2fs_release_data_page, | |
+ .direct_IO = f2fs_direct_IO, | |
+ .bmap = f2fs_bmap, | |
+}; | |
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c | |
new file mode 100644 | |
index 0000000..a84b0a8 | |
--- /dev/null | |
+++ b/fs/f2fs/debug.c | |
@@ -0,0 +1,353 @@ | |
+/* | |
+ * f2fs debugging statistics | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * Copyright (c) 2012 Linux Foundation | |
+ * Copyright (c) 2012 Greg Kroah-Hartman <[email protected]> | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+ | |
+#include <linux/fs.h> | |
+#include <linux/backing-dev.h> | |
+#include <linux/f2fs_fs.h> | |
+#include <linux/blkdev.h> | |
+#include <linux/debugfs.h> | |
+#include <linux/seq_file.h> | |
+ | |
+#include "f2fs.h" | |
+#include "node.h" | |
+#include "segment.h" | |
+#include "gc.h" | |
+ | |
+static LIST_HEAD(f2fs_stat_list); | |
+static struct dentry *debugfs_root; | |
+static DEFINE_MUTEX(f2fs_stat_mutex); | |
+ | |
+static void update_general_status(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); | |
+ int i; | |
+ | |
+	/* validity check of the segment numbers */ | |
+ si->hit_ext = sbi->read_hit_ext; | |
+ si->total_ext = sbi->total_hit_ext; | |
+ si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); | |
+ si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); | |
+ si->ndirty_dirs = sbi->n_dirty_dirs; | |
+ si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); | |
+ si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; | |
+ si->rsvd_segs = reserved_segments(sbi); | |
+ si->overp_segs = overprovision_segments(sbi); | |
+ si->valid_count = valid_user_blocks(sbi); | |
+ si->valid_node_count = valid_node_count(sbi); | |
+ si->valid_inode_count = valid_inode_count(sbi); | |
+ si->utilization = utilization(sbi); | |
+ | |
+ si->free_segs = free_segments(sbi); | |
+ si->free_secs = free_sections(sbi); | |
+ si->prefree_count = prefree_segments(sbi); | |
+ si->dirty_count = dirty_segments(sbi); | |
+ si->node_pages = sbi->node_inode->i_mapping->nrpages; | |
+ si->meta_pages = sbi->meta_inode->i_mapping->nrpages; | |
+ si->nats = NM_I(sbi)->nat_cnt; | |
+ si->sits = SIT_I(sbi)->dirty_sentries; | |
+ si->fnids = NM_I(sbi)->fcnt; | |
+ si->bg_gc = sbi->bg_gc; | |
+ si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) | |
+ * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) | |
+ / 2; | |
+ si->util_valid = (int)(written_block_count(sbi) >> | |
+ sbi->log_blocks_per_seg) | |
+ * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) | |
+ / 2; | |
+ si->util_invalid = 50 - si->util_free - si->util_valid; | |
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) { | |
+ struct curseg_info *curseg = CURSEG_I(sbi, i); | |
+ si->curseg[i] = curseg->segno; | |
+ si->cursec[i] = curseg->segno / sbi->segs_per_sec; | |
+ si->curzone[i] = si->cursec[i] / sbi->secs_per_zone; | |
+ } | |
+ | |
+ for (i = 0; i < 2; i++) { | |
+ si->segment_count[i] = sbi->segment_count[i]; | |
+ si->block_count[i] = sbi->block_count[i]; | |
+ } | |
+} | |
+ | |
+/* | |
+ * This function calculates the BDF of every segment. | |
+ */ | |
+static void update_sit_info(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); | |
+ unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ unsigned int segno, vblocks; | |
+ int ndirty = 0; | |
+ | |
+ bimodal = 0; | |
+ total_vblocks = 0; | |
+ blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); | |
+ hblks_per_sec = blks_per_sec / 2; | |
+ mutex_lock(&sit_i->sentry_lock); | |
+ for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { | |
+ vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); | |
+ dist = abs(vblocks - hblks_per_sec); | |
+ bimodal += dist * dist; | |
+ | |
+ if (vblocks > 0 && vblocks < blks_per_sec) { | |
+ total_vblocks += vblocks; | |
+ ndirty++; | |
+ } | |
+ } | |
+ mutex_unlock(&sit_i->sentry_lock); | |
+ dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; | |
+ si->bimodal = bimodal / dist; | |
+ if (si->dirty_count) | |
+ si->avg_vblocks = total_vblocks / ndirty; | |
+ else | |
+ si->avg_vblocks = 0; | |
+} | |
+ | |
+/* | |
+ * This function calculates the memory footprint. | |
+ */ | |
+static void update_mem_info(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); | |
+ unsigned npages; | |
+ | |
+ if (si->base_mem) | |
+ goto get_cache; | |
+ | |
+ si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; | |
+ si->base_mem += 2 * sizeof(struct f2fs_inode_info); | |
+ si->base_mem += sizeof(*sbi->ckpt); | |
+ | |
+ /* build sm */ | |
+ si->base_mem += sizeof(struct f2fs_sm_info); | |
+ | |
+ /* build sit */ | |
+ si->base_mem += sizeof(struct sit_info); | |
+ si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry); | |
+ si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); | |
+ si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); | |
+ if (sbi->segs_per_sec > 1) | |
+ si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry); | |
+ si->base_mem += __bitmap_size(sbi, SIT_BITMAP); | |
+ | |
+ /* build free segmap */ | |
+ si->base_mem += sizeof(struct free_segmap_info); | |
+ si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); | |
+ si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); | |
+ | |
+ /* build curseg */ | |
+ si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; | |
+ si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE; | |
+ | |
+ /* build dirty segmap */ | |
+ si->base_mem += sizeof(struct dirty_seglist_info); | |
+ si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); | |
+ si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); | |
+ | |
+	/* build nm */ | |
+ si->base_mem += sizeof(struct f2fs_nm_info); | |
+ si->base_mem += __bitmap_size(sbi, NAT_BITMAP); | |
+ | |
+ /* build gc */ | |
+ si->base_mem += sizeof(struct f2fs_gc_kthread); | |
+ | |
+get_cache: | |
+ /* free nids */ | |
+ si->cache_mem = NM_I(sbi)->fcnt; | |
+ si->cache_mem += NM_I(sbi)->nat_cnt; | |
+ npages = sbi->node_inode->i_mapping->nrpages; | |
+ si->cache_mem += npages << PAGE_CACHE_SHIFT; | |
+ npages = sbi->meta_inode->i_mapping->nrpages; | |
+ si->cache_mem += npages << PAGE_CACHE_SHIFT; | |
+ si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); | |
+ si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); | |
+} | |
+ | |
+static int stat_show(struct seq_file *s, void *v) | |
+{ | |
+ struct f2fs_stat_info *si; | |
+ int i = 0; | |
+ int j; | |
+ | |
+ mutex_lock(&f2fs_stat_mutex); | |
+ list_for_each_entry(si, &f2fs_stat_list, stat_list) { | |
+ char devname[BDEVNAME_SIZE]; | |
+ | |
+ update_general_status(si->sbi); | |
+ | |
+ seq_printf(s, "\n=====[ partition info(%s). #%d ]=====\n", | |
+ bdevname(si->sbi->sb->s_bdev, devname), i++); | |
+ seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", | |
+ si->sit_area_segs, si->nat_area_segs); | |
+ seq_printf(s, "[SSA: %d] [MAIN: %d", | |
+ si->ssa_area_segs, si->main_area_segs); | |
+ seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", | |
+ si->overp_segs, si->rsvd_segs); | |
+ seq_printf(s, "Utilization: %d%% (%d valid blocks)\n", | |
+ si->utilization, si->valid_count); | |
+ seq_printf(s, " - Node: %u (Inode: %u, ", | |
+ si->valid_node_count, si->valid_inode_count); | |
+ seq_printf(s, "Other: %u)\n - Data: %u\n", | |
+ si->valid_node_count - si->valid_inode_count, | |
+ si->valid_count - si->valid_node_count); | |
+ seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", | |
+ si->main_area_segs, si->main_area_sections, | |
+ si->main_area_zones); | |
+ seq_printf(s, " - COLD data: %d, %d, %d\n", | |
+ si->curseg[CURSEG_COLD_DATA], | |
+ si->cursec[CURSEG_COLD_DATA], | |
+ si->curzone[CURSEG_COLD_DATA]); | |
+ seq_printf(s, " - WARM data: %d, %d, %d\n", | |
+ si->curseg[CURSEG_WARM_DATA], | |
+ si->cursec[CURSEG_WARM_DATA], | |
+ si->curzone[CURSEG_WARM_DATA]); | |
+ seq_printf(s, " - HOT data: %d, %d, %d\n", | |
+ si->curseg[CURSEG_HOT_DATA], | |
+ si->cursec[CURSEG_HOT_DATA], | |
+ si->curzone[CURSEG_HOT_DATA]); | |
+ seq_printf(s, " - Dir dnode: %d, %d, %d\n", | |
+ si->curseg[CURSEG_HOT_NODE], | |
+ si->cursec[CURSEG_HOT_NODE], | |
+ si->curzone[CURSEG_HOT_NODE]); | |
+ seq_printf(s, " - File dnode: %d, %d, %d\n", | |
+ si->curseg[CURSEG_WARM_NODE], | |
+ si->cursec[CURSEG_WARM_NODE], | |
+ si->curzone[CURSEG_WARM_NODE]); | |
+ seq_printf(s, " - Indir nodes: %d, %d, %d\n", | |
+ si->curseg[CURSEG_COLD_NODE], | |
+ si->cursec[CURSEG_COLD_NODE], | |
+ si->curzone[CURSEG_COLD_NODE]); | |
+ seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n", | |
+ si->main_area_segs - si->dirty_count - | |
+ si->prefree_count - si->free_segs, | |
+ si->dirty_count); | |
+ seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", | |
+ si->prefree_count, si->free_segs, si->free_secs); | |
+ seq_printf(s, "GC calls: %d (BG: %d)\n", | |
+ si->call_count, si->bg_gc); | |
+ seq_printf(s, " - data segments : %d\n", si->data_segs); | |
+ seq_printf(s, " - node segments : %d\n", si->node_segs); | |
+ seq_printf(s, "Try to move %d blocks\n", si->tot_blks); | |
+ seq_printf(s, " - data blocks : %d\n", si->data_blks); | |
+ seq_printf(s, " - node blocks : %d\n", si->node_blks); | |
+ seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", | |
+ si->hit_ext, si->total_ext); | |
+ seq_printf(s, "\nBalancing F2FS Async:\n"); | |
+ seq_printf(s, " - nodes %4d in %4d\n", | |
+ si->ndirty_node, si->node_pages); | |
+ seq_printf(s, " - dents %4d in dirs:%4d\n", | |
+ si->ndirty_dent, si->ndirty_dirs); | |
+ seq_printf(s, " - meta %4d in %4d\n", | |
+ si->ndirty_meta, si->meta_pages); | |
+ seq_printf(s, " - NATs %5d > %lu\n", | |
+ si->nats, NM_WOUT_THRESHOLD); | |
+ seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", | |
+ si->sits, si->fnids); | |
+ seq_puts(s, "\nDistribution of User Blocks:"); | |
+ seq_puts(s, " [ valid | invalid | free ]\n"); | |
+ seq_puts(s, " ["); | |
+ | |
+ for (j = 0; j < si->util_valid; j++) | |
+ seq_putc(s, '-'); | |
+ seq_putc(s, '|'); | |
+ | |
+ for (j = 0; j < si->util_invalid; j++) | |
+ seq_putc(s, '-'); | |
+ seq_putc(s, '|'); | |
+ | |
+ for (j = 0; j < si->util_free; j++) | |
+ seq_putc(s, '-'); | |
+ seq_puts(s, "]\n\n"); | |
+ seq_printf(s, "SSR: %u blocks in %u segments\n", | |
+ si->block_count[SSR], si->segment_count[SSR]); | |
+ seq_printf(s, "LFS: %u blocks in %u segments\n", | |
+ si->block_count[LFS], si->segment_count[LFS]); | |
+ | |
+ /* segment usage info */ | |
+ update_sit_info(si->sbi); | |
+ seq_printf(s, "\nBDF: %u, avg. vblocks: %u\n", | |
+ si->bimodal, si->avg_vblocks); | |
+ | |
+ /* memory footprint */ | |
+ update_mem_info(si->sbi); | |
+ seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", | |
+ (si->base_mem + si->cache_mem) >> 10, | |
+ si->base_mem >> 10, si->cache_mem >> 10); | |
+ } | |
+ mutex_unlock(&f2fs_stat_mutex); | |
+ return 0; | |
+} | |
+ | |
+static int stat_open(struct inode *inode, struct file *file) | |
+{ | |
+ return single_open(file, stat_show, inode->i_private); | |
+} | |
+ | |
+static const struct file_operations stat_fops = { | |
+ .open = stat_open, | |
+ .read = seq_read, | |
+ .llseek = seq_lseek, | |
+ .release = single_release, | |
+}; | |
+ | |
+int f2fs_build_stats(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); | |
+ struct f2fs_stat_info *si; | |
+ | |
+ si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL); | |
+ if (!si) | |
+ return -ENOMEM; | |
+ | |
+ si->all_area_segs = le32_to_cpu(raw_super->segment_count); | |
+ si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); | |
+ si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); | |
+ si->ssa_area_segs = le32_to_cpu(raw_super->segment_count_ssa); | |
+ si->main_area_segs = le32_to_cpu(raw_super->segment_count_main); | |
+ si->main_area_sections = le32_to_cpu(raw_super->section_count); | |
+ si->main_area_zones = si->main_area_sections / | |
+ le32_to_cpu(raw_super->secs_per_zone); | |
+ si->sbi = sbi; | |
+ sbi->stat_info = si; | |
+ | |
+ mutex_lock(&f2fs_stat_mutex); | |
+ list_add_tail(&si->stat_list, &f2fs_stat_list); | |
+ mutex_unlock(&f2fs_stat_mutex); | |
+ | |
+ return 0; | |
+} | |
+ | |
+void f2fs_destroy_stats(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); | |
+ | |
+ mutex_lock(&f2fs_stat_mutex); | |
+ list_del(&si->stat_list); | |
+ mutex_unlock(&f2fs_stat_mutex); | |
+ | |
+ kfree(si); | |
+} | |
+ | |
+void __init f2fs_create_root_stats(void) | |
+{ | |
+ debugfs_root = debugfs_create_dir("f2fs", NULL); | |
+ if (debugfs_root) | |
+ debugfs_create_file("status", S_IRUGO, debugfs_root, | |
+ NULL, &stat_fops); | |
+} | |
+ | |
+void f2fs_destroy_root_stats(void) | |
+{ | |
+ debugfs_remove_recursive(debugfs_root); | |
+ debugfs_root = NULL; | |
+} | |
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c | |
new file mode 100644 | |
index 0000000..11cdb75 | |
--- /dev/null | |
+++ b/fs/f2fs/dir.c | |
@@ -0,0 +1,714 @@ | |
+/* | |
+ * fs/f2fs/dir.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/fs.h> | |
+#include <linux/f2fs_fs.h> | |
+#include "f2fs.h" | |
+#include "node.h" | |
+#include "acl.h" | |
+#include "xattr.h" | |
+ | |
+static unsigned long dir_blocks(struct inode *inode) | |
+{ | |
+ return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1)) | |
+ >> PAGE_CACHE_SHIFT; | |
+} | |
+ | |
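+/* | |
+ * F2FS directories are multi-level hash tables: each level holds | |
+ * dir_buckets(level) buckets of bucket_blocks(level) dentry blocks, | |
+ * and a name is looked up level by level in the bucket its hash | |
+ * selects. | |
+ */ | |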
+static unsigned int dir_buckets(unsigned int level) | |
+{ | |
+ if (level < MAX_DIR_HASH_DEPTH / 2) | |
+ return 1 << level; | |
+ else | |
+ return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); | |
+} | |
+ | |
+static unsigned int bucket_blocks(unsigned int level) | |
+{ | |
+ if (level < MAX_DIR_HASH_DEPTH / 2) | |
+ return 2; | |
+ else | |
+ return 4; | |
+} | |
+ | |
+static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { | |
+ [F2FS_FT_UNKNOWN] = DT_UNKNOWN, | |
+ [F2FS_FT_REG_FILE] = DT_REG, | |
+ [F2FS_FT_DIR] = DT_DIR, | |
+ [F2FS_FT_CHRDEV] = DT_CHR, | |
+ [F2FS_FT_BLKDEV] = DT_BLK, | |
+ [F2FS_FT_FIFO] = DT_FIFO, | |
+ [F2FS_FT_SOCK] = DT_SOCK, | |
+ [F2FS_FT_SYMLINK] = DT_LNK, | |
+}; | |
+ | |
+#define S_SHIFT 12 | |
+static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { | |
+ [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE, | |
+ [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR, | |
+ [S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV, | |
+ [S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV, | |
+ [S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO, | |
+ [S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK, | |
+ [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, | |
+}; | |
+ | |
+static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) | |
+{ | |
+ mode_t mode = inode->i_mode; | |
+ de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; | |
+} | |
+ | |
+static unsigned long dir_block_index(unsigned int level, unsigned int idx) | |
+{ | |
+ unsigned long i; | |
+ unsigned long bidx = 0; | |
+ | |
+ for (i = 0; i < level; i++) | |
+ bidx += dir_buckets(i) * bucket_blocks(i); | |
+ bidx += idx * bucket_blocks(level); | |
+ return bidx; | |
+} | |
+ | |
+static bool early_match_name(const char *name, size_t namelen, | |
+ f2fs_hash_t namehash, struct f2fs_dir_entry *de) | |
+{ | |
+ if (le16_to_cpu(de->name_len) != namelen) | |
+ return false; | |
+ | |
+ if (de->hash_code != namehash) | |
+ return false; | |
+ | |
+ return true; | |
+} | |
+ | |
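+/* | |
+ * Scan one dentry block through its validity bitmap. While searching, | |
+ * also track the largest run of free slots seen (max_slots) so the | |
+ * caller knows whether this block could hold the name being inserted. | |
+ */ | |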
+static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, | |
+ const char *name, size_t namelen, int *max_slots, | |
+ f2fs_hash_t namehash, struct page **res_page, | |
+ bool nocase) | |
+{ | |
+ struct f2fs_dir_entry *de; | |
+ unsigned long bit_pos, end_pos, next_pos; | |
+ struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); | |
+ int slots; | |
+ | |
+ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, | |
+ NR_DENTRY_IN_BLOCK, 0); | |
+ while (bit_pos < NR_DENTRY_IN_BLOCK) { | |
+ de = &dentry_blk->dentry[bit_pos]; | |
+ slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); | |
+ | |
+ if (nocase) { | |
+ if ((le16_to_cpu(de->name_len) == namelen) && | |
+ !strncasecmp(dentry_blk->filename[bit_pos], | |
+ name, namelen)) { | |
+ *res_page = dentry_page; | |
+ goto found; | |
+ } | |
+ } else if (early_match_name(name, namelen, namehash, de)) { | |
+ if (!memcmp(dentry_blk->filename[bit_pos], | |
+ name, namelen)) { | |
+ *res_page = dentry_page; | |
+ goto found; | |
+ } | |
+ } | |
+ next_pos = bit_pos + slots; | |
+ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, | |
+ NR_DENTRY_IN_BLOCK, next_pos); | |
+ if (bit_pos >= NR_DENTRY_IN_BLOCK) | |
+ end_pos = NR_DENTRY_IN_BLOCK; | |
+ else | |
+ end_pos = bit_pos; | |
+ if (*max_slots < end_pos - next_pos) | |
+ *max_slots = end_pos - next_pos; | |
+ } | |
+ | |
+ de = NULL; | |
+ kunmap(dentry_page); | |
+found: | |
+ return de; | |
+} | |
+ | |
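+/* | |
+ * Look up a name at one hash level: map the hash to a bucket, then | |
+ * scan every dentry block of that bucket. | |
+ */ | |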
+static struct f2fs_dir_entry *find_in_level(struct inode *dir, | |
+ unsigned int level, const char *name, size_t namelen, | |
+ f2fs_hash_t namehash, struct page **res_page) | |
+{ | |
+ int s = GET_DENTRY_SLOTS(namelen); | |
+ unsigned int nbucket, nblock; | |
+ unsigned int bidx, end_block; | |
+ struct page *dentry_page; | |
+ struct f2fs_dir_entry *de = NULL; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | |
+ bool room = false; | |
+ int max_slots = 0; | |
+ | |
+ BUG_ON(level > MAX_DIR_HASH_DEPTH); | |
+ | |
+ nbucket = dir_buckets(level); | |
+ nblock = bucket_blocks(level); | |
+ | |
+ bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); | |
+ end_block = bidx + nblock; | |
+ | |
+ for (; bidx < end_block; bidx++) { | |
+ bool nocase = false; | |
+ | |
+		/* no need to allocate new dentry pages for all the indices */ | |
+ dentry_page = find_data_page(dir, bidx, true); | |
+ if (IS_ERR(dentry_page)) { | |
+ room = true; | |
+ continue; | |
+ } | |
+ | |
+ if (test_opt(sbi, ANDROID_EMU) && | |
+ (sbi->android_emu_flags & F2FS_ANDROID_EMU_NOCASE) && | |
+ F2FS_I(dir)->i_advise & FADVISE_ANDROID_EMU) | |
+ nocase = true; | |
+ | |
+ de = find_in_block(dentry_page, name, namelen, | |
+ &max_slots, namehash, res_page, | |
+ nocase); | |
+ if (de) | |
+ break; | |
+ | |
+ if (max_slots >= s) | |
+ room = true; | |
+ f2fs_put_page(dentry_page, 0); | |
+ } | |
+ | |
+ if (!de && room && F2FS_I(dir)->chash != namehash) { | |
+ F2FS_I(dir)->chash = namehash; | |
+ F2FS_I(dir)->clevel = level; | |
+ } | |
+ | |
+ return de; | |
+} | |
+ | |
+/* | |
+ * Find an entry in the specified directory with the wanted name. | |
+ * It returns the page where the entry was found (via the output | |
+ * parameter res_page) and the entry itself. The page is returned | |
+ * mapped and unlocked. The entry is guaranteed to be valid. | |
+ */ | |
+struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, | |
+ struct qstr *child, struct page **res_page) | |
+{ | |
+ const char *name = child->name; | |
+ size_t namelen = child->len; | |
+ unsigned long npages = dir_blocks(dir); | |
+ struct f2fs_dir_entry *de = NULL; | |
+ f2fs_hash_t name_hash; | |
+ unsigned int max_depth; | |
+ unsigned int level; | |
+ | |
+ if (namelen > F2FS_NAME_LEN) | |
+ return NULL; | |
+ | |
+ if (npages == 0) | |
+ return NULL; | |
+ | |
+ *res_page = NULL; | |
+ | |
+ name_hash = f2fs_dentry_hash(name, namelen); | |
+ max_depth = F2FS_I(dir)->i_current_depth; | |
+ | |
+ for (level = 0; level < max_depth; level++) { | |
+ de = find_in_level(dir, level, name, | |
+ namelen, name_hash, res_page); | |
+ if (de) | |
+ break; | |
+ } | |
+ if (!de && F2FS_I(dir)->chash != name_hash) { | |
+ F2FS_I(dir)->chash = name_hash; | |
+ F2FS_I(dir)->clevel = level - 1; | |
+ } | |
+ return de; | |
+} | |
+ | |
+struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) | |
+{ | |
+ struct page *page; | |
+ struct f2fs_dir_entry *de; | |
+ struct f2fs_dentry_block *dentry_blk; | |
+ | |
+ page = get_lock_data_page(dir, 0); | |
+ if (IS_ERR(page)) | |
+ return NULL; | |
+ | |
+ dentry_blk = kmap(page); | |
+ de = &dentry_blk->dentry[1]; | |
+ *p = page; | |
+ unlock_page(page); | |
+ return de; | |
+} | |
+ | |
+ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) | |
+{ | |
+ ino_t res = 0; | |
+ struct f2fs_dir_entry *de; | |
+ struct page *page; | |
+ | |
+ de = f2fs_find_entry(dir, qstr, &page); | |
+ if (de) { | |
+ res = le32_to_cpu(de->ino); | |
+ kunmap(page); | |
+ f2fs_put_page(page, 0); | |
+ } | |
+ | |
+ return res; | |
+} | |
+ | |
+void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, | |
+ struct page *page, struct inode *inode) | |
+{ | |
+ lock_page(page); | |
+ wait_on_page_writeback(page); | |
+ de->ino = cpu_to_le32(inode->i_ino); | |
+ set_de_type(de, inode); | |
+ kunmap(page); | |
+ set_page_dirty(page); | |
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME; | |
+ mark_inode_dirty(dir); | |
+ | |
+ /* update parent inode number before releasing dentry page */ | |
+ F2FS_I(inode)->i_pino = dir->i_ino; | |
+ | |
+ f2fs_put_page(page, 1); | |
+} | |
+ | |
+static void init_dent_inode(const struct qstr *name, struct page *ipage) | |
+{ | |
+ struct f2fs_node *rn; | |
+ | |
+ /* copy the name info to this inode page */ | |
+ rn = F2FS_NODE(ipage); | |
+ rn->i.i_namelen = cpu_to_le32(name->len); | |
+ memcpy(rn->i.i_name, name->name, name->len); | |
+ set_page_dirty(ipage); | |
+} | |
+ | |
+int update_dent_inode(struct inode *inode, const struct qstr *name) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct page *page; | |
+ | |
+ page = get_node_page(sbi, inode->i_ino); | |
+ if (IS_ERR(page)) | |
+ return PTR_ERR(page); | |
+ | |
+ init_dent_inode(name, page); | |
+ f2fs_put_page(page, 1); | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int make_empty_dir(struct inode *inode, | |
+ struct inode *parent, struct page *page) | |
+{ | |
+ struct page *dentry_page; | |
+ struct f2fs_dentry_block *dentry_blk; | |
+ struct f2fs_dir_entry *de; | |
+ void *kaddr; | |
+ | |
+ dentry_page = get_new_data_page(inode, page, 0, true); | |
+ if (IS_ERR(dentry_page)) | |
+ return PTR_ERR(dentry_page); | |
+ | |
+ kaddr = kmap_atomic(dentry_page); | |
+ dentry_blk = (struct f2fs_dentry_block *)kaddr; | |
+ | |
+ de = &dentry_blk->dentry[0]; | |
+ de->name_len = cpu_to_le16(1); | |
+ de->hash_code = 0; | |
+ de->ino = cpu_to_le32(inode->i_ino); | |
+ memcpy(dentry_blk->filename[0], ".", 1); | |
+ set_de_type(de, inode); | |
+ | |
+ de = &dentry_blk->dentry[1]; | |
+ de->hash_code = 0; | |
+ de->name_len = cpu_to_le16(2); | |
+ de->ino = cpu_to_le32(parent->i_ino); | |
+ memcpy(dentry_blk->filename[1], "..", 2); | |
+ set_de_type(de, inode); | |
+ | |
+ test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); | |
+ test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); | |
+ kunmap_atomic(kaddr); | |
+ | |
+ set_page_dirty(dentry_page); | |
+ f2fs_put_page(dentry_page, 1); | |
+ return 0; | |
+} | |
+ | |
+static struct page *init_inode_metadata(struct inode *inode, | |
+ struct inode *dir, const struct qstr *name) | |
+{ | |
+ struct page *page; | |
+ int err; | |
+ | |
+ if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { | |
+ page = new_inode_page(inode, name); | |
+ if (IS_ERR(page)) | |
+ return page; | |
+ | |
+ if (S_ISDIR(inode->i_mode)) { | |
+ err = make_empty_dir(inode, dir, page); | |
+ if (err) | |
+ goto error; | |
+ } | |
+ | |
+ err = f2fs_init_acl(inode, dir); | |
+ if (err) | |
+ goto error; | |
+ | |
+ err = f2fs_init_security(inode, dir, name, page); | |
+ if (err) | |
+ goto error; | |
+ | |
+ wait_on_page_writeback(page); | |
+ } else { | |
+ page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); | |
+ if (IS_ERR(page)) | |
+ return page; | |
+ | |
+ wait_on_page_writeback(page); | |
+ set_cold_node(inode, page); | |
+ } | |
+ | |
+ init_dent_inode(name, page); | |
+ | |
+ /* | |
+ * This file should be checkpointed during fsync. | |
+ * We have lost i_pino from now on. | |
+ */ | |
+ if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { | |
+ file_lost_pino(inode); | |
+ inc_nlink(inode); | |
+ } | |
+ return page; | |
+ | |
+error: | |
+ f2fs_put_page(page, 1); | |
+ remove_inode_page(inode); | |
+ return ERR_PTR(err); | |
+} | |
+ | |
+static void update_parent_metadata(struct inode *dir, struct inode *inode, | |
+ unsigned int current_depth) | |
+{ | |
+ if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { | |
+ if (S_ISDIR(inode->i_mode)) { | |
+ inc_nlink(dir); | |
+ set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); | |
+ } | |
+ clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); | |
+ } | |
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME; | |
+ if (F2FS_I(dir)->i_current_depth != current_depth) { | |
+ F2FS_I(dir)->i_current_depth = current_depth; | |
+ set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); | |
+ } | |
+ | |
+ if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) | |
+ update_inode_page(dir); | |
+ else | |
+ mark_inode_dirty(dir); | |
+ | |
+ if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) | |
+ clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | |
+} | |
+ | |
+static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots) | |
+{ | |
+ int bit_start = 0; | |
+ int zero_start, zero_end; | |
+next: | |
+ zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, | |
+ NR_DENTRY_IN_BLOCK, | |
+ bit_start); | |
+ if (zero_start >= NR_DENTRY_IN_BLOCK) | |
+ return NR_DENTRY_IN_BLOCK; | |
+ | |
+ zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, | |
+ NR_DENTRY_IN_BLOCK, | |
+ zero_start); | |
+ if (zero_end - zero_start >= slots) | |
+ return zero_start; | |
+ | |
+ bit_start = zero_end + 1; | |
+ | |
+ if (zero_end + 1 >= NR_DENTRY_IN_BLOCK) | |
+ return NR_DENTRY_IN_BLOCK; | |
+ goto next; | |
+} | |
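+ | |
+/* | |
+ * Example: with dentry bits 0 and 1 in use and slots == 2, the first | |
+ * zero run starts at bit 2; if it is at least two bits long, 2 is | |
+ * returned. When no run of at least `slots` free bits exists, the | |
+ * function returns NR_DENTRY_IN_BLOCK and the caller moves on to the | |
+ * next dentry block. | |
+ */ | |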
+ | |
+/* | |
+ * Caller should grab and release a mutex by calling mutex_lock_op() and | |
+ * mutex_unlock_op(). | |
+ */ | |
+int __f2fs_add_link(struct inode *dir, const struct qstr *name, | |
+ struct inode *inode) | |
+{ | |
+ unsigned int bit_pos; | |
+ unsigned int level; | |
+ unsigned int current_depth; | |
+ unsigned long bidx, block; | |
+ f2fs_hash_t dentry_hash; | |
+ struct f2fs_dir_entry *de; | |
+ unsigned int nbucket, nblock; | |
+ size_t namelen = name->len; | |
+ struct page *dentry_page = NULL; | |
+ struct f2fs_dentry_block *dentry_blk = NULL; | |
+ int slots = GET_DENTRY_SLOTS(namelen); | |
+ struct page *page; | |
+ int err = 0; | |
+ int i; | |
+ | |
+ dentry_hash = f2fs_dentry_hash(name->name, name->len); | |
+ level = 0; | |
+ current_depth = F2FS_I(dir)->i_current_depth; | |
+ if (F2FS_I(dir)->chash == dentry_hash) { | |
+ level = F2FS_I(dir)->clevel; | |
+ F2FS_I(dir)->chash = 0; | |
+ } | |
+ | |
+start: | |
+ if (current_depth == MAX_DIR_HASH_DEPTH) | |
+ return -ENOSPC; | |
+ | |
+ /* Increase the depth, if required */ | |
+ if (level == current_depth) | |
+ ++current_depth; | |
+ | |
+ nbucket = dir_buckets(level); | |
+ nblock = bucket_blocks(level); | |
+ | |
+ bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); | |
+ | |
+ for (block = bidx; block <= (bidx + nblock - 1); block++) { | |
+ dentry_page = get_new_data_page(dir, NULL, block, true); | |
+ if (IS_ERR(dentry_page)) | |
+ return PTR_ERR(dentry_page); | |
+ | |
+ dentry_blk = kmap(dentry_page); | |
+ bit_pos = room_for_filename(dentry_blk, slots); | |
+ if (bit_pos < NR_DENTRY_IN_BLOCK) | |
+ goto add_dentry; | |
+ | |
+ kunmap(dentry_page); | |
+ f2fs_put_page(dentry_page, 1); | |
+ } | |
+ | |
+ /* Move to the next level to find an empty slot for the new dentry */ | |
+ ++level; | |
+ goto start; | |
+add_dentry: | |
+ wait_on_page_writeback(dentry_page); | |
+ | |
+ page = init_inode_metadata(inode, dir, name); | |
+ if (IS_ERR(page)) { | |
+ err = PTR_ERR(page); | |
+ goto fail; | |
+ } | |
+ de = &dentry_blk->dentry[bit_pos]; | |
+ de->hash_code = dentry_hash; | |
+ de->name_len = cpu_to_le16(namelen); | |
+ memcpy(dentry_blk->filename[bit_pos], name->name, name->len); | |
+ de->ino = cpu_to_le32(inode->i_ino); | |
+ set_de_type(de, inode); | |
+ for (i = 0; i < slots; i++) | |
+ test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); | |
+ set_page_dirty(dentry_page); | |
+ | |
+ /* we don't need to mark_inode_dirty now */ | |
+ F2FS_I(inode)->i_pino = dir->i_ino; | |
+ update_inode(inode, page); | |
+ f2fs_put_page(page, 1); | |
+ | |
+ update_parent_metadata(dir, inode, current_depth); | |
+fail: | |
+ clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); | |
+ kunmap(dentry_page); | |
+ f2fs_put_page(dentry_page, 1); | |
+ return err; | |
+} | |
+ | |
+/* | |
+ * It only removes the dentry from the dentry page; the corresponding | |
+ * name entry in the name page does not need to be touched during | |
+ * deletion. | |
+ */ | |
+void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | |
+ struct inode *inode) | |
+{ | |
+ struct f2fs_dentry_block *dentry_blk; | |
+ unsigned int bit_pos; | |
+ struct address_space *mapping = page->mapping; | |
+ struct inode *dir = mapping->host; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | |
+ int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); | |
+ void *kaddr = page_address(page); | |
+ int i; | |
+ | |
+ lock_page(page); | |
+ wait_on_page_writeback(page); | |
+ | |
+ dentry_blk = (struct f2fs_dentry_block *)kaddr; | |
+ bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; | |
+ for (i = 0; i < slots; i++) | |
+ test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); | |
+ | |
+ /* Let's check and deallocate this dentry page */ | |
+ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, | |
+ NR_DENTRY_IN_BLOCK, | |
+ 0); | |
+ kunmap(page); /* kunmap - pair of f2fs_find_entry */ | |
+ set_page_dirty(page); | |
+ | |
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME; | |
+ | |
+ if (inode && S_ISDIR(inode->i_mode)) { | |
+ drop_nlink(dir); | |
+ update_inode_page(dir); | |
+ } else { | |
+ mark_inode_dirty(dir); | |
+ } | |
+ | |
+ if (inode) { | |
+ inode->i_ctime = CURRENT_TIME; | |
+ drop_nlink(inode); | |
+ if (S_ISDIR(inode->i_mode)) { | |
+ drop_nlink(inode); | |
+ i_size_write(inode, 0); | |
+ } | |
+ update_inode_page(inode); | |
+ | |
+ if (inode->i_nlink == 0) | |
+ add_orphan_inode(sbi, inode->i_ino); | |
+ else | |
+ release_orphan_inode(sbi); | |
+ } | |
+ | |
+ if (bit_pos == NR_DENTRY_IN_BLOCK) { | |
+ truncate_hole(dir, page->index, page->index + 1); | |
+ clear_page_dirty_for_io(page); | |
+ ClearPageUptodate(page); | |
+ dec_page_count(sbi, F2FS_DIRTY_DENTS); | |
+ inode_dec_dirty_dents(dir); | |
+ } | |
+ f2fs_put_page(page, 1); | |
+} | |
+ | |
+bool f2fs_empty_dir(struct inode *dir) | |
+{ | |
+ unsigned long bidx; | |
+ struct page *dentry_page; | |
+ unsigned int bit_pos; | |
+ struct f2fs_dentry_block *dentry_blk; | |
+ unsigned long nblock = dir_blocks(dir); | |
+ | |
+ for (bidx = 0; bidx < nblock; bidx++) { | |
+ void *kaddr; | |
+ dentry_page = get_lock_data_page(dir, bidx); | |
+ if (IS_ERR(dentry_page)) { | |
+ if (PTR_ERR(dentry_page) == -ENOENT) | |
+ continue; | |
+ else | |
+ return false; | |
+ } | |
+ | |
+ kaddr = kmap_atomic(dentry_page); | |
+ dentry_blk = (struct f2fs_dentry_block *)kaddr; | |
+ if (bidx == 0) | |
+ bit_pos = 2; | |
+ else | |
+ bit_pos = 0; | |
+ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, | |
+ NR_DENTRY_IN_BLOCK, | |
+ bit_pos); | |
+ kunmap_atomic(kaddr); | |
+ | |
+ f2fs_put_page(dentry_page, 1); | |
+ | |
+ if (bit_pos < NR_DENTRY_IN_BLOCK) | |
+ return false; | |
+ } | |
+ return true; | |
+} | |
+ | |
+static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) | |
+{ | |
+ unsigned long pos = file->f_pos; | |
+ struct inode *inode = file->f_dentry->d_inode; | |
+ unsigned long npages = dir_blocks(inode); | |
+ unsigned char *types = NULL; | |
+ unsigned int bit_pos = 0, start_bit_pos = 0; | |
+ int over = 0; | |
+ struct f2fs_dentry_block *dentry_blk = NULL; | |
+ struct f2fs_dir_entry *de = NULL; | |
+ struct page *dentry_page = NULL; | |
+ unsigned int n = 0; | |
+ unsigned char d_type = DT_UNKNOWN; | |
+ int slots; | |
+ | |
+ types = f2fs_filetype_table; | |
+ bit_pos = (pos % NR_DENTRY_IN_BLOCK); | |
+ n = (pos / NR_DENTRY_IN_BLOCK); | |
+ | |
+ for ( ; n < npages; n++) { | |
+ dentry_page = get_lock_data_page(inode, n); | |
+ if (IS_ERR(dentry_page)) | |
+ continue; | |
+ | |
+ start_bit_pos = bit_pos; | |
+ dentry_blk = kmap(dentry_page); | |
+ while (bit_pos < NR_DENTRY_IN_BLOCK) { | |
+ d_type = DT_UNKNOWN; | |
+ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, | |
+ NR_DENTRY_IN_BLOCK, | |
+ bit_pos); | |
+ if (bit_pos >= NR_DENTRY_IN_BLOCK) | |
+ break; | |
+ | |
+ de = &dentry_blk->dentry[bit_pos]; | |
+ if (types && de->file_type < F2FS_FT_MAX) | |
+ d_type = types[de->file_type]; | |
+ | |
+ over = filldir(dirent, | |
+ dentry_blk->filename[bit_pos], | |
+ le16_to_cpu(de->name_len), | |
+ (n * NR_DENTRY_IN_BLOCK) + bit_pos, | |
+ le32_to_cpu(de->ino), d_type); | |
+ if (over) { | |
+ file->f_pos += bit_pos - start_bit_pos; | |
+ goto success; | |
+ } | |
+ slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); | |
+ bit_pos += slots; | |
+ } | |
+ bit_pos = 0; | |
+ file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK; | |
+ kunmap(dentry_page); | |
+ f2fs_put_page(dentry_page, 1); | |
+ dentry_page = NULL; | |
+ } | |
+success: | |
+ if (dentry_page && !IS_ERR(dentry_page)) { | |
+ kunmap(dentry_page); | |
+ f2fs_put_page(dentry_page, 1); | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+const struct file_operations f2fs_dir_operations = { | |
+ .llseek = generic_file_llseek, | |
+ .read = generic_read_dir, | |
+ .readdir = f2fs_readdir, | |
+ .fsync = f2fs_sync_file, | |
+ .unlocked_ioctl = f2fs_ioctl, | |
+}; | |
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h | |
new file mode 100644 | |
index 0000000..c6c24756 | |
--- /dev/null | |
+++ b/fs/f2fs/f2fs.h | |
@@ -0,0 +1,1290 @@ | |
+/* | |
+ * fs/f2fs/f2fs.h | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#ifndef _LINUX_F2FS_H | |
+#define _LINUX_F2FS_H | |
+ | |
+#include <linux/types.h> | |
+#include <linux/page-flags.h> | |
+#include <linux/buffer_head.h> | |
+#include <linux/slab.h> | |
+#include <linux/crc32.h> | |
+#include <linux/magic.h> | |
+#include <linux/kobject.h> | |
+ | |
+/* | |
+ * For mount options | |
+ */ | |
+#define F2FS_MOUNT_BG_GC 0x00000001 | |
+#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 | |
+#define F2FS_MOUNT_DISCARD 0x00000004 | |
+#define F2FS_MOUNT_NOHEAP 0x00000008 | |
+#define F2FS_MOUNT_XATTR_USER 0x00000010 | |
+#define F2FS_MOUNT_POSIX_ACL 0x00000020 | |
+#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 | |
+#define F2FS_MOUNT_INLINE_XATTR 0x00000080 | |
+#define F2FS_MOUNT_ANDROID_EMU 0x00001000 | |
+#define F2FS_MOUNT_ERRORS_PANIC 0x00002000 | |
+#define F2FS_MOUNT_ERRORS_RECOVER 0x00004000 | |
+ | |
+#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) | |
+#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) | |
+#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option) | |
+ | |
+#define ver_after(a, b) (typecheck(unsigned long long, a) && \ | |
+ typecheck(unsigned long long, b) && \ | |
+ ((long long)((a) - (b)) > 0)) | |
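+ | |
+/* | |
+ * A sketch of why ver_after() is wraparound-safe: ver_after(1, ULLONG_MAX) | |
+ * is true, since the unsigned subtraction wraps to 2, which is positive | |
+ * when reinterpreted as a signed long long. | |
+ */ | |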
+ | |
+typedef u32 block_t; /* | |
+ * should not change u32, since it is the on-disk block | |
+ * address format, __le32. | |
+ */ | |
+typedef u32 nid_t; | |
+ | |
+struct f2fs_mount_info { | |
+ unsigned int opt; | |
+}; | |
+ | |
+#define CRCPOLY_LE 0xedb88320 | |
+ | |
+static inline __u32 f2fs_crc32(void *buf, size_t len) | |
+{ | |
+ unsigned char *p = (unsigned char *)buf; | |
+ __u32 crc = F2FS_SUPER_MAGIC; | |
+ int i; | |
+ | |
+ while (len--) { | |
+ crc ^= *p++; | |
+ for (i = 0; i < 8; i++) | |
+ crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); | |
+ } | |
+ return crc; | |
+} | |
+ | |
+static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size) | |
+{ | |
+ return f2fs_crc32(buf, buf_size) == blk_crc; | |
+} | |
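+ | |
+/* | |
+ * Usage sketch (hypothetical variable names): a stored CRC is read | |
+ * from a block and validated against the bytes that precede it: | |
+ * | |
+ * crc = le32_to_cpu(*(__le32 *)((char *)cp_block + crc_offset)); | |
+ * if (!f2fs_crc_valid(crc, cp_block, crc_offset)) | |
+ * return false; | |
+ */ | |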
+ | |
+/* | |
+ * For checkpoint manager | |
+ */ | |
+enum { | |
+ NAT_BITMAP, | |
+ SIT_BITMAP | |
+}; | |
+ | |
+/* for the list of orphan inodes */ | |
+struct orphan_inode_entry { | |
+ struct list_head list; /* list head */ | |
+ nid_t ino; /* inode number */ | |
+}; | |
+ | |
+/* for the list of directory inodes */ | |
+struct dir_inode_entry { | |
+ struct list_head list; /* list head */ | |
+ struct inode *inode; /* vfs inode pointer */ | |
+}; | |
+ | |
+/* for the list of fsync inodes, used only during recovery */ | |
+struct fsync_inode_entry { | |
+ struct list_head list; /* list head */ | |
+ struct inode *inode; /* vfs inode pointer */ | |
+ block_t blkaddr; /* block address locating the last inode */ | |
+}; | |
+ | |
+#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) | |
+#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits)) | |
+ | |
+#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne) | |
+#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid) | |
+#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) | |
+#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) | |
+ | |
+static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) | |
+{ | |
+ int before = nats_in_cursum(rs); | |
+ rs->n_nats = cpu_to_le16(before + i); | |
+ return before; | |
+} | |
+ | |
+static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) | |
+{ | |
+ int before = sits_in_cursum(rs); | |
+ rs->n_sits = cpu_to_le16(before + i); | |
+ return before; | |
+} | |
+ | |
+/* | |
+ * ioctl commands | |
+ */ | |
+#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS | |
+#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS | |
+ | |
+#if defined(__KERNEL__) && defined(CONFIG_COMPAT) | |
+/* | |
+ * ioctl commands in 32 bit emulation | |
+ */ | |
+#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS | |
+#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS | |
+#endif | |
+ | |
+/* | |
+ * For INODE and NODE manager | |
+ */ | |
+/* | |
+ * XATTR_NODE_OFFSET stores xattrs in one node block per file, keeping -1 | |
+ * as its node offset to distinguish it from index node blocks. | |
+ * The upper bits of the offset are reserved for marking the node block, | |
+ * so they are masked off here. | |
+ */ | |
+#define XATTR_NODE_OFFSET ((((unsigned int)-1) << OFFSET_BIT_SHIFT) \ | |
+ >> OFFSET_BIT_SHIFT) | |
+enum { | |
+ ALLOC_NODE, /* allocate a new node page if needed */ | |
+ LOOKUP_NODE, /* look up a node without readahead */ | |
+ LOOKUP_NODE_RA, /* | |
+ * look up a node with readahead called | |
+ * by get_datablock_ro. | |
+ */ | |
+}; | |
+ | |
+#define F2FS_LINK_MAX 32000 /* maximum link count per file */ | |
+ | |
+/* for in-memory extent cache entry */ | |
+struct extent_info { | |
+ rwlock_t ext_lock; /* rwlock for consistency */ | |
+ unsigned int fofs; /* start offset in a file */ | |
+ u32 blk_addr; /* start block address of the extent */ | |
+ unsigned int len; /* length of the extent */ | |
+}; | |
+ | |
+/* | |
+ * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. | |
+ */ | |
+#define FADVISE_COLD_BIT 0x01 | |
+#define FADVISE_LOST_PINO_BIT 0x02 | |
+#define FADVISE_ANDROID_EMU 0x10 | |
+#define FADVISE_ANDROID_EMU_ROOT 0x20 | |
+ | |
+struct f2fs_inode_info { | |
+ struct inode vfs_inode; /* serve a vfs inode */ | |
+ unsigned long i_flags; /* keep an inode flags for ioctl */ | |
+ unsigned char i_advise; /* use to give file attribute hints */ | |
+ unsigned int i_current_depth; /* use only in directory structure */ | |
+ unsigned int i_pino; /* parent inode number */ | |
+ umode_t i_acl_mode; /* keep file acl mode temporarily */ | |
+ | |
+ /* Used below internally in f2fs */ | |
+ unsigned long flags; /* use to pass per-file flags */ | |
+ atomic_t dirty_dents; /* # of dirty dentry pages */ | |
+ f2fs_hash_t chash; /* hash value of given file name */ | |
+ unsigned int clevel; /* maximum level of given file name */ | |
+ nid_t i_xattr_nid; /* node id that contains xattrs */ | |
+ unsigned long long xattr_ver; /* cp version of xattr modification */ | |
+ struct extent_info ext; /* in-memory extent cache entry */ | |
+}; | |
+ | |
+static inline void get_extent_info(struct extent_info *ext, | |
+ struct f2fs_extent i_ext) | |
+{ | |
+ write_lock(&ext->ext_lock); | |
+ ext->fofs = le32_to_cpu(i_ext.fofs); | |
+ ext->blk_addr = le32_to_cpu(i_ext.blk_addr); | |
+ ext->len = le32_to_cpu(i_ext.len); | |
+ write_unlock(&ext->ext_lock); | |
+} | |
+ | |
+static inline void set_raw_extent(struct extent_info *ext, | |
+ struct f2fs_extent *i_ext) | |
+{ | |
+ read_lock(&ext->ext_lock); | |
+ i_ext->fofs = cpu_to_le32(ext->fofs); | |
+ i_ext->blk_addr = cpu_to_le32(ext->blk_addr); | |
+ i_ext->len = cpu_to_le32(ext->len); | |
+ read_unlock(&ext->ext_lock); | |
+} | |
+ | |
+struct f2fs_nm_info { | |
+ block_t nat_blkaddr; /* base disk address of NAT */ | |
+ nid_t max_nid; /* maximum possible node ids */ | |
+ nid_t next_scan_nid; /* the next nid to be scanned */ | |
+ | |
+ /* NAT cache management */ | |
+ struct radix_tree_root nat_root;/* root of the nat entry cache */ | |
+ rwlock_t nat_tree_lock; /* protect the nat entry cache */ | |
+ unsigned int nat_cnt; /* the # of cached nat entries */ | |
+ struct list_head nat_entries; /* cached nat entry list (clean) */ | |
+ struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ | |
+ | |
+ /* free node ids management */ | |
+ struct list_head free_nid_list; /* a list for free nids */ | |
+ spinlock_t free_nid_list_lock; /* protect free nid list */ | |
+ unsigned int fcnt; /* the number of free node id */ | |
+ struct mutex build_lock; /* lock for build free nids */ | |
+ | |
+ /* for checkpoint */ | |
+ char *nat_bitmap; /* NAT bitmap pointer */ | |
+ int bitmap_size; /* bitmap size */ | |
+}; | |
+ | |
+/* | |
+ * This structure is used as a function parameter. | |
+ * All the information is dedicated to a given direct node block determined | |
+ * by the data offset in a file. | |
+ */ | |
+struct dnode_of_data { | |
+ struct inode *inode; /* vfs inode pointer */ | |
+ struct page *inode_page; /* its inode page, NULL is possible */ | |
+ struct page *node_page; /* cached direct node page */ | |
+ nid_t nid; /* node id of the direct node block */ | |
+ unsigned int ofs_in_node; /* data offset in the node page */ | |
+ bool inode_page_locked; /* inode page is locked or not */ | |
+ block_t data_blkaddr; /* block address of the data block */ | |
+}; | |
+ | |
+static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, | |
+ struct page *ipage, struct page *npage, nid_t nid) | |
+{ | |
+ memset(dn, 0, sizeof(*dn)); | |
+ dn->inode = inode; | |
+ dn->inode_page = ipage; | |
+ dn->node_page = npage; | |
+ dn->nid = nid; | |
+} | |
+ | |
+/* | |
+ * For SIT manager | |
+ * | |
+ * By default, there are 6 active log areas across the whole main area. | |
+ * When considering hot and cold data separation to reduce cleaning overhead, | |
+ * we split 3 for data logs and 3 for node logs as hot, warm, and cold types, | |
+ * respectively. | |
+ * In the current design, you should not change these numbers directly. | |
+ * Instead, the active_logs=x mount option selects 2, 4, or 6 logs | |
+ * according to the underlying devices. (default: 6) | |
+ * Just in case, on-disk layout covers maximum 16 logs that consist of 8 for | |
+ * data and 8 for node logs. | |
+ */ | |
+#define NR_CURSEG_DATA_TYPE (3) | |
+#define NR_CURSEG_NODE_TYPE (3) | |
+#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) | |
+ | |
+enum { | |
+ CURSEG_HOT_DATA = 0, /* directory entry blocks */ | |
+ CURSEG_WARM_DATA, /* data blocks */ | |
+ CURSEG_COLD_DATA, /* multimedia or GCed data blocks */ | |
+ CURSEG_HOT_NODE, /* direct node blocks of directory files */ | |
+ CURSEG_WARM_NODE, /* direct node blocks of normal files */ | |
+ CURSEG_COLD_NODE, /* indirect node blocks */ | |
+ NO_CHECK_TYPE | |
+}; | |
+ | |
+struct f2fs_sm_info { | |
+ struct sit_info *sit_info; /* whole segment information */ | |
+ struct free_segmap_info *free_info; /* free segment information */ | |
+ struct dirty_seglist_info *dirty_info; /* dirty segment information */ | |
+ struct curseg_info *curseg_array; /* active segment information */ | |
+ | |
+ struct list_head wblist_head; /* list of under-writeback pages */ | |
+ spinlock_t wblist_lock; /* lock for checkpoint */ | |
+ | |
+ block_t seg0_blkaddr; /* block address of 0'th segment */ | |
+ block_t main_blkaddr; /* start block address of main area */ | |
+ block_t ssa_blkaddr; /* start block address of SSA area */ | |
+ | |
+ unsigned int segment_count; /* total # of segments */ | |
+ unsigned int main_segments; /* # of segments in main area */ | |
+ unsigned int reserved_segments; /* # of reserved segments */ | |
+ unsigned int ovp_segments; /* # of overprovision segments */ | |
+}; | |
+ | |
+/* | |
+ * For superblock | |
+ */ | |
+/* | |
+ * COUNT_TYPE for monitoring | |
+ * | |
+ * f2fs monitors the number of several block types such as on-writeback, | |
+ * dirty dentry blocks, dirty node blocks, and dirty meta blocks. | |
+ */ | |
+enum count_type { | |
+ F2FS_WRITEBACK, | |
+ F2FS_DIRTY_DENTS, | |
+ F2FS_DIRTY_NODES, | |
+ F2FS_DIRTY_META, | |
+ NR_COUNT_TYPE, | |
+}; | |
+ | |
+/* | |
+ * Used as sbi->fs_lock[NR_GLOBAL_LOCKS]. | |
+ * The checkpoint procedure blocks all the locks in this fs_lock array. | |
+ * FS operations grab a free lock; if none is free, they wait to grab | |
+ * one in a round-robin manner. | |
+ */ | |
+#define NR_GLOBAL_LOCKS 8 | |
+ | |
+/* | |
+ * The below are the page types of bios used in submit_bio(). | |
+ * The available types are: | |
+ * DATA User data pages. It operates as async mode. | |
+ * NODE Node pages. It operates as async mode. | |
+ * META FS metadata pages such as SIT, NAT, CP. | |
+ * NR_PAGE_TYPE The number of page types. | |
+ * META_FLUSH Make sure the previous pages are written | |
+ * while waiting for the bio's completion. | |
+ * It can only be used with META. | |
+ */ | |
+enum page_type { | |
+ DATA, | |
+ NODE, | |
+ META, | |
+ NR_PAGE_TYPE, | |
+ META_FLUSH, | |
+}; | |
+ | |
+/* | |
+ * Android sdcard emulation flags | |
+ */ | |
+#define F2FS_ANDROID_EMU_NOCASE 0x00000001 | |
+ | |
+struct f2fs_sb_info { | |
+ struct super_block *sb; /* pointer to VFS super block */ | |
+ struct proc_dir_entry *s_proc; /* proc entry */ | |
+ struct buffer_head *raw_super_buf; /* buffer head of raw sb */ | |
+ struct f2fs_super_block *raw_super; /* raw super block pointer */ | |
+ int s_dirty; /* dirty flag for checkpoint */ | |
+ | |
+ /* for node-related operations */ | |
+ struct f2fs_nm_info *nm_info; /* node manager */ | |
+ struct inode *node_inode; /* cache node blocks */ | |
+ | |
+ /* for segment-related operations */ | |
+ struct f2fs_sm_info *sm_info; /* segment manager */ | |
+ struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */ | |
+ sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */ | |
+ struct rw_semaphore bio_sem; /* IO semaphore */ | |
+ | |
+ /* for checkpoint */ | |
+ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ | |
+ struct inode *meta_inode; /* cache meta blocks */ | |
+ struct mutex cp_mutex; /* checkpoint procedure lock */ | |
+ struct mutex fs_lock[NR_GLOBAL_LOCKS]; /* blocking FS operations */ | |
+ struct mutex node_write; /* locking node writes */ | |
+ struct mutex writepages; /* mutex for writepages() */ | |
+ unsigned char next_lock_num; /* round-robin global locks */ | |
+ int por_doing; /* whether recovery is in progress */ | |
+ int on_build_free_nids; /* whether build_free_nids is running */ | |
+ | |
+ /* for orphan inode management */ | |
+ struct list_head orphan_inode_list; /* orphan inode list */ | |
+ struct mutex orphan_inode_mutex; /* for orphan inode list */ | |
+ unsigned int n_orphans; /* # of orphan inodes */ | |
+ | |
+ /* for directory inode management */ | |
+ struct list_head dir_inode_list; /* dir inode list */ | |
+ spinlock_t dir_inode_lock; /* for dir inode list lock */ | |
+ | |
+ /* basic file system units */ | |
+ unsigned int log_sectors_per_block; /* log2 sectors per block */ | |
+ unsigned int log_blocksize; /* log2 block size */ | |
+ unsigned int blocksize; /* block size */ | |
+ unsigned int root_ino_num; /* root inode number */ | |
+ unsigned int node_ino_num; /* node inode number */ | |
+ unsigned int meta_ino_num; /* meta inode number */ | |
+ unsigned int log_blocks_per_seg; /* log2 blocks per segment */ | |
+ unsigned int blocks_per_seg; /* blocks per segment */ | |
+ unsigned int segs_per_sec; /* segments per section */ | |
+ unsigned int secs_per_zone; /* sections per zone */ | |
+ unsigned int total_sections; /* total section count */ | |
+ unsigned int total_node_count; /* total node block count */ | |
+ unsigned int total_valid_node_count; /* valid node block count */ | |
+ unsigned int total_valid_inode_count; /* valid inode count */ | |
+ int active_logs; /* # of active logs */ | |
+ | |
+ block_t user_block_count; /* # of user blocks */ | |
+ block_t total_valid_block_count; /* # of valid blocks */ | |
+ block_t alloc_valid_block_count; /* # of allocated blocks */ | |
+ block_t last_valid_block_count; /* for recovery */ | |
+ u32 s_next_generation; /* for NFS support */ | |
+ atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ | |
+ | |
+ struct f2fs_mount_info mount_opt; /* mount options */ | |
+ | |
+ /* for cleaning operations */ | |
+ struct mutex gc_mutex; /* mutex for GC */ | |
+ struct f2fs_gc_kthread *gc_thread; /* GC thread */ | |
+ unsigned int cur_victim_sec; /* current victim section num */ | |
+ | |
+ /* | |
+ * for stat information. | |
+ * one is for the LFS mode, and the other is for the SSR mode. | |
+ */ | |
+#ifdef CONFIG_F2FS_STAT_FS | |
+ struct f2fs_stat_info *stat_info; /* FS status information */ | |
+ unsigned int segment_count[2]; /* # of allocated segments */ | |
+ unsigned int block_count[2]; /* # of allocated blocks */ | |
+ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ | |
+ int bg_gc; /* background gc calls */ | |
+ unsigned int n_dirty_dirs; /* # of dir inodes */ | |
+#endif | |
+ unsigned int last_victim[2]; /* last victim segment # */ | |
+ spinlock_t stat_lock; /* lock for stat operations */ | |
+ | |
+ /* For sysfs support */ | |
+ struct kobject s_kobj; | |
+ struct completion s_kobj_unregister; | |
+ | |
+ /* For Android sdcard emulation */ | |
+ u32 android_emu_uid; | |
+ u32 android_emu_gid; | |
+ umode_t android_emu_mode; | |
+ int android_emu_flags; | |
+}; | |
+ | |
+/* | |
+ * Inline functions | |
+ */ | |
+static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) | |
+{ | |
+ return container_of(inode, struct f2fs_inode_info, vfs_inode); | |
+} | |
+ | |
+static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb) | |
+{ | |
+ return sb->s_fs_info; | |
+} | |
+ | |
+static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) | |
+{ | |
+ return (struct f2fs_super_block *)(sbi->raw_super); | |
+} | |
+ | |
+static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) | |
+{ | |
+ return (struct f2fs_checkpoint *)(sbi->ckpt); | |
+} | |
+ | |
+static inline struct f2fs_node *F2FS_NODE(struct page *page) | |
+{ | |
+ return (struct f2fs_node *)page_address(page); | |
+} | |
+ | |
+static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) | |
+{ | |
+ return (struct f2fs_nm_info *)(sbi->nm_info); | |
+} | |
+ | |
+static inline struct f2fs_sm_info *SM_I(struct f2fs_sb_info *sbi) | |
+{ | |
+ return (struct f2fs_sm_info *)(sbi->sm_info); | |
+} | |
+ | |
+static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi) | |
+{ | |
+ return (struct sit_info *)(SM_I(sbi)->sit_info); | |
+} | |
+ | |
+static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi) | |
+{ | |
+ return (struct free_segmap_info *)(SM_I(sbi)->free_info); | |
+} | |
+ | |
+static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi) | |
+{ | |
+ return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info); | |
+} | |
+ | |
+static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) | |
+{ | |
+ sbi->s_dirty = 1; | |
+} | |
+ | |
+static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) | |
+{ | |
+ sbi->s_dirty = 0; | |
+} | |
+ | |
+static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) | |
+{ | |
+ return le64_to_cpu(cp->checkpoint_ver); | |
+} | |
+ | |
+static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) | |
+{ | |
+ unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); | |
+ return ckpt_flags & f; | |
+} | |
+ | |
+static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) | |
+{ | |
+ unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); | |
+ ckpt_flags |= f; | |
+ cp->ckpt_flags = cpu_to_le32(ckpt_flags); | |
+} | |
+ | |
+static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) | |
+{ | |
+ unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); | |
+ ckpt_flags &= (~f); | |
+ cp->ckpt_flags = cpu_to_le32(ckpt_flags); | |
+} | |
+ | |
+static inline void mutex_lock_all(struct f2fs_sb_info *sbi) | |
+{ | |
+ int i; | |
+ | |
+ for (i = 0; i < NR_GLOBAL_LOCKS; i++) { | |
+ /* | |
+ * This is the only time we take multiple fs_lock[] | |
+ * instances; the order is immaterial since we | |
+ * always hold cp_mutex, which serializes multiple | |
+ * such operations. | |
+ */ | |
+ mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); | |
+ } | |
+} | |
+ | |
+static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) | |
+{ | |
+ int i = 0; | |
+ for (; i < NR_GLOBAL_LOCKS; i++) | |
+ mutex_unlock(&sbi->fs_lock[i]); | |
+} | |
+ | |
+static inline int mutex_lock_op(struct f2fs_sb_info *sbi) | |
+{ | |
+ unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS; | |
+ int i = 0; | |
+ | |
+ for (; i < NR_GLOBAL_LOCKS; i++) | |
+ if (mutex_trylock(&sbi->fs_lock[i])) | |
+ return i; | |
+ | |
+ mutex_lock(&sbi->fs_lock[next_lock]); | |
+ sbi->next_lock_num++; | |
+ return next_lock; | |
+} | |
+ | |
+static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock) | |
+{ | |
+ if (ilock < 0) | |
+ return; | |
+ BUG_ON(ilock >= NR_GLOBAL_LOCKS); | |
+ mutex_unlock(&sbi->fs_lock[ilock]); | |
+} | |
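+ | |
+/* | |
+ * Usage sketch: an FS operation takes one free global lock (or waits | |
+ * for its round-robin slot) and releases the same slot when done: | |
+ * | |
+ * ilock = mutex_lock_op(sbi); | |
+ * ... modify metadata ... | |
+ * mutex_unlock_op(sbi, ilock); | |
+ */ | |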
+ | |
+/* | |
+ * Check whether the given nid is within node id range. | |
+ */ | |
+static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) | |
+{ | |
+ WARN_ON(nid >= NM_I(sbi)->max_nid); | |
+ if (nid >= NM_I(sbi)->max_nid) | |
+ return -EINVAL; | |
+ return 0; | |
+} | |
+ | |
+#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 | |
+ | |
+/* | |
+ * Check whether the inode has blocks or not | |
+ */ | |
+static inline int F2FS_HAS_BLOCKS(struct inode *inode) | |
+{ | |
+ if (F2FS_I(inode)->i_xattr_nid) | |
+ return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1); | |
+ else | |
+ return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS); | |
+} | |
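+ | |
+/* | |
+ * Note: i_blocks always includes the inode block itself | |
+ * (F2FS_DEFAULT_ALLOCATED_BLOCKS) and, when i_xattr_nid is set, the | |
+ * xattr node block, so "has blocks" means anything beyond those. | |
+ */ | |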
+ | |
+static inline int f2fs_handle_error(struct f2fs_sb_info *sbi) | |
+{ | |
+ if (test_opt(sbi, ERRORS_PANIC)) | |
+ BUG(); | |
+ if (test_opt(sbi, ERRORS_RECOVER)) | |
+ return 1; | |
+ return 0; | |
+} | |
+ | |
+static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, | |
+ struct inode *inode, blkcnt_t count) | |
+{ | |
+ block_t valid_block_count; | |
+ | |
+ spin_lock(&sbi->stat_lock); | |
+ valid_block_count = | |
+ sbi->total_valid_block_count + (block_t)count; | |
+ if (valid_block_count > sbi->user_block_count) { | |
+ spin_unlock(&sbi->stat_lock); | |
+ return false; | |
+ } | |
+ inode->i_blocks += count; | |
+ sbi->total_valid_block_count = valid_block_count; | |
+ sbi->alloc_valid_block_count += (block_t)count; | |
+ spin_unlock(&sbi->stat_lock); | |
+ return true; | |
+} | |
+ | |
+static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, | |
+ struct inode *inode, | |
+ blkcnt_t count) | |
+{ | |
+ spin_lock(&sbi->stat_lock); | |
+ | |
+ if (sbi->total_valid_block_count < (block_t)count) { | |
+ pr_crit("F2FS-fs (%s): block accounting error: %u < %llu\n", | |
+ sbi->sb->s_id, sbi->total_valid_block_count, count); | |
+ f2fs_handle_error(sbi); | |
+ sbi->total_valid_block_count = count; | |
+ } | |
+ if (inode->i_blocks < count) { | |
+ pr_crit("F2FS-fs (%s): inode accounting error: %llu < %llu\n", | |
+ sbi->sb->s_id, inode->i_blocks, count); | |
+ f2fs_handle_error(sbi); | |
+ inode->i_blocks = count; | |
+ } | |
+ | |
+ inode->i_blocks -= count; | |
+ sbi->total_valid_block_count -= (block_t)count; | |
+ spin_unlock(&sbi->stat_lock); | |
+ return 0; | |
+} | |
+ | |
+static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) | |
+{ | |
+ atomic_inc(&sbi->nr_pages[count_type]); | |
+ F2FS_SET_SB_DIRT(sbi); | |
+} | |
+ | |
+static inline void inode_inc_dirty_dents(struct inode *inode) | |
+{ | |
+ atomic_inc(&F2FS_I(inode)->dirty_dents); | |
+} | |
+ | |
+static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) | |
+{ | |
+ atomic_dec(&sbi->nr_pages[count_type]); | |
+} | |
+ | |
+static inline void inode_dec_dirty_dents(struct inode *inode) | |
+{ | |
+ atomic_dec(&F2FS_I(inode)->dirty_dents); | |
+} | |
+ | |
+static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) | |
+{ | |
+ return atomic_read(&sbi->nr_pages[count_type]); | |
+} | |
+ | |
+static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) | |
+{ | |
+ unsigned int pages_per_sec = sbi->segs_per_sec * | |
+ (1 << sbi->log_blocks_per_seg); | |
+ return ((get_pages(sbi, block_type) + pages_per_sec - 1) | |
+ >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; | |
+} | |
+ | |
+static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) | |
+{ | |
+ block_t ret; | |
+ spin_lock(&sbi->stat_lock); | |
+ ret = sbi->total_valid_block_count; | |
+ spin_unlock(&sbi->stat_lock); | |
+ return ret; | |
+} | |
+ | |
+static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) | |
+{ | |
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | |
+ | |
+ /* return NAT or SIT bitmap */ | |
+ if (flag == NAT_BITMAP) | |
+ return le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); | |
+ else if (flag == SIT_BITMAP) | |
+ return le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); | |
+ | |
+ return 0; | |
+} | |
+ | |
+static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) | |
+{ | |
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | |
+ int offset = (flag == NAT_BITMAP) ? | |
+ le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; | |
+ return &ckpt->sit_nat_version_bitmap + offset; | |
+} | |
+ | |
+static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) | |
+{ | |
+ block_t start_addr; | |
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | |
+ unsigned long long ckpt_version = cur_cp_version(ckpt); | |
+ | |
+ start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); | |
+ | |
+ /* | |
+ * an odd-numbered checkpoint version lives in cp segment 0, | |
+ * and an even-numbered one in cp segment 1 | |
+ */ | |
+ if (!(ckpt_version & 1)) | |
+ start_addr += sbi->blocks_per_seg; | |
+ | |
+ return start_addr; | |
+} | |
+ | |
+static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) | |
+{ | |
+ return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); | |
+} | |
+ | |
+static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, | |
+ struct inode *inode, | |
+ unsigned int count) | |
+{ | |
+ block_t valid_block_count; | |
+ unsigned int valid_node_count; | |
+ | |
+ spin_lock(&sbi->stat_lock); | |
+ | |
+ valid_block_count = sbi->total_valid_block_count + (block_t)count; | |
+ valid_node_count = sbi->total_valid_node_count + count; | |
+ | |
+ if (valid_block_count > sbi->user_block_count) { | |
+ spin_unlock(&sbi->stat_lock); | |
+ return false; | |
+ } | |
+ | |
+ if (valid_node_count > sbi->total_node_count) { | |
+ spin_unlock(&sbi->stat_lock); | |
+ return false; | |
+ } | |
+ | |
+ if (inode) | |
+ inode->i_blocks += count; | |
+ sbi->alloc_valid_block_count += (block_t)count; | |
+ sbi->total_valid_node_count = valid_node_count; | |
+ sbi->total_valid_block_count = valid_block_count; | |
+ spin_unlock(&sbi->stat_lock); | |
+ | |
+ return true; | |
+} | |
+ | |
+static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, | |
+ struct inode *inode, | |
+ unsigned int count) | |
+{ | |
+ spin_lock(&sbi->stat_lock); | |
+ | |
+ if (sbi->total_valid_block_count < count) { | |
+ pr_crit("F2FS-fs (%s): block accounting error: %u < %u\n", | |
+ sbi->sb->s_id, sbi->total_valid_block_count, count); | |
+ f2fs_handle_error(sbi); | |
+ sbi->total_valid_block_count = count; | |
+ } | |
+ if (sbi->total_valid_node_count < count) { | |
+ pr_crit("F2FS-fs (%s): node accounting error: %u < %u\n", | |
+ sbi->sb->s_id, sbi->total_valid_node_count, count); | |
+ f2fs_handle_error(sbi); | |
+ sbi->total_valid_node_count = count; | |
+ } | |
+ if (inode->i_blocks < count) { | |
+ pr_crit("F2FS-fs (%s): inode accounting error: %llu < %u\n", | |
+ sbi->sb->s_id, inode->i_blocks, count); | |
+ f2fs_handle_error(sbi); | |
+ inode->i_blocks = count; | |
+ } | |
+ | |
+ inode->i_blocks -= count; | |
+ sbi->total_valid_node_count -= count; | |
+ sbi->total_valid_block_count -= (block_t)count; | |
+ | |
+ spin_unlock(&sbi->stat_lock); | |
+} | |
+ | |
+static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) | |
+{ | |
+ unsigned int ret; | |
+ spin_lock(&sbi->stat_lock); | |
+ ret = sbi->total_valid_node_count; | |
+ spin_unlock(&sbi->stat_lock); | |
+ return ret; | |
+} | |
+ | |
+static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) | |
+{ | |
+ spin_lock(&sbi->stat_lock); | |
+ BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count); | |
+ sbi->total_valid_inode_count++; | |
+ spin_unlock(&sbi->stat_lock); | |
+} | |
+ | |
+static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) | |
+{ | |
+ spin_lock(&sbi->stat_lock); | |
+ BUG_ON(!sbi->total_valid_inode_count); | |
+ sbi->total_valid_inode_count--; | |
+ spin_unlock(&sbi->stat_lock); | |
+ return 0; | |
+} | |
+ | |
+static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) | |
+{ | |
+ unsigned int ret; | |
+ spin_lock(&sbi->stat_lock); | |
+ ret = sbi->total_valid_inode_count; | |
+ spin_unlock(&sbi->stat_lock); | |
+ return ret; | |
+} | |
+ | |
+static inline void f2fs_put_page(struct page *page, int unlock) | |
+{ | |
+ if (!page || IS_ERR(page)) | |
+ return; | |
+ | |
+ if (unlock) { | |
+ BUG_ON(!PageLocked(page)); | |
+ unlock_page(page); | |
+ } | |
+ page_cache_release(page); | |
+} | |
+ | |
+static inline void f2fs_put_dnode(struct dnode_of_data *dn) | |
+{ | |
+ if (dn->node_page) | |
+ f2fs_put_page(dn->node_page, 1); | |
+ if (dn->inode_page && dn->node_page != dn->inode_page) | |
+ f2fs_put_page(dn->inode_page, 0); | |
+ dn->node_page = NULL; | |
+ dn->inode_page = NULL; | |
+} | |
+ | |
+static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, | |
+ size_t size, void (*ctor)(void *)) | |
+{ | |
+ return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); | |
+} | |
+ | |
+#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) | |
+ | |
+static inline bool IS_INODE(struct page *page) | |
+{ | |
+ struct f2fs_node *p = F2FS_NODE(page); | |
+ return RAW_IS_INODE(p); | |
+} | |
+ | |
+static inline __le32 *blkaddr_in_node(struct f2fs_node *node) | |
+{ | |
+ return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr; | |
+} | |
+ | |
+static inline block_t datablock_addr(struct page *node_page, | |
+ unsigned int offset) | |
+{ | |
+ struct f2fs_node *raw_node; | |
+ __le32 *addr_array; | |
+ raw_node = F2FS_NODE(node_page); | |
+ addr_array = blkaddr_in_node(raw_node); | |
+ return le32_to_cpu(addr_array[offset]); | |
+} | |
+ | |
+static inline int f2fs_test_bit(unsigned int nr, char *addr) | |
+{ | |
+ int mask; | |
+ | |
+ addr += (nr >> 3); | |
+ mask = 1 << (7 - (nr & 0x07)); | |
+ return mask & *addr; | |
+} | |
+ | |
+static inline int f2fs_set_bit(unsigned int nr, char *addr) | |
+{ | |
+ int mask; | |
+ int ret; | |
+ | |
+ addr += (nr >> 3); | |
+ mask = 1 << (7 - (nr & 0x07)); | |
+ ret = mask & *addr; | |
+ *addr |= mask; | |
+ return ret; | |
+} | |
+ | |
+static inline int f2fs_clear_bit(unsigned int nr, char *addr) | |
+{ | |
+ int mask; | |
+ int ret; | |
+ | |
+ addr += (nr >> 3); | |
+ mask = 1 << (7 - (nr & 0x07)); | |
+ ret = mask & *addr; | |
+ *addr &= ~mask; | |
+ return ret; | |
+} | |
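+ | |
+/* | |
+ * Note the MSB-first bit order within each byte above | |
+ * (mask = 1 << (7 - (nr & 0x07))), which matches the on-disk bitmap | |
+ * layout: f2fs_set_bit(0, addr) sets the most significant bit of | |
+ * addr[0], not the least. | |
+ */ | |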
+ | |
+/* used for f2fs_inode_info->flags */ | |
+enum { | |
+ FI_NEW_INODE, /* indicate newly allocated inode */ | |
+ FI_DIRTY_INODE, /* indicate inode is dirty or not */ | |
+ FI_INC_LINK, /* need to increment i_nlink */ | |
+ FI_ACL_MODE, /* indicate acl mode */ | |
+ FI_NO_ALLOC, /* should not allocate any blocks */ | |
+ FI_UPDATE_DIR, /* should update inode block for consistency */ | |
+ FI_DELAY_IPUT, /* used for the recovery */ | |
+ FI_INLINE_XATTR, /* used for inline xattr */ | |
+}; | |
+ | |
+static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) | |
+{ | |
+ set_bit(flag, &fi->flags); | |
+} | |
+ | |
+static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) | |
+{ | |
+ return test_bit(flag, &fi->flags); | |
+} | |
+ | |
+static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) | |
+{ | |
+ clear_bit(flag, &fi->flags); | |
+} | |
+ | |
+static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) | |
+{ | |
+ fi->i_acl_mode = mode; | |
+ set_inode_flag(fi, FI_ACL_MODE); | |
+} | |
+ | |
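+/* | |
+ * Note: the flag argument below is currently unused; the helper checks | |
+ * and clears FI_ACL_MODE unconditionally. | |
+ */ | |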
+static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) | |
+{ | |
+ if (is_inode_flag_set(fi, FI_ACL_MODE)) { | |
+ clear_inode_flag(fi, FI_ACL_MODE); | |
+ return 1; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+int f2fs_android_emu(struct f2fs_sb_info *, struct inode *, u32 *, u32 *, | |
+ umode_t *); | |
+ | |
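+/* | |
+ * A file is treated as emulated sdcard storage when the mount option | |
+ * is set and either its own or its parent's inode info carries the | |
+ * FADVISE_ANDROID_EMU hint. | |
+ */ | |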
+#define IS_ANDROID_EMU(sbi, fi, pfi) \ | |
+ (test_opt((sbi), ANDROID_EMU) && \ | |
+ (((fi)->i_advise & FADVISE_ANDROID_EMU) || \ | |
+ ((pfi)->i_advise & FADVISE_ANDROID_EMU))) | |
+ | |
+static inline void get_inline_info(struct f2fs_inode_info *fi, | |
+ struct f2fs_inode *ri) | |
+{ | |
+ if (ri->i_inline & F2FS_INLINE_XATTR) | |
+ set_inode_flag(fi, FI_INLINE_XATTR); | |
+} | |
+ | |
+static inline void set_raw_inline(struct f2fs_inode_info *fi, | |
+ struct f2fs_inode *ri) | |
+{ | |
+ ri->i_inline = 0; | |
+ | |
+ if (is_inode_flag_set(fi, FI_INLINE_XATTR)) | |
+ ri->i_inline |= F2FS_INLINE_XATTR; | |
+} | |
+ | |
+static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) | |
+{ | |
+ if (is_inode_flag_set(fi, FI_INLINE_XATTR)) | |
+ return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; | |
+ return DEF_ADDRS_PER_INODE; | |
+} | |
+ | |
+static inline void *inline_xattr_addr(struct page *page) | |
+{ | |
+ struct f2fs_inode *ri; | |
+ ri = (struct f2fs_inode *)page_address(page); | |
+ return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - | |
+ F2FS_INLINE_XATTR_ADDRS]); | |
+} | |
+ | |
+static inline int inline_xattr_size(struct inode *inode) | |
+{ | |
+ if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) | |
+ return F2FS_INLINE_XATTR_ADDRS << 2; | |
+ else | |
+ return 0; | |
+} | |
+ | |
+static inline int f2fs_readonly(struct super_block *sb) | |
+{ | |
+ return sb->s_flags & MS_RDONLY; | |
+} | |
+ | |
+/* | |
+ * file.c | |
+ */ | |
+int f2fs_sync_file(struct file *, loff_t, loff_t, int); | |
+void truncate_data_blocks(struct dnode_of_data *); | |
+void f2fs_truncate(struct inode *); | |
+int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | |
+int f2fs_setattr(struct dentry *, struct iattr *); | |
+int truncate_hole(struct inode *, pgoff_t, pgoff_t); | |
+int truncate_data_blocks_range(struct dnode_of_data *, int); | |
+long f2fs_ioctl(struct file *, unsigned int, unsigned long); | |
+long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); | |
+ | |
+/* | |
+ * inode.c | |
+ */ | |
+void f2fs_set_inode_flags(struct inode *); | |
+struct inode *f2fs_iget(struct super_block *, unsigned long); | |
+void update_inode(struct inode *, struct page *); | |
+int update_inode_page(struct inode *); | |
+int f2fs_write_inode(struct inode *, struct writeback_control *); | |
+void f2fs_evict_inode(struct inode *); | |
+ | |
+/* | |
+ * namei.c | |
+ */ | |
+struct dentry *f2fs_get_parent(struct dentry *child); | |
+ | |
+/* | |
+ * dir.c | |
+ */ | |
+struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, | |
+ struct page **); | |
+struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); | |
+ino_t f2fs_inode_by_name(struct inode *, struct qstr *); | |
+void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, | |
+ struct page *, struct inode *); | |
+int update_dent_inode(struct inode *, const struct qstr *); | |
+int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); | |
+void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); | |
+int f2fs_make_empty(struct inode *, struct inode *); | |
+bool f2fs_empty_dir(struct inode *); | |
+ | |
+static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) | |
+{ | |
+ return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, | |
+ inode); | |
+} | |
+ | |
+/* | |
+ * super.c | |
+ */ | |
+int f2fs_sync_fs(struct super_block *, int); | |
+extern __printf(3, 4) | |
+void f2fs_msg(struct super_block *, const char *, const char *, ...); | |
+ | |
+/* | |
+ * hash.c | |
+ */ | |
+f2fs_hash_t f2fs_dentry_hash(const char *, size_t); | |
+ | |
+/* | |
+ * node.c | |
+ */ | |
+struct dnode_of_data; | |
+struct node_info; | |
+ | |
+int is_checkpointed_node(struct f2fs_sb_info *, nid_t); | |
+void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); | |
+int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); | |
+int truncate_inode_blocks(struct inode *, pgoff_t); | |
+int truncate_xattr_node(struct inode *, struct page *); | |
+int remove_inode_page(struct inode *); | |
+struct page *new_inode_page(struct inode *, const struct qstr *); | |
+struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); | |
+void ra_node_page(struct f2fs_sb_info *, nid_t); | |
+struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); | |
+struct page *get_node_page_ra(struct page *, int); | |
+void sync_inode_page(struct dnode_of_data *); | |
+int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); | |
+bool alloc_nid(struct f2fs_sb_info *, nid_t *); | |
+void alloc_nid_done(struct f2fs_sb_info *, nid_t); | |
+void alloc_nid_failed(struct f2fs_sb_info *, nid_t); | |
+void recover_node_page(struct f2fs_sb_info *, struct page *, | |
+ struct f2fs_summary *, struct node_info *, block_t); | |
+int recover_inode_page(struct f2fs_sb_info *, struct page *); | |
+int restore_node_summary(struct f2fs_sb_info *, unsigned int, | |
+ struct f2fs_summary_block *); | |
+void flush_nat_entries(struct f2fs_sb_info *); | |
+int build_node_manager(struct f2fs_sb_info *); | |
+void destroy_node_manager(struct f2fs_sb_info *); | |
+int __init create_node_manager_caches(void); | |
+void destroy_node_manager_caches(void); | |
+ | |
+/* | |
+ * segment.c | |
+ */ | |
+void f2fs_balance_fs(struct f2fs_sb_info *); | |
+void invalidate_blocks(struct f2fs_sb_info *, block_t); | |
+void clear_prefree_segments(struct f2fs_sb_info *); | |
+int npages_for_summary_flush(struct f2fs_sb_info *); | |
+void allocate_new_segments(struct f2fs_sb_info *); | |
+struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); | |
+struct bio *f2fs_bio_alloc(struct block_device *, int); | |
+void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool); | |
+void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); | |
+void write_meta_page(struct f2fs_sb_info *, struct page *); | |
+void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, | |
+ block_t, block_t *); | |
+void write_data_page(struct inode *, struct page *, struct dnode_of_data*, | |
+ block_t, block_t *); | |
+void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); | |
+void recover_data_page(struct f2fs_sb_info *, struct page *, | |
+ struct f2fs_summary *, block_t, block_t); | |
+void rewrite_node_page(struct f2fs_sb_info *, struct page *, | |
+ struct f2fs_summary *, block_t, block_t); | |
+void write_data_summaries(struct f2fs_sb_info *, block_t); | |
+void write_node_summaries(struct f2fs_sb_info *, block_t); | |
+int lookup_journal_in_cursum(struct f2fs_summary_block *, | |
+ int, unsigned int, int); | |
+void flush_sit_entries(struct f2fs_sb_info *); | |
+int build_segment_manager(struct f2fs_sb_info *); | |
+void destroy_segment_manager(struct f2fs_sb_info *); | |
+ | |
+/* | |
+ * checkpoint.c | |
+ */ | |
+struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); | |
+struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); | |
+long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); | |
+int acquire_orphan_inode(struct f2fs_sb_info *); | |
+void release_orphan_inode(struct f2fs_sb_info *); | |
+void add_orphan_inode(struct f2fs_sb_info *, nid_t); | |
+void remove_orphan_inode(struct f2fs_sb_info *, nid_t); | |
+int recover_orphan_inodes(struct f2fs_sb_info *); | |
+int get_valid_checkpoint(struct f2fs_sb_info *); | |
+void set_dirty_dir_page(struct inode *, struct page *); | |
+void add_dirty_dir_inode(struct inode *); | |
+void remove_dirty_dir_inode(struct inode *); | |
+struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); | |
+void sync_dirty_dir_inodes(struct f2fs_sb_info *); | |
+void write_checkpoint(struct f2fs_sb_info *, bool); | |
+void init_orphan_info(struct f2fs_sb_info *); | |
+int __init create_checkpoint_caches(void); | |
+void destroy_checkpoint_caches(void); | |
+ | |
+/* | |
+ * data.c | |
+ */ | |
+int reserve_new_block(struct dnode_of_data *); | |
+void update_extent_cache(block_t, struct dnode_of_data *); | |
+struct page *find_data_page(struct inode *, pgoff_t, bool); | |
+struct page *get_lock_data_page(struct inode *, pgoff_t); | |
+struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); | |
+int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); | |
+int do_write_data_page(struct page *); | |
+ | |
+/* | |
+ * gc.c | |
+ */ | |
+int start_gc_thread(struct f2fs_sb_info *); | |
+void stop_gc_thread(struct f2fs_sb_info *); | |
+block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); | |
+int f2fs_gc(struct f2fs_sb_info *); | |
+void build_gc_manager(struct f2fs_sb_info *); | |
+int __init create_gc_caches(void); | |
+void destroy_gc_caches(void); | |
+ | |
+/* | |
+ * recovery.c | |
+ */ | |
+int recover_fsync_data(struct f2fs_sb_info *); | |
+bool space_for_roll_forward(struct f2fs_sb_info *); | |
+ | |
+/* | |
+ * debug.c | |
+ */ | |
+#ifdef CONFIG_F2FS_STAT_FS | |
+struct f2fs_stat_info { | |
+ struct list_head stat_list; | |
+ struct f2fs_sb_info *sbi; | |
+ struct mutex stat_lock; | |
+ int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; | |
+ int main_area_segs, main_area_sections, main_area_zones; | |
+ int hit_ext, total_ext; | |
+ int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; | |
+ int nats, sits, fnids; | |
+ int total_count, utilization; | |
+ int bg_gc; | |
+ unsigned int valid_count, valid_node_count, valid_inode_count; | |
+ unsigned int bimodal, avg_vblocks; | |
+ int util_free, util_valid, util_invalid; | |
+ int rsvd_segs, overp_segs; | |
+ int dirty_count, node_pages, meta_pages; | |
+ int prefree_count, call_count; | |
+ int tot_segs, node_segs, data_segs, free_segs, free_secs; | |
+ int tot_blks, data_blks, node_blks; | |
+ int curseg[NR_CURSEG_TYPE]; | |
+ int cursec[NR_CURSEG_TYPE]; | |
+ int curzone[NR_CURSEG_TYPE]; | |
+ | |
+ unsigned int segment_count[2]; | |
+ unsigned int block_count[2]; | |
+ unsigned base_mem, cache_mem; | |
+}; | |
+ | |
+static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) | |
+{ | |
+ return (struct f2fs_stat_info*)sbi->stat_info; | |
+} | |
+ | |
+#define stat_inc_call_count(si) ((si)->call_count++) | |
+ | |
+#define stat_inc_seg_count(sbi, type) \ | |
+ do { \ | |
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ | |
+ (si)->tot_segs++; \ | |
+ if (type == SUM_TYPE_DATA) \ | |
+ si->data_segs++; \ | |
+ else \ | |
+ si->node_segs++; \ | |
+ } while (0) | |
+ | |
+#define stat_inc_tot_blk_count(si, blks) \ | |
+ (si->tot_blks += (blks)) | |
+ | |
+#define stat_inc_data_blk_count(sbi, blks) \ | |
+ do { \ | |
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ | |
+ stat_inc_tot_blk_count(si, blks); \ | |
+ si->data_blks += (blks); \ | |
+ } while (0) | |
+ | |
+#define stat_inc_node_blk_count(sbi, blks) \ | |
+ do { \ | |
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ | |
+ stat_inc_tot_blk_count(si, blks); \ | |
+ si->node_blks += (blks); \ | |
+ } while (0) | |
+ | |
+int f2fs_build_stats(struct f2fs_sb_info *); | |
+void f2fs_destroy_stats(struct f2fs_sb_info *); | |
+void __init f2fs_create_root_stats(void); | |
+void f2fs_destroy_root_stats(void); | |
+#else | |
+#define stat_inc_call_count(si) | |
+#define stat_inc_seg_count(si, type) | |
+#define stat_inc_tot_blk_count(si, blks) | |
+#define stat_inc_data_blk_count(si, blks) | |
+#define stat_inc_node_blk_count(sbi, blks) | |
+ | |
+static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } | |
+static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } | |
+static inline void __init f2fs_create_root_stats(void) { } | |
+static inline void f2fs_destroy_root_stats(void) { } | |
+#endif | |
+ | |
+extern const struct file_operations f2fs_dir_operations; | |
+extern const struct file_operations f2fs_file_operations; | |
+extern const struct inode_operations f2fs_file_inode_operations; | |
+extern const struct address_space_operations f2fs_dblock_aops; | |
+extern const struct address_space_operations f2fs_node_aops; | |
+extern const struct address_space_operations f2fs_meta_aops; | |
+extern const struct inode_operations f2fs_dir_inode_operations; | |
+extern const struct inode_operations f2fs_symlink_inode_operations; | |
+extern const struct inode_operations f2fs_special_inode_operations; | |
+#endif | |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c | |
new file mode 100644 | |
index 0000000..cf4b51c | |
--- /dev/null | |
+++ b/fs/f2fs/file.c | |
@@ -0,0 +1,725 @@ | |
+/* | |
+ * fs/f2fs/file.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/fs.h> | |
+#include <linux/f2fs_fs.h> | |
+#include <linux/stat.h> | |
+#include <linux/buffer_head.h> | |
+#include <linux/writeback.h> | |
+#include <linux/blkdev.h> | |
+#include <linux/falloc.h> | |
+#include <linux/types.h> | |
+#include <linux/compat.h> | |
+#include <linux/uaccess.h> | |
+#include <linux/mount.h> | |
+ | |
+#include "f2fs.h" | |
+#include "node.h" | |
+#include "segment.h" | |
+#include "xattr.h" | |
+#include "acl.h" | |
+#include <trace/events/f2fs.h> | |
+ | |
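+/* | |
+ * Make a faulted page writable: reserve a data block for it if none is | |
+ * allocated yet, then zero any part of the page beyond EOF and mark the | |
+ * page dirty. | |
+ */ | |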
+static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |
+ struct vm_fault *vmf) | |
+{ | |
+ struct page *page = vmf->page; | |
+ struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ block_t old_blk_addr; | |
+ struct dnode_of_data dn; | |
+ int err, ilock; | |
+ | |
+ f2fs_balance_fs(sbi); | |
+ | |
+	/* | |
+	 * Wait if the fs is frozen. This is racy, so we check again later | |
+	 * and retry if the fs was frozen after the page lock was acquired. | |
+	 */ | |
+ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | |
+ | |
+ /* block allocation */ | |
+ ilock = mutex_lock_op(sbi); | |
+ set_new_dnode(&dn, inode, NULL, NULL, 0); | |
+ err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); | |
+ if (err) { | |
+ mutex_unlock_op(sbi, ilock); | |
+ goto out; | |
+ } | |
+ | |
+ old_blk_addr = dn.data_blkaddr; | |
+ | |
+ if (old_blk_addr == NULL_ADDR) { | |
+ err = reserve_new_block(&dn); | |
+ if (err) { | |
+ f2fs_put_dnode(&dn); | |
+ mutex_unlock_op(sbi, ilock); | |
+ goto out; | |
+ } | |
+ } | |
+ f2fs_put_dnode(&dn); | |
+ mutex_unlock_op(sbi, ilock); | |
+ | |
+ file_update_time(vma->vm_file); | |
+ lock_page(page); | |
+ if (page->mapping != inode->i_mapping || | |
+ page_offset(page) > i_size_read(inode) || | |
+ !PageUptodate(page)) { | |
+ unlock_page(page); | |
+ err = -EFAULT; | |
+ goto out; | |
+ } | |
+ | |
+ /* | |
+ * check to see if the page is mapped already (no holes) | |
+ */ | |
+ if (PageMappedToDisk(page)) | |
+ goto mapped; | |
+ | |
+ /* page is wholly or partially inside EOF */ | |
+ if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { | |
+ unsigned offset; | |
+ offset = i_size_read(inode) & ~PAGE_CACHE_MASK; | |
+ zero_user_segment(page, offset, PAGE_CACHE_SIZE); | |
+ } | |
+ set_page_dirty(page); | |
+ SetPageUptodate(page); | |
+ | |
+mapped: | |
+ /* fill the page */ | |
+ wait_on_page_writeback(page); | |
+out: | |
+ return block_page_mkwrite_return(err); | |
+} | |
+ | |
+static const struct vm_operations_struct f2fs_file_vm_ops = { | |
+ .fault = filemap_fault, | |
+ .page_mkwrite = f2fs_vm_page_mkwrite, | |
+}; | |
+ | |
+static int get_parent_ino(struct inode *inode, nid_t *pino) | |
+{ | |
+ struct dentry *dentry; | |
+ | |
+ inode = igrab(inode); | |
+ | |
+ /* Alex - the following is equivalent to: dentry = d_find_any_alias(inode); */ | |
+ dentry = NULL; | |
+ spin_lock(&inode->i_lock); | |
+ if (!list_empty(&inode->i_dentry)) { | |
+ dentry = list_first_entry(&inode->i_dentry, | |
+ struct dentry, d_alias); | |
+ dget(dentry); | |
+ } | |
+ spin_unlock(&inode->i_lock); | |
+ | |
+ iput(inode); | |
+ if (!dentry) | |
+ return 0; | |
+ | |
+ if (update_dent_inode(inode, &dentry->d_name)) { | |
+ dput(dentry); | |
+ return 0; | |
+ } | |
+ | |
+ *pino = parent_ino(dentry); | |
+ dput(dentry); | |
+ return 1; | |
+} | |
+ | |
+int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |
+{ | |
+ struct inode *inode = file->f_mapping->host; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ int ret = 0; | |
+ bool need_cp = false; | |
+ struct writeback_control wbc = { | |
+ .sync_mode = WB_SYNC_ALL, | |
+ .nr_to_write = LONG_MAX, | |
+ .for_reclaim = 0, | |
+ }; | |
+ | |
+ if (f2fs_readonly(inode->i_sb)) | |
+ return 0; | |
+ | |
+ trace_f2fs_sync_file_enter(inode); | |
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | |
+ if (ret) { | |
+ trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); | |
+ return ret; | |
+ } | |
+ | |
+ /* guarantee free sections for fsync */ | |
+ f2fs_balance_fs(sbi); | |
+ | |
+ mutex_lock(&inode->i_mutex); | |
+ | |
+ /* | |
+	 * Both fdatasync() and fsync() must be recoverable after a | |
+	 * sudden power-off. | |
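+	 * A checkpoint is forced (need_cp) whenever roll-forward recovery | |
+	 * alone could not restore the file: a non-regular or hard-linked | |
+	 * inode, a stale parent ino, too little space for roll-forward, a | |
+	 * parent node that is not checkpointed, or an xattr updated since | |
+	 * the last checkpoint. | |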
+ */ | |
+ if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) | |
+ need_cp = true; | |
+ else if (file_wrong_pino(inode)) | |
+ need_cp = true; | |
+ else if (!space_for_roll_forward(sbi)) | |
+ need_cp = true; | |
+ else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) | |
+ need_cp = true; | |
+ else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) | |
+ need_cp = true; | |
+ | |
+ if (need_cp) { | |
+ nid_t pino; | |
+ | |
+ F2FS_I(inode)->xattr_ver = 0; | |
+ | |
+ /* all the dirty node pages should be flushed for POR */ | |
+ ret = f2fs_sync_fs(inode->i_sb, 1); | |
+ if (file_wrong_pino(inode) && inode->i_nlink == 1 && | |
+ get_parent_ino(inode, &pino)) { | |
+ F2FS_I(inode)->i_pino = pino; | |
+ file_got_pino(inode); | |
+ mark_inode_dirty_sync(inode); | |
+ ret = f2fs_write_inode(inode, NULL); | |
+ if (ret) | |
+ goto out; | |
+ } | |
+ } else { | |
+ /* if there is no written node page, write its inode page */ | |
+ while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { | |
+ mark_inode_dirty_sync(inode); | |
+ ret = f2fs_write_inode(inode, NULL); | |
+ if (ret) | |
+ goto out; | |
+ } | |
+ filemap_fdatawait_range(sbi->node_inode->i_mapping, | |
+ 0, LONG_MAX); | |
+ ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | |
+ } | |
+out: | |
+ mutex_unlock(&inode->i_mutex); | |
+ trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); | |
+ return ret; | |
+} | |
+ | |
+static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) | |
+{ | |
+ file_accessed(file); | |
+ vma->vm_ops = &f2fs_file_vm_ops; | |
+ return 0; | |
+} | |
+ | |
+int truncate_data_blocks_range(struct dnode_of_data *dn, int count) | |
+{ | |
+ int nr_free = 0, ofs = dn->ofs_in_node; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | |
+ struct f2fs_node *raw_node; | |
+ __le32 *addr; | |
+ | |
+ raw_node = F2FS_NODE(dn->node_page); | |
+ addr = blkaddr_in_node(raw_node) + ofs; | |
+ | |
+ for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { | |
+ block_t blkaddr = le32_to_cpu(*addr); | |
+ if (blkaddr == NULL_ADDR) | |
+ continue; | |
+ | |
+ update_extent_cache(NULL_ADDR, dn); | |
+ invalidate_blocks(sbi, blkaddr); | |
+ nr_free++; | |
+ } | |
+ if (nr_free) { | |
+ dec_valid_block_count(sbi, dn->inode, nr_free); | |
+ set_page_dirty(dn->node_page); | |
+ sync_inode_page(dn); | |
+ } | |
+ dn->ofs_in_node = ofs; | |
+ | |
+ trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, | |
+ dn->ofs_in_node, nr_free); | |
+ return nr_free; | |
+} | |
+ | |
+void truncate_data_blocks(struct dnode_of_data *dn) | |
+{ | |
+ truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); | |
+} | |
+ | |
+static void truncate_partial_data_page(struct inode *inode, u64 from) | |
+{ | |
+ unsigned offset = from & (PAGE_CACHE_SIZE - 1); | |
+ struct page *page; | |
+ | |
+ if (!offset) | |
+ return; | |
+ | |
+ page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); | |
+ if (IS_ERR(page)) | |
+ return; | |
+ | |
+ lock_page(page); | |
+ if (page->mapping != inode->i_mapping) { | |
+ f2fs_put_page(page, 1); | |
+ return; | |
+ } | |
+ wait_on_page_writeback(page); | |
+ zero_user(page, offset, PAGE_CACHE_SIZE - offset); | |
+ set_page_dirty(page); | |
+ f2fs_put_page(page, 1); | |
+} | |
+ | |
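+/* | |
+ * Free every block from the first block boundary at or after 'from'; | |
+ * the partial page containing 'from', if any, is zeroed out afterwards. | |
+ */ | |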
+static int truncate_blocks(struct inode *inode, u64 from) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ unsigned int blocksize = inode->i_sb->s_blocksize; | |
+ struct dnode_of_data dn; | |
+ pgoff_t free_from; | |
+ int count = 0, ilock = -1; | |
+ int err; | |
+ | |
+ trace_f2fs_truncate_blocks_enter(inode, from); | |
+ | |
+ free_from = (pgoff_t) | |
+ ((from + blocksize - 1) >> (sbi->log_blocksize)); | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ set_new_dnode(&dn, inode, NULL, NULL, 0); | |
+ err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); | |
+ if (err) { | |
+ if (err == -ENOENT) | |
+ goto free_next; | |
+ mutex_unlock_op(sbi, ilock); | |
+ trace_f2fs_truncate_blocks_exit(inode, err); | |
+ return err; | |
+ } | |
+ | |
+ if (IS_INODE(dn.node_page)) | |
+ count = ADDRS_PER_INODE(F2FS_I(inode)); | |
+ else | |
+ count = ADDRS_PER_BLOCK; | |
+ | |
+ count -= dn.ofs_in_node; | |
+ BUG_ON(count < 0); | |
+ | |
+ if (dn.ofs_in_node || IS_INODE(dn.node_page)) { | |
+ truncate_data_blocks_range(&dn, count); | |
+ free_from += count; | |
+ } | |
+ | |
+ f2fs_put_dnode(&dn); | |
+free_next: | |
+ err = truncate_inode_blocks(inode, free_from); | |
+ mutex_unlock_op(sbi, ilock); | |
+ | |
+	/* lastly, zero out the partial page at the truncation point */ | |
+ truncate_partial_data_page(inode, from); | |
+ | |
+ trace_f2fs_truncate_blocks_exit(inode, err); | |
+ return err; | |
+} | |
+ | |
+void f2fs_truncate(struct inode *inode) | |
+{ | |
+ int err; | |
+ | |
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | |
+ S_ISLNK(inode->i_mode))) | |
+ return; | |
+ | |
+ trace_f2fs_truncate(inode); | |
+ | |
+ err = truncate_blocks(inode, i_size_read(inode)); | |
+ if (err) { | |
+ f2fs_msg(inode->i_sb, KERN_ERR, "truncate failed with %d", | |
+ err); | |
+ f2fs_handle_error(F2FS_SB(inode->i_sb)); | |
+ } else { | |
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME; | |
+ mark_inode_dirty(inode); | |
+ } | |
+} | |
+ | |
+int f2fs_getattr(struct vfsmount *mnt, | |
+ struct dentry *dentry, struct kstat *stat) | |
+{ | |
+ struct inode *inode = dentry->d_inode; | |
+ generic_fillattr(inode, stat); | |
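+	/* i_blocks counts 4KB f2fs blocks; st_blocks expects 512-byte sectors */ | |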
+ stat->blocks <<= 3; | |
+ return 0; | |
+} | |
+ | |
+#ifdef CONFIG_F2FS_FS_POSIX_ACL | |
+static void __setattr_copy(struct inode *inode, const struct iattr *attr) | |
+{ | |
+ struct f2fs_inode_info *fi = F2FS_I(inode); | |
+ unsigned int ia_valid = attr->ia_valid; | |
+ | |
+ if (ia_valid & ATTR_UID) | |
+ inode->i_uid = attr->ia_uid; | |
+ if (ia_valid & ATTR_GID) | |
+ inode->i_gid = attr->ia_gid; | |
+ if (ia_valid & ATTR_ATIME) | |
+ inode->i_atime = timespec_trunc(attr->ia_atime, | |
+ inode->i_sb->s_time_gran); | |
+ if (ia_valid & ATTR_MTIME) | |
+ inode->i_mtime = timespec_trunc(attr->ia_mtime, | |
+ inode->i_sb->s_time_gran); | |
+ if (ia_valid & ATTR_CTIME) | |
+ inode->i_ctime = timespec_trunc(attr->ia_ctime, | |
+ inode->i_sb->s_time_gran); | |
+ if (ia_valid & ATTR_MODE) { | |
+ umode_t mode = attr->ia_mode; | |
+ | |
+ if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) | |
+ mode &= ~S_ISGID; | |
+ set_acl_inode(fi, mode); | |
+ } | |
+} | |
+#else | |
+#define __setattr_copy setattr_copy | |
+#endif | |
+ | |
+int f2fs_setattr(struct dentry *dentry, struct iattr *attr) | |
+{ | |
+ struct inode *inode = dentry->d_inode; | |
+ struct f2fs_inode_info *fi = F2FS_I(inode); | |
+ struct f2fs_inode_info *pfi = F2FS_I(dentry->d_parent->d_inode); | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ int err; | |
+ | |
+ err = inode_change_ok(inode, attr); | |
+ if (err) | |
+ return err; | |
+ | |
+ if (IS_ANDROID_EMU(sbi, fi, pfi)) | |
+ f2fs_android_emu(sbi, inode, &attr->ia_uid, &attr->ia_gid, | |
+ &attr->ia_mode); | |
+ | |
+ if ((attr->ia_valid & ATTR_SIZE) && | |
+ attr->ia_size != i_size_read(inode)) { | |
+ truncate_setsize(inode, attr->ia_size); | |
+ f2fs_truncate(inode); | |
+ f2fs_balance_fs(F2FS_SB(inode->i_sb)); | |
+ } | |
+ | |
+ __setattr_copy(inode, attr); | |
+ | |
+ if (attr->ia_valid & ATTR_MODE) { | |
+ err = f2fs_acl_chmod(inode); | |
+ if (err || is_inode_flag_set(fi, FI_ACL_MODE)) { | |
+ inode->i_mode = fi->i_acl_mode; | |
+ clear_inode_flag(fi, FI_ACL_MODE); | |
+ } | |
+ } | |
+ | |
+ mark_inode_dirty(inode); | |
+ return err; | |
+} | |
+ | |
+const struct inode_operations f2fs_file_inode_operations = { | |
+ .getattr = f2fs_getattr, | |
+ .setattr = f2fs_setattr, | |
+ .get_acl = f2fs_get_acl, | |
+#ifdef CONFIG_F2FS_FS_XATTR | |
+ .setxattr = generic_setxattr, | |
+ .getxattr = generic_getxattr, | |
+ .listxattr = f2fs_listxattr, | |
+ .removexattr = generic_removexattr, | |
+#endif | |
+}; | |
+ | |
+static void fill_zero(struct inode *inode, pgoff_t index, | |
+ loff_t start, loff_t len) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct page *page; | |
+ int ilock; | |
+ | |
+ if (!len) | |
+ return; | |
+ | |
+ f2fs_balance_fs(sbi); | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ page = get_new_data_page(inode, NULL, index, false); | |
+ mutex_unlock_op(sbi, ilock); | |
+ | |
+ if (!IS_ERR(page)) { | |
+ wait_on_page_writeback(page); | |
+ zero_user(page, start, len); | |
+ set_page_dirty(page); | |
+ f2fs_put_page(page, 1); | |
+ } | |
+} | |
+ | |
+int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) | |
+{ | |
+ pgoff_t index; | |
+ int err; | |
+ | |
+ for (index = pg_start; index < pg_end; index++) { | |
+ struct dnode_of_data dn; | |
+ | |
+ set_new_dnode(&dn, inode, NULL, NULL, 0); | |
+ err = get_dnode_of_data(&dn, index, LOOKUP_NODE); | |
+ if (err) { | |
+ if (err == -ENOENT) | |
+ continue; | |
+ return err; | |
+ } | |
+ | |
+ if (dn.data_blkaddr != NULL_ADDR) | |
+ truncate_data_blocks_range(&dn, 1); | |
+ f2fs_put_dnode(&dn); | |
+ } | |
+ return 0; | |
+} | |
+ | |
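+/* | |
+ * Zero the partial pages at either edge of the hole and drop the whole | |
+ * pages in between. | |
+ */ | |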
+static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) | |
+{ | |
+ pgoff_t pg_start, pg_end; | |
+ loff_t off_start, off_end; | |
+ int ret = 0; | |
+ | |
+ pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; | |
+ pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; | |
+ | |
+ off_start = offset & (PAGE_CACHE_SIZE - 1); | |
+ off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); | |
+ | |
+ if (pg_start == pg_end) { | |
+ fill_zero(inode, pg_start, off_start, | |
+ off_end - off_start); | |
+ } else { | |
+ if (off_start) | |
+ fill_zero(inode, pg_start++, off_start, | |
+ PAGE_CACHE_SIZE - off_start); | |
+ if (off_end) | |
+ fill_zero(inode, pg_end, 0, off_end); | |
+ | |
+ if (pg_start < pg_end) { | |
+ struct address_space *mapping = inode->i_mapping; | |
+ loff_t blk_start, blk_end; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ int ilock; | |
+ | |
+ f2fs_balance_fs(sbi); | |
+ | |
+ blk_start = pg_start << PAGE_CACHE_SHIFT; | |
+ blk_end = pg_end << PAGE_CACHE_SHIFT; | |
+ truncate_inode_pages_range(mapping, blk_start, | |
+ blk_end - 1); | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ ret = truncate_hole(inode, pg_start, pg_end); | |
+ mutex_unlock_op(sbi, ilock); | |
+ } | |
+ } | |
+ | |
+ if (!(mode & FALLOC_FL_KEEP_SIZE) && | |
+ i_size_read(inode) <= (offset + len)) { | |
+ i_size_write(inode, offset); | |
+ mark_inode_dirty(inode); | |
+ } | |
+ | |
+ return ret; | |
+} | |
+ | |
+static int expand_inode_data(struct inode *inode, loff_t offset, | |
+ loff_t len, int mode) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ pgoff_t index, pg_start, pg_end; | |
+ loff_t new_size = i_size_read(inode); | |
+ loff_t off_start, off_end; | |
+ int ret = 0; | |
+ | |
+ ret = inode_newsize_ok(inode, (len + offset)); | |
+ if (ret) | |
+ return ret; | |
+ | |
+ pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; | |
+ pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; | |
+ | |
+ off_start = offset & (PAGE_CACHE_SIZE - 1); | |
+ off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); | |
+ | |
+ for (index = pg_start; index <= pg_end; index++) { | |
+ struct dnode_of_data dn; | |
+ int ilock; | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ set_new_dnode(&dn, inode, NULL, NULL, 0); | |
+ ret = get_dnode_of_data(&dn, index, ALLOC_NODE); | |
+ if (ret) { | |
+ mutex_unlock_op(sbi, ilock); | |
+ break; | |
+ } | |
+ | |
+ if (dn.data_blkaddr == NULL_ADDR) { | |
+ ret = reserve_new_block(&dn); | |
+ if (ret) { | |
+ f2fs_put_dnode(&dn); | |
+ mutex_unlock_op(sbi, ilock); | |
+ break; | |
+ } | |
+ } | |
+ f2fs_put_dnode(&dn); | |
+ mutex_unlock_op(sbi, ilock); | |
+ | |
+ if (pg_start == pg_end) | |
+ new_size = offset + len; | |
+ else if (index == pg_start && off_start) | |
+ new_size = (index + 1) << PAGE_CACHE_SHIFT; | |
+ else if (index == pg_end) | |
+ new_size = (index << PAGE_CACHE_SHIFT) + off_end; | |
+ else | |
+ new_size += PAGE_CACHE_SIZE; | |
+ } | |
+ | |
+ if (!(mode & FALLOC_FL_KEEP_SIZE) && | |
+ i_size_read(inode) < new_size) { | |
+ i_size_write(inode, new_size); | |
+ mark_inode_dirty(inode); | |
+ } | |
+ | |
+ return ret; | |
+} | |
+ | |
+static long f2fs_fallocate(struct file *file, int mode, | |
+ loff_t offset, loff_t len) | |
+{ | |
+ struct inode *inode = file->f_path.dentry->d_inode; | |
+ long ret; | |
+ | |
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | |
+ return -EOPNOTSUPP; | |
+ | |
+ if (mode & FALLOC_FL_PUNCH_HOLE) | |
+ ret = punch_hole(inode, offset, len, mode); | |
+ else | |
+ ret = expand_inode_data(inode, offset, len, mode); | |
+ | |
+ if (!ret) { | |
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME; | |
+ mark_inode_dirty(inode); | |
+ } | |
+ trace_f2fs_fallocate(inode, mode, offset, len, ret); | |
+ return ret; | |
+} | |
+ | |
+#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) | |
+#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) | |
+ | |
+static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) | |
+{ | |
+ if (S_ISDIR(mode)) | |
+ return flags; | |
+ else if (S_ISREG(mode)) | |
+ return flags & F2FS_REG_FLMASK; | |
+ else | |
+ return flags & F2FS_OTHER_FLMASK; | |
+} | |
+ | |
+long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
+{ | |
+ struct inode *inode = filp->f_dentry->d_inode; | |
+ struct f2fs_inode_info *fi = F2FS_I(inode); | |
+ unsigned int flags; | |
+ int ret; | |
+ | |
+ switch (cmd) { | |
+ case F2FS_IOC_GETFLAGS: | |
+ flags = fi->i_flags & FS_FL_USER_VISIBLE; | |
+ return put_user(flags, (int __user *) arg); | |
+ case F2FS_IOC_SETFLAGS: | |
+ { | |
+ unsigned int oldflags; | |
+ | |
+ ret = mnt_want_write(filp->f_path.mnt); | |
+ if (ret) | |
+ return ret; | |
+ | |
+ if (!inode_owner_or_capable(inode)) { | |
+ ret = -EACCES; | |
+ goto out; | |
+ } | |
+ | |
+ if (get_user(flags, (int __user *) arg)) { | |
+ ret = -EFAULT; | |
+ goto out; | |
+ } | |
+ | |
+ flags = f2fs_mask_flags(inode->i_mode, flags); | |
+ | |
+ mutex_lock(&inode->i_mutex); | |
+ | |
+ oldflags = fi->i_flags; | |
+ | |
+ if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { | |
+ if (!capable(CAP_LINUX_IMMUTABLE)) { | |
+ mutex_unlock(&inode->i_mutex); | |
+ ret = -EPERM; | |
+ goto out; | |
+ } | |
+ } | |
+ | |
+ flags = flags & FS_FL_USER_MODIFIABLE; | |
+ flags |= oldflags & ~FS_FL_USER_MODIFIABLE; | |
+ fi->i_flags = flags; | |
+ mutex_unlock(&inode->i_mutex); | |
+ | |
+ f2fs_set_inode_flags(inode); | |
+ inode->i_ctime = CURRENT_TIME; | |
+ mark_inode_dirty(inode); | |
+out: | |
+ mnt_drop_write(filp->f_path.mnt); | |
+ return ret; | |
+ } | |
+ default: | |
+ return -ENOTTY; | |
+ } | |
+} | |
+ | |
+#ifdef CONFIG_COMPAT | |
+long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |
+{ | |
+ switch (cmd) { | |
+ case F2FS_IOC32_GETFLAGS: | |
+ cmd = F2FS_IOC_GETFLAGS; | |
+ break; | |
+ case F2FS_IOC32_SETFLAGS: | |
+ cmd = F2FS_IOC_SETFLAGS; | |
+ break; | |
+ default: | |
+ return -ENOIOCTLCMD; | |
+ } | |
+ return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); | |
+} | |
+#endif | |
+ | |
+const struct file_operations f2fs_file_operations = { | |
+ .llseek = generic_file_llseek, | |
+ .read = do_sync_read, | |
+ .write = do_sync_write, | |
+ .aio_read = generic_file_aio_read, | |
+ .aio_write = generic_file_aio_write, | |
+ .open = generic_file_open, | |
+ .mmap = f2fs_file_mmap, | |
+ .fsync = f2fs_sync_file, | |
+ .fallocate = f2fs_fallocate, | |
+ .unlocked_ioctl = f2fs_ioctl, | |
+#ifdef CONFIG_COMPAT | |
+ .compat_ioctl = f2fs_compat_ioctl, | |
+#endif | |
+ .splice_read = generic_file_splice_read, | |
+ .splice_write = generic_file_splice_write, | |
+}; | |
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c | |
new file mode 100644 | |
index 0000000..e51c1b0 | |
--- /dev/null | |
+++ b/fs/f2fs/gc.c | |
@@ -0,0 +1,738 @@ | |
+/* | |
+ * fs/f2fs/gc.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/fs.h> | |
+#include <linux/module.h> | |
+#include <linux/backing-dev.h> | |
+#include <linux/init.h> | |
+#include <linux/f2fs_fs.h> | |
+#include <linux/kthread.h> | |
+#include <linux/delay.h> | |
+#include <linux/freezer.h> | |
+#include <linux/blkdev.h> | |
+ | |
+#include "f2fs.h" | |
+#include "node.h" | |
+#include "segment.h" | |
+#include "gc.h" | |
+#include <trace/events/f2fs.h> | |
+ | |
+static struct kmem_cache *winode_slab; | |
+ | |
+static int gc_thread_func(void *data) | |
+{ | |
+ struct f2fs_sb_info *sbi = data; | |
+ struct f2fs_gc_kthread *gc_th = sbi->gc_thread; | |
+ wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; | |
+ long wait_ms; | |
+ | |
+ wait_ms = gc_th->min_sleep_time; | |
+ | |
+ do { | |
+ if (try_to_freeze()) | |
+ continue; | |
+ else | |
+ wait_event_interruptible_timeout(*wq, | |
+ kthread_should_stop(), | |
+ msecs_to_jiffies(wait_ms)); | |
+ if (kthread_should_stop()) | |
+ break; | |
+ | |
+ /* | |
+		 * [GC triggering conditions] | |
+		 * 0. GC is not currently running. | |
+		 * 1. There are enough dirty segments. | |
+		 * 2. The IO subsystem is idle, judged by the # of writeback pages. | |
+		 * 3. The IO subsystem is idle, judged by the # of requests in | |
+		 *    the bdev's request list. | |
+		 * | |
+		 * Note) We have to avoid triggering GCs too frequently, | |
+		 * because some segments can be invalidated soon after by | |
+		 * user updates or deletions. So we wait some time to | |
+		 * collect more dirty segments. | |
+ */ | |
+ if (!mutex_trylock(&sbi->gc_mutex)) | |
+ continue; | |
+ | |
+ if (!is_idle(sbi)) { | |
+ wait_ms = increase_sleep_time(gc_th, wait_ms); | |
+ mutex_unlock(&sbi->gc_mutex); | |
+ continue; | |
+ } | |
+ | |
+ if (has_enough_invalid_blocks(sbi)) | |
+ wait_ms = decrease_sleep_time(gc_th, wait_ms); | |
+ else | |
+ wait_ms = increase_sleep_time(gc_th, wait_ms); | |
+ | |
+#ifdef CONFIG_F2FS_STAT_FS | |
+ sbi->bg_gc++; | |
+#endif | |
+ | |
+ /* if return value is not zero, no victim was selected */ | |
+ if (f2fs_gc(sbi)) | |
+ wait_ms = gc_th->no_gc_sleep_time; | |
+ } while (!kthread_should_stop()); | |
+ return 0; | |
+} | |
+ | |
+int start_gc_thread(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_gc_kthread *gc_th; | |
+ dev_t dev = sbi->sb->s_bdev->bd_dev; | |
+ int err = 0; | |
+ | |
+ if (!test_opt(sbi, BG_GC)) | |
+ goto out; | |
+ gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); | |
+ if (!gc_th) { | |
+ err = -ENOMEM; | |
+ goto out; | |
+ } | |
+ | |
+ gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; | |
+ gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; | |
+ gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; | |
+ | |
+ gc_th->gc_idle = 0; | |
+ | |
+ sbi->gc_thread = gc_th; | |
+ init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); | |
+ sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, | |
+ "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); | |
+ if (IS_ERR(gc_th->f2fs_gc_task)) { | |
+ err = PTR_ERR(gc_th->f2fs_gc_task); | |
+ kfree(gc_th); | |
+ sbi->gc_thread = NULL; | |
+ } | |
+ | |
+out: | |
+ return err; | |
+} | |
+ | |
+void stop_gc_thread(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_gc_kthread *gc_th = sbi->gc_thread; | |
+ if (!gc_th) | |
+ return; | |
+ kthread_stop(gc_th->f2fs_gc_task); | |
+ kfree(gc_th); | |
+ sbi->gc_thread = NULL; | |
+} | |
+ | |
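+/* | |
+ * BG_GC defaults to cost-benefit, FG_GC to greedy; a nonzero gc_idle | |
+ * overrides this (1 = cost-benefit, 2 = greedy). | |
+ */ | |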
+static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) | |
+{ | |
+ int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; | |
+ | |
+ if (gc_th && gc_th->gc_idle) { | |
+ if (gc_th->gc_idle == 1) | |
+ gc_mode = GC_CB; | |
+ else if (gc_th->gc_idle == 2) | |
+ gc_mode = GC_GREEDY; | |
+ } | |
+ return gc_mode; | |
+} | |
+ | |
+static void select_policy(struct f2fs_sb_info *sbi, int gc_type, | |
+ int type, struct victim_sel_policy *p) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ | |
+ if (p->alloc_mode == SSR) { | |
+ p->gc_mode = GC_GREEDY; | |
+ p->dirty_segmap = dirty_i->dirty_segmap[type]; | |
+ p->ofs_unit = 1; | |
+ } else { | |
+ p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); | |
+ p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; | |
+ p->ofs_unit = sbi->segs_per_sec; | |
+ } | |
+ p->offset = sbi->last_victim[p->gc_mode]; | |
+} | |
+ | |
+static unsigned int get_max_cost(struct f2fs_sb_info *sbi, | |
+ struct victim_sel_policy *p) | |
+{ | |
+ /* SSR allocates in a segment unit */ | |
+ if (p->alloc_mode == SSR) | |
+ return 1 << sbi->log_blocks_per_seg; | |
+ if (p->gc_mode == GC_GREEDY) | |
+ return (1 << sbi->log_blocks_per_seg) * p->ofs_unit; | |
+ else if (p->gc_mode == GC_CB) | |
+ return UINT_MAX; | |
+ else /* No other gc_mode */ | |
+ return 0; | |
+} | |
+ | |
+static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ unsigned int hint = 0; | |
+ unsigned int secno; | |
+ | |
+ /* | |
+	 * If the gc_type is FG_GC, we can reuse victim sections already | |
+	 * selected by background GC, since those are guaranteed to have | |
+	 * only a small number of valid blocks. | |
+ */ | |
+next: | |
+ secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++); | |
+ if (secno < TOTAL_SECS(sbi)) { | |
+ if (sec_usage_check(sbi, secno)) | |
+ goto next; | |
+ clear_bit(secno, dirty_i->victim_secmap); | |
+ return secno * sbi->segs_per_sec; | |
+ } | |
+ return NULL_SEGNO; | |
+} | |
+ | |
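+/* | |
+ * Cost-benefit victim selection: with section utilization u (in %) and | |
+ * normalized age (100 = oldest), the benefit is age * (100 - u) / (100 + u). | |
+ * It is subtracted from UINT_MAX so that callers can simply minimize cost. | |
+ */ | |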
+static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ unsigned int secno = GET_SECNO(sbi, segno); | |
+ unsigned int start = secno * sbi->segs_per_sec; | |
+ unsigned long long mtime = 0; | |
+ unsigned int vblocks; | |
+ unsigned char age = 0; | |
+ unsigned char u; | |
+ unsigned int i; | |
+ | |
+ for (i = 0; i < sbi->segs_per_sec; i++) | |
+ mtime += get_seg_entry(sbi, start + i)->mtime; | |
+ vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); | |
+ | |
+ mtime = div_u64(mtime, sbi->segs_per_sec); | |
+ vblocks = div_u64(vblocks, sbi->segs_per_sec); | |
+ | |
+ u = (vblocks * 100) >> sbi->log_blocks_per_seg; | |
+ | |
+	/* Handle the case where the system time was changed by the user */ | |
+ if (mtime < sit_i->min_mtime) | |
+ sit_i->min_mtime = mtime; | |
+ if (mtime > sit_i->max_mtime) | |
+ sit_i->max_mtime = mtime; | |
+ if (sit_i->max_mtime != sit_i->min_mtime) | |
+ age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime), | |
+ sit_i->max_mtime - sit_i->min_mtime); | |
+ | |
+ return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); | |
+} | |
+ | |
+static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, | |
+ struct victim_sel_policy *p) | |
+{ | |
+ if (p->alloc_mode == SSR) | |
+ return get_seg_entry(sbi, segno)->ckpt_valid_blocks; | |
+ | |
+ /* alloc_mode == LFS */ | |
+ if (p->gc_mode == GC_GREEDY) | |
+ return get_valid_blocks(sbi, segno, sbi->segs_per_sec); | |
+ else | |
+ return get_cb_cost(sbi, segno); | |
+} | |
+ | |
+/* | |
+ * This function is called from two paths. | |
+ * One is garbage collection and the other is SSR segment selection. | |
+ * When it is called during GC, it just gets a victim segment | |
+ * and it does not remove it from dirty seglist. | |
+ * When it is called from SSR segment selection, it finds a segment | |
+ * which has minimum valid blocks and removes it from dirty seglist. | |
+ */ | |
+static int get_victim_by_default(struct f2fs_sb_info *sbi, | |
+ unsigned int *result, int gc_type, int type, char alloc_mode) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ struct victim_sel_policy p; | |
+ unsigned int secno, max_cost; | |
+ int nsearched = 0; | |
+ | |
+ p.alloc_mode = alloc_mode; | |
+ select_policy(sbi, gc_type, type, &p); | |
+ | |
+ p.min_segno = NULL_SEGNO; | |
+ p.min_cost = max_cost = get_max_cost(sbi, &p); | |
+ | |
+ mutex_lock(&dirty_i->seglist_lock); | |
+ | |
+ if (p.alloc_mode == LFS && gc_type == FG_GC) { | |
+ p.min_segno = check_bg_victims(sbi); | |
+ if (p.min_segno != NULL_SEGNO) | |
+ goto got_it; | |
+ } | |
+ | |
+ while (1) { | |
+ unsigned long cost; | |
+ unsigned int segno; | |
+ | |
+ segno = find_next_bit(p.dirty_segmap, | |
+ TOTAL_SEGS(sbi), p.offset); | |
+ if (segno >= TOTAL_SEGS(sbi)) { | |
+ if (sbi->last_victim[p.gc_mode]) { | |
+ sbi->last_victim[p.gc_mode] = 0; | |
+ p.offset = 0; | |
+ continue; | |
+ } | |
+ break; | |
+ } | |
+ p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; | |
+ secno = GET_SECNO(sbi, segno); | |
+ | |
+ if (sec_usage_check(sbi, secno)) | |
+ continue; | |
+ if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) | |
+ continue; | |
+ | |
+ cost = get_gc_cost(sbi, segno, &p); | |
+ | |
+ if (p.min_cost > cost) { | |
+ p.min_segno = segno; | |
+ p.min_cost = cost; | |
+ } | |
+ | |
+ if (cost == max_cost) | |
+ continue; | |
+ | |
+ if (nsearched++ >= MAX_VICTIM_SEARCH) { | |
+ sbi->last_victim[p.gc_mode] = segno; | |
+ break; | |
+ } | |
+ } | |
+ if (p.min_segno != NULL_SEGNO) { | |
+got_it: | |
+ if (p.alloc_mode == LFS) { | |
+ secno = GET_SECNO(sbi, p.min_segno); | |
+ if (gc_type == FG_GC) | |
+ sbi->cur_victim_sec = secno; | |
+ else | |
+ set_bit(secno, dirty_i->victim_secmap); | |
+ } | |
+ *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; | |
+ | |
+ trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, | |
+ sbi->cur_victim_sec, | |
+ prefree_segments(sbi), free_segments(sbi)); | |
+ } | |
+ mutex_unlock(&dirty_i->seglist_lock); | |
+ | |
+ return (p.min_segno == NULL_SEGNO) ? 0 : 1; | |
+} | |
+ | |
+static const struct victim_selection default_v_ops = { | |
+ .get_victim = get_victim_by_default, | |
+}; | |
+ | |
+static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) | |
+{ | |
+ struct inode_entry *ie; | |
+ | |
+ list_for_each_entry(ie, ilist, list) | |
+ if (ie->inode->i_ino == ino) | |
+ return ie->inode; | |
+ return NULL; | |
+} | |
+ | |
+static void add_gc_inode(struct inode *inode, struct list_head *ilist) | |
+{ | |
+ struct inode_entry *new_ie; | |
+ | |
+ if (inode == find_gc_inode(inode->i_ino, ilist)) { | |
+ iput(inode); | |
+ return; | |
+ } | |
+repeat: | |
+ new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); | |
+ if (!new_ie) { | |
+ cond_resched(); | |
+ goto repeat; | |
+ } | |
+ new_ie->inode = inode; | |
+ list_add_tail(&new_ie->list, ilist); | |
+} | |
+ | |
+static void put_gc_inode(struct list_head *ilist) | |
+{ | |
+ struct inode_entry *ie, *next_ie; | |
+ list_for_each_entry_safe(ie, next_ie, ilist, list) { | |
+ iput(ie->inode); | |
+ list_del(&ie->list); | |
+ kmem_cache_free(winode_slab, ie); | |
+ } | |
+} | |
+ | |
+static int check_valid_map(struct f2fs_sb_info *sbi, | |
+ unsigned int segno, int offset) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ struct seg_entry *sentry; | |
+ int ret; | |
+ | |
+ mutex_lock(&sit_i->sentry_lock); | |
+ sentry = get_seg_entry(sbi, segno); | |
+ ret = f2fs_test_bit(offset, sentry->cur_valid_map); | |
+ mutex_unlock(&sit_i->sentry_lock); | |
+ return ret; | |
+} | |
+ | |
+/* | |
+ * This function compares the node address recorded in the summary with | |
+ * the one in the NAT. If the node is valid, it is copied with cold | |
+ * status; otherwise (an invalid node) it is ignored. | |
+ */ | |
+static void gc_node_segment(struct f2fs_sb_info *sbi, | |
+ struct f2fs_summary *sum, unsigned int segno, int gc_type) | |
+{ | |
+ bool initial = true; | |
+ struct f2fs_summary *entry; | |
+ int off; | |
+ | |
+next_step: | |
+ entry = sum; | |
+ | |
+ for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { | |
+ nid_t nid = le32_to_cpu(entry->nid); | |
+ struct page *node_page; | |
+ | |
+		/* stop BG_GC if there are not enough free sections. */ | |
+ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) | |
+ return; | |
+ | |
+ if (check_valid_map(sbi, segno, off) == 0) | |
+ continue; | |
+ | |
+ if (initial) { | |
+ ra_node_page(sbi, nid); | |
+ continue; | |
+ } | |
+ node_page = get_node_page(sbi, nid); | |
+ if (IS_ERR(node_page)) | |
+ continue; | |
+ | |
+ /* set page dirty and write it */ | |
+ if (gc_type == FG_GC) { | |
+ f2fs_wait_on_page_writeback(node_page, NODE, true); | |
+ set_page_dirty(node_page); | |
+ } else { | |
+ if (!PageWriteback(node_page)) | |
+ set_page_dirty(node_page); | |
+ } | |
+ f2fs_put_page(node_page, 1); | |
+ stat_inc_node_blk_count(sbi, 1); | |
+ } | |
+ | |
+ if (initial) { | |
+ initial = false; | |
+ goto next_step; | |
+ } | |
+ | |
+ if (gc_type == FG_GC) { | |
+ struct writeback_control wbc = { | |
+ .sync_mode = WB_SYNC_ALL, | |
+ .nr_to_write = LONG_MAX, | |
+ .for_reclaim = 0, | |
+ }; | |
+ sync_node_pages(sbi, 0, &wbc); | |
+ | |
+ /* | |
+ * In the case of FG_GC, it'd be better to reclaim this victim | |
+ * completely. | |
+ */ | |
+ if (get_valid_blocks(sbi, segno, 1) != 0) | |
+ goto next_step; | |
+ } | |
+} | |
+ | |
+/* | |
+ * Calculate the start block index for the given node offset. | |
+ * Be careful: the caller must pass only node offsets that indicate direct | |
+ * node blocks. Passing an offset that points to any other node block type, | |
+ * such as an indirect or double indirect node block, is a caller's bug. | |
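+ * | |
+ * For example, node_ofs 1 is the first direct node (bidx 0), so the | |
+ * result is ADDRS_PER_INODE(fi); node_ofs 4, the first direct node | |
+ * under the first indirect node, maps to bidx 2. | |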
+ */ | |
+block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi) | |
+{ | |
+ unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4; | |
+ unsigned int bidx; | |
+ | |
+ if (node_ofs == 0) | |
+ return 0; | |
+ | |
+ if (node_ofs <= 2) { | |
+ bidx = node_ofs - 1; | |
+ } else if (node_ofs <= indirect_blks) { | |
+ int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); | |
+ bidx = node_ofs - 2 - dec; | |
+ } else { | |
+ int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); | |
+ bidx = node_ofs - 5 - dec; | |
+ } | |
+ return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); | |
+} | |
+ | |
+static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, | |
+ struct node_info *dni, block_t blkaddr, unsigned int *nofs) | |
+{ | |
+ struct page *node_page; | |
+ nid_t nid; | |
+ unsigned int ofs_in_node; | |
+ block_t source_blkaddr; | |
+ | |
+ nid = le32_to_cpu(sum->nid); | |
+ ofs_in_node = le16_to_cpu(sum->ofs_in_node); | |
+ | |
+ node_page = get_node_page(sbi, nid); | |
+ if (IS_ERR(node_page)) | |
+ return 0; | |
+ | |
+ get_node_info(sbi, nid, dni); | |
+ | |
+ if (sum->version != dni->version) { | |
+ f2fs_put_page(node_page, 1); | |
+ return 0; | |
+ } | |
+ | |
+ *nofs = ofs_of_node(node_page); | |
+ source_blkaddr = datablock_addr(node_page, ofs_in_node); | |
+ f2fs_put_page(node_page, 1); | |
+ | |
+ if (source_blkaddr != blkaddr) | |
+ return 0; | |
+ return 1; | |
+} | |
+ | |
+static void move_data_page(struct inode *inode, struct page *page, int gc_type) | |
+{ | |
+ if (gc_type == BG_GC) { | |
+ if (PageWriteback(page)) | |
+ goto out; | |
+ set_page_dirty(page); | |
+ set_cold_data(page); | |
+ } else { | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ | |
+ f2fs_wait_on_page_writeback(page, DATA, true); | |
+ | |
+ if (clear_page_dirty_for_io(page) && | |
+ S_ISDIR(inode->i_mode)) { | |
+ dec_page_count(sbi, F2FS_DIRTY_DENTS); | |
+ inode_dec_dirty_dents(inode); | |
+ } | |
+ set_cold_data(page); | |
+ do_write_data_page(page); | |
+ clear_cold_data(page); | |
+ } | |
+out: | |
+ f2fs_put_page(page, 1); | |
+} | |
+ | |
+/* | |
+ * This function tries to get the parent node of a victim data block and | |
+ * checks the block's validity. If the block is valid, it is copied with | |
+ * cold status and the parent node is updated. | |
+ * If the parent node is not valid, or the data block address differs, | |
+ * the victim data block is ignored. | |
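+ * | |
+ * The segment is scanned in four phases: phase 0 issues readahead for | |
+ * the summary's node pages, phase 1 validates each dnode and readaheads | |
+ * the owning inodes, phase 2 grabs the inodes and readaheads their data | |
+ * pages, and phase 3 actually moves the data pages. | |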
+ */ | |
+static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, | |
+ struct list_head *ilist, unsigned int segno, int gc_type) | |
+{ | |
+ struct super_block *sb = sbi->sb; | |
+ struct f2fs_summary *entry; | |
+ block_t start_addr; | |
+ int off; | |
+ int phase = 0; | |
+ | |
+ start_addr = START_BLOCK(sbi, segno); | |
+ | |
+next_step: | |
+ entry = sum; | |
+ | |
+ for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { | |
+ struct page *data_page; | |
+ struct inode *inode; | |
+ struct node_info dni; /* dnode info for the data */ | |
+ unsigned int ofs_in_node, nofs; | |
+ block_t start_bidx; | |
+ | |
+		/* stop BG_GC if there are not enough free sections. */ | |
+ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) | |
+ return; | |
+ | |
+ if (check_valid_map(sbi, segno, off) == 0) | |
+ continue; | |
+ | |
+ if (phase == 0) { | |
+ ra_node_page(sbi, le32_to_cpu(entry->nid)); | |
+ continue; | |
+ } | |
+ | |
+ /* Get an inode by ino with checking validity */ | |
+ if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0) | |
+ continue; | |
+ | |
+ if (phase == 1) { | |
+ ra_node_page(sbi, dni.ino); | |
+ continue; | |
+ } | |
+ | |
+ ofs_in_node = le16_to_cpu(entry->ofs_in_node); | |
+ | |
+ if (phase == 2) { | |
+ inode = f2fs_iget(sb, dni.ino); | |
+ if (IS_ERR(inode)) | |
+ continue; | |
+ | |
+ start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); | |
+ | |
+ data_page = find_data_page(inode, | |
+ start_bidx + ofs_in_node, false); | |
+ if (IS_ERR(data_page)) | |
+ goto next_iput; | |
+ | |
+ f2fs_put_page(data_page, 0); | |
+ add_gc_inode(inode, ilist); | |
+ } else { | |
+ inode = find_gc_inode(dni.ino, ilist); | |
+ if (inode) { | |
+ start_bidx = start_bidx_of_node(nofs, | |
+ F2FS_I(inode)); | |
+ data_page = get_lock_data_page(inode, | |
+ start_bidx + ofs_in_node); | |
+ if (IS_ERR(data_page)) | |
+ continue; | |
+ move_data_page(inode, data_page, gc_type); | |
+ stat_inc_data_blk_count(sbi, 1); | |
+ } | |
+ } | |
+ continue; | |
+next_iput: | |
+ iput(inode); | |
+ } | |
+ | |
+ if (++phase < 4) | |
+ goto next_step; | |
+ | |
+ if (gc_type == FG_GC) { | |
+ f2fs_submit_bio(sbi, DATA, true); | |
+ | |
+ /* | |
+ * In the case of FG_GC, it'd be better to reclaim this victim | |
+ * completely. | |
+ */ | |
+ if (get_valid_blocks(sbi, segno, 1) != 0) { | |
+ phase = 2; | |
+ goto next_step; | |
+ } | |
+ } | |
+} | |
+ | |
+static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, | |
+ int gc_type, int type) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ int ret; | |
+ mutex_lock(&sit_i->sentry_lock); | |
+ ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS); | |
+ mutex_unlock(&sit_i->sentry_lock); | |
+ return ret; | |
+} | |
+ | |
+static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, | |
+ struct list_head *ilist, int gc_type) | |
+{ | |
+ struct page *sum_page; | |
+ struct f2fs_summary_block *sum; | |
+ struct blk_plug plug; | |
+ | |
+ /* read segment summary of victim */ | |
+ sum_page = get_sum_page(sbi, segno); | |
+ if (IS_ERR(sum_page)) | |
+ return; | |
+ | |
+ blk_start_plug(&plug); | |
+ | |
+ sum = page_address(sum_page); | |
+ | |
+ switch (GET_SUM_TYPE((&sum->footer))) { | |
+ case SUM_TYPE_NODE: | |
+ gc_node_segment(sbi, sum->entries, segno, gc_type); | |
+ break; | |
+ case SUM_TYPE_DATA: | |
+ gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); | |
+ break; | |
+ } | |
+ blk_finish_plug(&plug); | |
+ | |
+ stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); | |
+ stat_inc_call_count(sbi->stat_info); | |
+ | |
+ f2fs_put_page(sum_page, 1); | |
+} | |
+ | |
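+/* | |
+ * Collect victims until enough free sections are available. BG_GC is | |
+ * escalated to FG_GC (after writing a checkpoint) when free sections | |
+ * run short. Returns -1 if no victim could be selected. | |
+ */ | |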
+int f2fs_gc(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct list_head ilist; | |
+ unsigned int segno, i; | |
+ int gc_type = BG_GC; | |
+ int nfree = 0; | |
+ int ret = -1; | |
+ | |
+ INIT_LIST_HEAD(&ilist); | |
+gc_more: | |
+ if (!(sbi->sb->s_flags & MS_ACTIVE)) | |
+ goto stop; | |
+ | |
+ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { | |
+ gc_type = FG_GC; | |
+ write_checkpoint(sbi, false); | |
+ } | |
+ | |
+ if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) | |
+ goto stop; | |
+ ret = 0; | |
+ | |
+ for (i = 0; i < sbi->segs_per_sec; i++) | |
+ do_garbage_collect(sbi, segno + i, &ilist, gc_type); | |
+ | |
+ if (gc_type == FG_GC) { | |
+ sbi->cur_victim_sec = NULL_SEGNO; | |
+ nfree++; | |
+ WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec)); | |
+ } | |
+ | |
+ if (has_not_enough_free_secs(sbi, nfree)) | |
+ goto gc_more; | |
+ | |
+ if (gc_type == FG_GC) | |
+ write_checkpoint(sbi, false); | |
+stop: | |
+ mutex_unlock(&sbi->gc_mutex); | |
+ | |
+ put_gc_inode(&ilist); | |
+ return ret; | |
+} | |
+ | |
+void build_gc_manager(struct f2fs_sb_info *sbi) | |
+{ | |
+ DIRTY_I(sbi)->v_ops = &default_v_ops; | |
+} | |
+ | |
+int __init create_gc_caches(void) | |
+{ | |
+ winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", | |
+ sizeof(struct inode_entry), NULL); | |
+ if (!winode_slab) | |
+ return -ENOMEM; | |
+ return 0; | |
+} | |
+ | |
+void destroy_gc_caches(void) | |
+{ | |
+ kmem_cache_destroy(winode_slab); | |
+} | |
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h | |
new file mode 100644 | |
index 0000000..f2a50cb | |
--- /dev/null | |
+++ b/fs/f2fs/gc.h | |
@@ -0,0 +1,110 @@ | |
+/* | |
+ * fs/f2fs/gc.h | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#define GC_THREAD_MIN_WB_PAGES 1 /* | |
+ * a threshold to determine | |
+ * whether IO subsystem is idle | |
+ * or not | |
+ */ | |
+#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ | |
+#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 | |
+#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ | |
+#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ | |
+#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ | |
+ | |
+/* Maximum number of dirty segments to search when selecting a victim */ | |
+#define MAX_VICTIM_SEARCH 20 | |
+ | |
+struct f2fs_gc_kthread { | |
+ struct task_struct *f2fs_gc_task; | |
+ wait_queue_head_t gc_wait_queue_head; | |
+ | |
+ /* for gc sleep time */ | |
+ unsigned int min_sleep_time; | |
+ unsigned int max_sleep_time; | |
+ unsigned int no_gc_sleep_time; | |
+ | |
+ /* for changing gc mode */ | |
+ unsigned int gc_idle; | |
+}; | |
+ | |
+struct inode_entry { | |
+ struct list_head list; | |
+ struct inode *inode; | |
+}; | |
+ | |
+/* | |
+ * inline functions | |
+ */ | |
+static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) | |
+{ | |
+ if (free_segments(sbi) < overprovision_segments(sbi)) | |
+ return 0; | |
+ else | |
+ return (free_segments(sbi) - overprovision_segments(sbi)) | |
+ << sbi->log_blocks_per_seg; | |
+} | |
+ | |
+static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi) | |
+{ | |
+ return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100; | |
+} | |
+ | |
+static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) | |
+{ | |
+ block_t reclaimable_user_blocks = sbi->user_block_count - | |
+ written_block_count(sbi); | |
+ return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; | |
+} | |
+ | |
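+/* | |
+ * The GC sleep time ramps between min_sleep_time and max_sleep_time in | |
+ * min_sleep_time steps. Once parked at no_gc_sleep_time, increases are | |
+ * no-ops and the next decrease restarts from max_sleep_time. | |
+ */ | |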
+static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) | |
+{ | |
+ if (wait == gc_th->no_gc_sleep_time) | |
+ return wait; | |
+ | |
+ wait += gc_th->min_sleep_time; | |
+ if (wait > gc_th->max_sleep_time) | |
+ wait = gc_th->max_sleep_time; | |
+ return wait; | |
+} | |
+ | |
+static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) | |
+{ | |
+ if (wait == gc_th->no_gc_sleep_time) | |
+ wait = gc_th->max_sleep_time; | |
+ | |
+ wait -= gc_th->min_sleep_time; | |
+ if (wait <= gc_th->min_sleep_time) | |
+ wait = gc_th->min_sleep_time; | |
+ return wait; | |
+} | |
+ | |
+static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) | |
+{ | |
+ block_t invalid_user_blocks = sbi->user_block_count - | |
+ written_block_count(sbi); | |
+ /* | |
+	 * Background GC is triggered under the following conditions: | |
+	 * 1. there are enough invalid blocks, and | |
+	 * 2. there is not enough free space. | |
+ */ | |
+ if (invalid_user_blocks > limit_invalid_user_blocks(sbi) && | |
+ free_user_blocks(sbi) < limit_free_user_blocks(sbi)) | |
+ return true; | |
+ return false; | |
+} | |
+ | |
+static inline int is_idle(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct block_device *bdev = sbi->sb->s_bdev; | |
+ struct request_queue *q = bdev_get_queue(bdev); | |
+ struct request_list *rl = &q->rq; | |
+ return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]); | |
+} | |
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c | |
new file mode 100644 | |
index 0000000..6eb8d26 | |
--- /dev/null | |
+++ b/fs/f2fs/hash.c | |
@@ -0,0 +1,101 @@ | |
+/* | |
+ * fs/f2fs/hash.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * Portions of this code from linux/fs/ext3/hash.c | |
+ * | |
+ * Copyright (C) 2002 by Theodore Ts'o | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/types.h> | |
+#include <linux/fs.h> | |
+#include <linux/f2fs_fs.h> | |
+#include <linux/cryptohash.h> | |
+#include <linux/pagemap.h> | |
+ | |
+#include "f2fs.h" | |
+ | |
+/* | |
+ * Hashing code copied from ext3 | |
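+ * (a TEA-based hash: the name is folded into a 128-bit state, 16 bytes | |
+ * at a time) | |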
+ */ | |
+#define DELTA 0x9E3779B9 | |
+ | |
+static void TEA_transform(unsigned int buf[4], unsigned int const in[]) | |
+{ | |
+ __u32 sum = 0; | |
+ __u32 b0 = buf[0], b1 = buf[1]; | |
+ __u32 a = in[0], b = in[1], c = in[2], d = in[3]; | |
+ int n = 16; | |
+ | |
+ do { | |
+ sum += DELTA; | |
+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); | |
+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); | |
+ } while (--n); | |
+ | |
+ buf[0] += b0; | |
+ buf[1] += b1; | |
+} | |
+ | |
+static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) | |
+{ | |
+ unsigned pad, val; | |
+ int i; | |
+ | |
+ pad = (__u32)len | ((__u32)len << 8); | |
+ pad |= pad << 16; | |
+ | |
+ val = pad; | |
+ if (len > num * 4) | |
+ len = num * 4; | |
+ for (i = 0; i < len; i++) { | |
+ if ((i % 4) == 0) | |
+ val = pad; | |
+ val = msg[i] + (val << 8); | |
+ if ((i % 4) == 3) { | |
+ *buf++ = val; | |
+ val = pad; | |
+ num--; | |
+ } | |
+ } | |
+ if (--num >= 0) | |
+ *buf++ = val; | |
+ while (--num >= 0) | |
+ *buf++ = pad; | |
+} | |
+ | |
+f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) | |
+{ | |
+ __u32 hash; | |
+ f2fs_hash_t f2fs_hash; | |
+ const char *p; | |
+ __u32 in[8], buf[4]; | |
+ | |
+ if ((len <= 2) && (name[0] == '.') && | |
+ (name[1] == '.' || name[1] == '\0')) | |
+ return 0; | |
+ | |
+ /* Initialize the default seed for the hash checksum functions */ | |
+ buf[0] = 0x67452301; | |
+ buf[1] = 0xefcdab89; | |
+ buf[2] = 0x98badcfe; | |
+ buf[3] = 0x10325476; | |
+ | |
+ p = name; | |
+ while (1) { | |
+ str2hashbuf(p, len, in, 4); | |
+ TEA_transform(buf, in); | |
+ p += 16; | |
+ if (len <= 16) | |
+ break; | |
+ len -= 16; | |
+ } | |
+ hash = buf[0]; | |
+ f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); | |
+ return f2fs_hash; | |
+} | |
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c | |
new file mode 100644 | |
index 0000000..b65e8f2 | |
--- /dev/null | |
+++ b/fs/f2fs/inode.c | |
@@ -0,0 +1,273 @@ | |
+/* | |
+ * fs/f2fs/inode.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/fs.h> | |
+#include <linux/f2fs_fs.h> | |
+#include <linux/buffer_head.h> | |
+#include <linux/writeback.h> | |
+ | |
+#include "f2fs.h" | |
+#include "node.h" | |
+ | |
+#include <trace/events/f2fs.h> | |
+ | |
+void f2fs_set_inode_flags(struct inode *inode) | |
+{ | |
+ unsigned int flags = F2FS_I(inode)->i_flags; | |
+ | |
+ inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | | |
+ S_NOATIME | S_DIRSYNC); | |
+ | |
+ if (flags & FS_SYNC_FL) | |
+ inode->i_flags |= S_SYNC; | |
+ if (flags & FS_APPEND_FL) | |
+ inode->i_flags |= S_APPEND; | |
+ if (flags & FS_IMMUTABLE_FL) | |
+ inode->i_flags |= S_IMMUTABLE; | |
+ if (flags & FS_NOATIME_FL) | |
+ inode->i_flags |= S_NOATIME; | |
+ if (flags & FS_DIRSYNC_FL) | |
+ inode->i_flags |= S_DIRSYNC; | |
+} | |
+ | |
+static int do_read_inode(struct inode *inode) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct f2fs_inode_info *fi = F2FS_I(inode); | |
+ struct page *node_page; | |
+ struct f2fs_node *rn; | |
+ struct f2fs_inode *ri; | |
+ | |
+ /* Check if ino is within scope */ | |
+ if (check_nid_range(sbi, inode->i_ino)) { | |
+ f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu", | |
+ (unsigned long) inode->i_ino); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ node_page = get_node_page(sbi, inode->i_ino); | |
+ if (IS_ERR(node_page)) | |
+ return PTR_ERR(node_page); | |
+ | |
+ rn = F2FS_NODE(node_page); | |
+ ri = &(rn->i); | |
+ | |
+ inode->i_mode = le16_to_cpu(ri->i_mode); | |
+ inode->i_uid = le32_to_cpu(ri->i_uid); | |
+ inode->i_gid = le32_to_cpu(ri->i_gid); | |
+ set_nlink(inode, le32_to_cpu(ri->i_links)); | |
+ inode->i_size = le64_to_cpu(ri->i_size); | |
+ inode->i_blocks = le64_to_cpu(ri->i_blocks); | |
+ | |
+ inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); | |
+ inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime); | |
+ inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime); | |
+ inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec); | |
+ inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); | |
+ inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); | |
+ inode->i_generation = le32_to_cpu(ri->i_generation); | |
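+	/* a nonzero i_addr[0] holds the old 16-bit device number encoding */ | |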
+ if (ri->i_addr[0]) | |
+ inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); | |
+ else | |
+ inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); | |
+ | |
+ fi->i_current_depth = le32_to_cpu(ri->i_current_depth); | |
+ fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); | |
+ fi->i_flags = le32_to_cpu(ri->i_flags); | |
+ fi->flags = 0; | |
+ fi->i_advise = ri->i_advise; | |
+ fi->i_pino = le32_to_cpu(ri->i_pino); | |
+ get_extent_info(&fi->ext, ri->i_ext); | |
+ get_inline_info(fi, ri); | |
+ f2fs_put_page(node_page, 1); | |
+ return 0; | |
+} | |
+ | |
+struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ struct inode *inode; | |
+ int ret = 0; | |
+ | |
+ inode = iget_locked(sb, ino); | |
+ if (!inode) | |
+ return ERR_PTR(-ENOMEM); | |
+ | |
+ if (!(inode->i_state & I_NEW)) { | |
+ trace_f2fs_iget(inode); | |
+ return inode; | |
+ } | |
+ if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi)) | |
+ goto make_now; | |
+ | |
+ ret = do_read_inode(inode); | |
+ if (ret) | |
+ goto bad_inode; | |
+make_now: | |
+ if (ino == F2FS_NODE_INO(sbi)) { | |
+ inode->i_mapping->a_ops = &f2fs_node_aops; | |
+ mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); | |
+ } else if (ino == F2FS_META_INO(sbi)) { | |
+ inode->i_mapping->a_ops = &f2fs_meta_aops; | |
+ mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); | |
+ } else if (S_ISREG(inode->i_mode)) { | |
+ inode->i_op = &f2fs_file_inode_operations; | |
+ inode->i_fop = &f2fs_file_operations; | |
+ inode->i_mapping->a_ops = &f2fs_dblock_aops; | |
+ } else if (S_ISDIR(inode->i_mode)) { | |
+ inode->i_op = &f2fs_dir_inode_operations; | |
+ inode->i_fop = &f2fs_dir_operations; | |
+ inode->i_mapping->a_ops = &f2fs_dblock_aops; | |
+ mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); | |
+ } else if (S_ISLNK(inode->i_mode)) { | |
+ inode->i_op = &f2fs_symlink_inode_operations; | |
+ inode->i_mapping->a_ops = &f2fs_dblock_aops; | |
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || | |
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { | |
+ inode->i_op = &f2fs_special_inode_operations; | |
+ init_special_inode(inode, inode->i_mode, inode->i_rdev); | |
+ } else { | |
+ ret = -EIO; | |
+ goto bad_inode; | |
+ } | |
+ unlock_new_inode(inode); | |
+ trace_f2fs_iget(inode); | |
+ return inode; | |
+ | |
+bad_inode: | |
+ iget_failed(inode); | |
+ trace_f2fs_iget_exit(inode, ret); | |
+ return ERR_PTR(ret); | |
+} | |
+ | |
+void update_inode(struct inode *inode, struct page *node_page) | |
+{ | |
+ struct f2fs_node *rn; | |
+ struct f2fs_inode *ri; | |
+ | |
+ f2fs_wait_on_page_writeback(node_page, NODE, false); | |
+ | |
+ rn = F2FS_NODE(node_page); | |
+ ri = &(rn->i); | |
+ | |
+ ri->i_mode = cpu_to_le16(inode->i_mode); | |
+ ri->i_advise = F2FS_I(inode)->i_advise; | |
+ ri->i_uid = cpu_to_le32(inode->i_uid); | |
+ ri->i_gid = cpu_to_le32(inode->i_gid); | |
+ ri->i_links = cpu_to_le32(inode->i_nlink); | |
+ ri->i_size = cpu_to_le64(i_size_read(inode)); | |
+ ri->i_blocks = cpu_to_le64(inode->i_blocks); | |
+ set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); | |
+ set_raw_inline(F2FS_I(inode), ri); | |
+ | |
+ ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); | |
+ ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | |
+ ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); | |
+ ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); | |
+ ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | |
+ ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | |
+ ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth); | |
+ ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); | |
+ ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); | |
+ ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); | |
+ ri->i_generation = cpu_to_le32(inode->i_generation); | |
+ | |
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | |
+ if (old_valid_dev(inode->i_rdev)) { | |
+ ri->i_addr[0] = | |
+ cpu_to_le32(old_encode_dev(inode->i_rdev)); | |
+ ri->i_addr[1] = 0; | |
+ } else { | |
+ ri->i_addr[0] = 0; | |
+ ri->i_addr[1] = | |
+ cpu_to_le32(new_encode_dev(inode->i_rdev)); | |
+ ri->i_addr[2] = 0; | |
+ } | |
+ } | |
+ | |
+ set_cold_node(inode, node_page); | |
+ set_page_dirty(node_page); | |
+ clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); | |
+} | |
+ | |
+int update_inode_page(struct inode *inode) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct page *node_page; | |
+ | |
+ node_page = get_node_page(sbi, inode->i_ino); | |
+ if (IS_ERR(node_page)) | |
+ return PTR_ERR(node_page); | |
+ | |
+ update_inode(inode, node_page); | |
+ f2fs_put_page(node_page, 1); | |
+ return 0; | |
+} | |
+ | |
+int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ int ret, ilock; | |
+ | |
+ if (inode->i_ino == F2FS_NODE_INO(sbi) || | |
+ inode->i_ino == F2FS_META_INO(sbi)) | |
+ return 0; | |
+ | |
+ if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) | |
+ return 0; | |
+ | |
+ /* | |
+	 * We need to take the lock here to avoid producing dirty node pages | |
+	 * during urgent cleaning when running out of free sections. | |
+ */ | |
+ ilock = mutex_lock_op(sbi); | |
+ ret = update_inode_page(inode); | |
+ mutex_unlock_op(sbi, ilock); | |
+ | |
+ if (wbc) | |
+ f2fs_balance_fs(sbi); | |
+ | |
+ return ret; | |
+} | |
+ | |
+/* | |
+ * Called at the last iput() if i_nlink is zero | |
+ */ | |
+void f2fs_evict_inode(struct inode *inode) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ int ilock; | |
+ | |
+ trace_f2fs_evict_inode(inode); | |
+ truncate_inode_pages(&inode->i_data, 0); | |
+ | |
+ if (inode->i_ino == F2FS_NODE_INO(sbi) || | |
+ inode->i_ino == F2FS_META_INO(sbi)) | |
+ goto no_delete; | |
+ | |
+ BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents)); | |
+ remove_dirty_dir_inode(inode); | |
+ | |
+ if (inode->i_nlink || is_bad_inode(inode)) | |
+ goto no_delete; | |
+ | |
+ set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); | |
+ i_size_write(inode, 0); | |
+ | |
+ if (F2FS_HAS_BLOCKS(inode)) | |
+ f2fs_truncate(inode); | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ remove_inode_page(inode); | |
+ mutex_unlock_op(sbi, ilock); | |
+ | |
+no_delete: | |
+ end_writeback(inode); | |
+} | |
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c | |
new file mode 100644 | |
index 0000000..aa0c453 | |
--- /dev/null | |
+++ b/fs/f2fs/namei.c | |
@@ -0,0 +1,557 @@ | |
+/* | |
+ * fs/f2fs/namei.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/fs.h> | |
+#include <linux/f2fs_fs.h> | |
+#include <linux/pagemap.h> | |
+#include <linux/sched.h> | |
+#include <linux/ctype.h> | |
+ | |
+#include "f2fs.h" | |
+#include "node.h" | |
+#include "xattr.h" | |
+#include "acl.h" | |
+#include <trace/events/f2fs.h> | |
+ | |
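+/* | |
+ * Allocate an in-core inode and reserve a free nid for it. If | |
+ * inserting it fails, the nid is handed back via alloc_nid_failed() | |
+ * and the half-built inode is marked bad and dropped. | |
+ */ | |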
+static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) | |
+{ | |
+ struct super_block *sb = dir->i_sb; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ nid_t ino; | |
+ struct inode *inode; | |
+ bool nid_free = false; | |
+ int err, ilock; | |
+ | |
+ inode = new_inode(sb); | |
+ if (!inode) | |
+ return ERR_PTR(-ENOMEM); | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ if (!alloc_nid(sbi, &ino)) { | |
+ mutex_unlock_op(sbi, ilock); | |
+ err = -ENOSPC; | |
+ goto fail; | |
+ } | |
+ mutex_unlock_op(sbi, ilock); | |
+ | |
+ if (IS_ANDROID_EMU(sbi, F2FS_I(dir), F2FS_I(dir))) | |
+ f2fs_android_emu(sbi, inode, &inode->i_uid, | |
+ &inode->i_gid, &mode); | |
+ else { | |
+ inode->i_uid = current_fsuid(); | |
+ | |
+ if (dir->i_mode & S_ISGID) { | |
+ inode->i_gid = dir->i_gid; | |
+ if (S_ISDIR(mode)) | |
+ mode |= S_ISGID; | |
+ } else { | |
+ inode->i_gid = current_fsgid(); | |
+ } | |
+ } | |
+ | |
+ inode->i_ino = ino; | |
+ inode->i_mode = mode; | |
+ inode->i_blocks = 0; | |
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | |
+ inode->i_generation = sbi->s_next_generation++; | |
+ | |
+ err = insert_inode_locked(inode); | |
+ if (err) { | |
+ err = -EINVAL; | |
+ nid_free = true; | |
+ goto out; | |
+ } | |
+ trace_f2fs_new_inode(inode, 0); | |
+ mark_inode_dirty(inode); | |
+ return inode; | |
+ | |
+out: | |
+ clear_nlink(inode); | |
+ unlock_new_inode(inode); | |
+fail: | |
+ trace_f2fs_new_inode(inode, err); | |
+ make_bad_inode(inode); | |
+ iput(inode); | |
+ if (nid_free) | |
+ alloc_nid_failed(sbi, ino); | |
+ return ERR_PTR(err); | |
+} | |
+ | |
+static int is_multimedia_file(const unsigned char *s, const char *sub) | |
+{ | |
+ size_t slen = strlen(s); | |
+ size_t sublen = strlen(sub); | |
+ | |
+ if (sublen > slen) | |
+ return 0; | |
+ | |
+ return !strncasecmp(s + slen - sublen, sub, sublen); | |
+} | |
+ | |
+/* | |
+ * Set multimedia files as cold files for hot/cold data separation | |
+ */ | |
+static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, | |
+ const unsigned char *name) | |
+{ | |
+ int i; | |
+ __u8 (*extlist)[8] = sbi->raw_super->extension_list; | |
+ | |
+ int count = le32_to_cpu(sbi->raw_super->extension_count); | |
+ for (i = 0; i < count; i++) { | |
+ if (is_multimedia_file(name, extlist[i])) { | |
+ file_set_cold(inode); | |
+ break; | |
+ } | |
+ } | |
+} | |
+ | |
+static int f2fs_create(struct inode *dir, struct dentry *dentry, int mode, | |
+ struct nameidata *nd) | |
+{ | |
+ struct super_block *sb = dir->i_sb; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ struct inode *inode; | |
+ nid_t ino = 0; | |
+ int err, ilock; | |
+ | |
+ f2fs_balance_fs(sbi); | |
+ | |
+ inode = f2fs_new_inode(dir, mode); | |
+ if (IS_ERR(inode)) | |
+ return PTR_ERR(inode); | |
+ | |
+ if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) | |
+ set_cold_files(sbi, inode, dentry->d_name.name); | |
+ | |
+ inode->i_op = &f2fs_file_inode_operations; | |
+ inode->i_fop = &f2fs_file_operations; | |
+ inode->i_mapping->a_ops = &f2fs_dblock_aops; | |
+ ino = inode->i_ino; | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ err = f2fs_add_link(dentry, inode); | |
+ mutex_unlock_op(sbi, ilock); | |
+ if (err) | |
+ goto out; | |
+ | |
+ alloc_nid_done(sbi, ino); | |
+ | |
+ d_instantiate(dentry, inode); | |
+ unlock_new_inode(inode); | |
+ return 0; | |
+out: | |
+ clear_nlink(inode); | |
+ unlock_new_inode(inode); | |
+ make_bad_inode(inode); | |
+ iput(inode); | |
+ alloc_nid_failed(sbi, ino); | |
+ return err; | |
+} | |
+ | |
+static int f2fs_link(struct dentry *old_dentry, struct inode *dir, | |
+ struct dentry *dentry) | |
+{ | |
+ struct inode *inode = old_dentry->d_inode; | |
+ struct super_block *sb; | |
+ struct f2fs_sb_info *sbi; | |
+ int err, ilock; | |
+ | |
+ if (inode->i_nlink >= F2FS_LINK_MAX) | |
+ return -EMLINK; | |
+ | |
+ sb = dir->i_sb; | |
+ sbi = F2FS_SB(sb); | |
+ | |
+ f2fs_balance_fs(sbi); | |
+ | |
+ inode->i_ctime = CURRENT_TIME; | |
+ ihold(inode); | |
+ | |
+ set_inode_flag(F2FS_I(inode), FI_INC_LINK); | |
+ ilock = mutex_lock_op(sbi); | |
+ err = f2fs_add_link(dentry, inode); | |
+ mutex_unlock_op(sbi, ilock); | |
+ if (err) | |
+ goto out; | |
+ | |
+ d_instantiate(dentry, inode); | |
+ return 0; | |
+out: | |
+ clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | |
+ iput(inode); | |
+ return err; | |
+} | |
+ | |
+struct dentry *f2fs_get_parent(struct dentry *child) | |
+{ | |
+ struct qstr dotdot = {.name = "..", .len = 2}; | |
+ unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot); | |
+ if (!ino) | |
+ return ERR_PTR(-ENOENT); | |
+ return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); | |
+} | |
+ | |
+static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, | |
+ struct nameidata *nd) | |
+{ | |
+ struct inode *inode = NULL; | |
+ struct f2fs_dir_entry *de; | |
+ struct page *page; | |
+ | |
+ if (dentry->d_name.len > F2FS_NAME_LEN) | |
+ return ERR_PTR(-ENAMETOOLONG); | |
+ | |
+ de = f2fs_find_entry(dir, &dentry->d_name, &page); | |
+ if (de) { | |
+ nid_t ino = le32_to_cpu(de->ino); | |
+ kunmap(page); | |
+ f2fs_put_page(page, 0); | |
+ | |
+ inode = f2fs_iget(dir->i_sb, ino); | |
+ if (IS_ERR(inode)) | |
+ return ERR_CAST(inode); | |
+ } | |
+ | |
+ return d_splice_alias(inode, dentry); | |
+} | |
+ | |
+static int f2fs_unlink(struct inode *dir, struct dentry *dentry) | |
+{ | |
+ struct super_block *sb = dir->i_sb; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ struct inode *inode = dentry->d_inode; | |
+ struct f2fs_dir_entry *de; | |
+ struct page *page; | |
+ int err = -ENOENT; | |
+ int ilock; | |
+ | |
+ trace_f2fs_unlink_enter(dir, dentry); | |
+ f2fs_balance_fs(sbi); | |
+ | |
+ de = f2fs_find_entry(dir, &dentry->d_name, &page); | |
+ if (!de) | |
+ goto fail; | |
+ | |
+ err = acquire_orphan_inode(sbi); | |
+ if (err) { | |
+ kunmap(page); | |
+ f2fs_put_page(page, 0); | |
+ goto fail; | |
+ } | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ f2fs_delete_entry(de, page, inode); | |
+ mutex_unlock_op(sbi, ilock); | |
+ | |
+ /* In order to evict this inode, we set it dirty */ | |
+ mark_inode_dirty(inode); | |
+fail: | |
+ trace_f2fs_unlink_exit(inode, err); | |
+ return err; | |
+} | |
+ | |
+static int f2fs_symlink(struct inode *dir, struct dentry *dentry, | |
+ const char *symname) | |
+{ | |
+ struct super_block *sb = dir->i_sb; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ struct inode *inode; | |
+ size_t symlen = strlen(symname) + 1; | |
+ int err, ilock; | |
+ | |
+ f2fs_balance_fs(sbi); | |
+ | |
+ inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); | |
+ if (IS_ERR(inode)) | |
+ return PTR_ERR(inode); | |
+ | |
+ inode->i_op = &f2fs_symlink_inode_operations; | |
+ inode->i_mapping->a_ops = &f2fs_dblock_aops; | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ err = f2fs_add_link(dentry, inode); | |
+ mutex_unlock_op(sbi, ilock); | |
+ if (err) | |
+ goto out; | |
+ | |
+ err = page_symlink(inode, symname, symlen); | |
+ alloc_nid_done(sbi, inode->i_ino); | |
+ | |
+ d_instantiate(dentry, inode); | |
+ unlock_new_inode(inode); | |
+ return err; | |
+out: | |
+ clear_nlink(inode); | |
+ unlock_new_inode(inode); | |
+ make_bad_inode(inode); | |
+ iput(inode); | |
+ alloc_nid_failed(sbi, inode->i_ino); | |
+ return err; | |
+} | |
+ | |
+static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |
+{ | |
+ struct f2fs_sb_info *sbi; | |
+ struct inode *inode; | |
+ int err, ilock; | |
+ | |
+ if (dir->i_nlink >= F2FS_LINK_MAX) | |
+ return -EMLINK; | |
+ | |
+ sbi = F2FS_SB(dir->i_sb); | |
+ f2fs_balance_fs(sbi); | |
+ | |
+ inode = f2fs_new_inode(dir, S_IFDIR | mode); | |
+ if (IS_ERR(inode)) | |
+ return PTR_ERR(inode); | |
+ | |
+ inode->i_op = &f2fs_dir_inode_operations; | |
+ inode->i_fop = &f2fs_dir_operations; | |
+ inode->i_mapping->a_ops = &f2fs_dblock_aops; | |
+ mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); | |
+ | |
+ set_inode_flag(F2FS_I(inode), FI_INC_LINK); | |
+ ilock = mutex_lock_op(sbi); | |
+ err = f2fs_add_link(dentry, inode); | |
+ mutex_unlock_op(sbi, ilock); | |
+ if (err) | |
+ goto out_fail; | |
+ | |
+ alloc_nid_done(sbi, inode->i_ino); | |
+ | |
+ d_instantiate(dentry, inode); | |
+ unlock_new_inode(inode); | |
+ | |
+ return 0; | |
+ | |
+out_fail: | |
+ clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | |
+ clear_nlink(inode); | |
+ unlock_new_inode(inode); | |
+ make_bad_inode(inode); | |
+ iput(inode); | |
+ alloc_nid_failed(sbi, inode->i_ino); | |
+ return err; | |
+} | |
+ | |
+static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) | |
+{ | |
+ struct inode *inode = dentry->d_inode; | |
+ if (f2fs_empty_dir(inode)) | |
+ return f2fs_unlink(dir, dentry); | |
+ return -ENOTEMPTY; | |
+} | |
+ | |
+static int f2fs_mknod(struct inode *dir, struct dentry *dentry, | |
+ int mode, dev_t rdev) | |
+{ | |
+ struct super_block *sb = dir->i_sb; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ struct inode *inode; | |
+ int err = 0; | |
+ int ilock; | |
+ | |
+ if (!new_valid_dev(rdev)) | |
+ return -EINVAL; | |
+ | |
+ f2fs_balance_fs(sbi); | |
+ | |
+ inode = f2fs_new_inode(dir, mode); | |
+ if (IS_ERR(inode)) | |
+ return PTR_ERR(inode); | |
+ | |
+ init_special_inode(inode, inode->i_mode, rdev); | |
+ inode->i_op = &f2fs_special_inode_operations; | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ err = f2fs_add_link(dentry, inode); | |
+ mutex_unlock_op(sbi, ilock); | |
+ if (err) | |
+ goto out; | |
+ | |
+ alloc_nid_done(sbi, inode->i_ino); | |
+ d_instantiate(dentry, inode); | |
+ unlock_new_inode(inode); | |
+ return 0; | |
+out: | |
+ clear_nlink(inode); | |
+ unlock_new_inode(inode); | |
+ make_bad_inode(inode); | |
+ iput(inode); | |
+ alloc_nid_failed(sbi, inode->i_ino); | |
+ return err; | |
+} | |
+ | |
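+/* | |
+ * Rename: if the target exists, its directory entry is repointed at | |
+ * old_inode and its link count dropped, registering it as an orphan | |
+ * once the count reaches zero; otherwise a new link is simply added. | |
+ * Moving a directory between parents also updates its ".." entry via | |
+ * f2fs_set_link(). | |
+ */ | |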
+static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, | |
+ struct inode *new_dir, struct dentry *new_dentry) | |
+{ | |
+ struct super_block *sb = old_dir->i_sb; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ struct inode *old_inode = old_dentry->d_inode; | |
+ struct inode *new_inode = new_dentry->d_inode; | |
+ struct page *old_dir_page; | |
+ struct page *old_page, *new_page; | |
+ struct f2fs_dir_entry *old_dir_entry = NULL; | |
+ struct f2fs_dir_entry *old_entry; | |
+ struct f2fs_dir_entry *new_entry; | |
+ int err = -ENOENT, ilock = -1; | |
+ | |
+ f2fs_balance_fs(sbi); | |
+ | |
+ old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); | |
+ if (!old_entry) | |
+ goto out; | |
+ | |
+ if (S_ISDIR(old_inode->i_mode)) { | |
+ err = -EIO; | |
+ old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); | |
+ if (!old_dir_entry) | |
+ goto out_old; | |
+ } | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ | |
+ if (new_inode) { | |
+ | |
+ err = -ENOTEMPTY; | |
+ if (old_dir_entry && !f2fs_empty_dir(new_inode)) | |
+ goto out_dir; | |
+ | |
+ err = -ENOENT; | |
+ new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, | |
+ &new_page); | |
+ if (!new_entry) | |
+ goto out_dir; | |
+ | |
+ err = acquire_orphan_inode(sbi); | |
+ if (err) | |
+ goto put_out_dir; | |
+ | |
+ if (update_dent_inode(old_inode, &new_dentry->d_name)) { | |
+ release_orphan_inode(sbi); | |
+ goto put_out_dir; | |
+ } | |
+ | |
+ f2fs_set_link(new_dir, new_entry, new_page, old_inode); | |
+ | |
+ new_inode->i_ctime = CURRENT_TIME; | |
+ if (old_dir_entry) | |
+ drop_nlink(new_inode); | |
+ drop_nlink(new_inode); | |
+ | |
+ if (!new_inode->i_nlink) | |
+ add_orphan_inode(sbi, new_inode->i_ino); | |
+ else | |
+ release_orphan_inode(sbi); | |
+ | |
+ update_inode_page(old_inode); | |
+ update_inode_page(new_inode); | |
+ } else { | |
+ if (old_dir_entry) { | |
+ err = -EMLINK; | |
+ if (new_dir->i_nlink >= F2FS_LINK_MAX) | |
+ goto out_dir; | |
+ } | |
+ | |
+ err = f2fs_add_link(new_dentry, old_inode); | |
+ if (err) | |
+ goto out_dir; | |
+ | |
+ if (old_dir_entry) { | |
+ inc_nlink(new_dir); | |
+ update_inode_page(new_dir); | |
+ } | |
+ } | |
+ | |
+ old_inode->i_ctime = CURRENT_TIME; | |
+ mark_inode_dirty(old_inode); | |
+ | |
+ f2fs_delete_entry(old_entry, old_page, NULL); | |
+ | |
+ if (old_dir_entry) { | |
+ if (old_dir != new_dir) { | |
+ f2fs_set_link(old_inode, old_dir_entry, | |
+ old_dir_page, new_dir); | |
+ } else { | |
+ kunmap(old_dir_page); | |
+ f2fs_put_page(old_dir_page, 0); | |
+ } | |
+ drop_nlink(old_dir); | |
+ update_inode_page(old_dir); | |
+ } | |
+ | |
+ mutex_unlock_op(sbi, ilock); | |
+ return 0; | |
+ | |
+put_out_dir: | |
+ if (PageLocked(new_page)) | |
+ f2fs_put_page(new_page, 1); | |
+ else | |
+ f2fs_put_page(new_page, 0); | |
+out_dir: | |
+ if (old_dir_entry) { | |
+ kunmap(old_dir_page); | |
+ f2fs_put_page(old_dir_page, 0); | |
+ } | |
+ mutex_unlock_op(sbi, ilock); | |
+out_old: | |
+ kunmap(old_page); | |
+ f2fs_put_page(old_page, 0); | |
+out: | |
+ return err; | |
+} | |
+ | |
+const struct inode_operations f2fs_dir_inode_operations = { | |
+ .create = f2fs_create, | |
+ .lookup = f2fs_lookup, | |
+ .link = f2fs_link, | |
+ .unlink = f2fs_unlink, | |
+ .symlink = f2fs_symlink, | |
+ .mkdir = f2fs_mkdir, | |
+ .rmdir = f2fs_rmdir, | |
+ .mknod = f2fs_mknod, | |
+ .rename = f2fs_rename, | |
+ .getattr = f2fs_getattr, | |
+ .setattr = f2fs_setattr, | |
+ .get_acl = f2fs_get_acl, | |
+#ifdef CONFIG_F2FS_FS_XATTR | |
+ .setxattr = generic_setxattr, | |
+ .getxattr = generic_getxattr, | |
+ .listxattr = f2fs_listxattr, | |
+ .removexattr = generic_removexattr, | |
+#endif | |
+}; | |
+ | |
+const struct inode_operations f2fs_symlink_inode_operations = { | |
+ .readlink = generic_readlink, | |
+ .follow_link = page_follow_link_light, | |
+ .put_link = page_put_link, | |
+ .getattr = f2fs_getattr, | |
+ .setattr = f2fs_setattr, | |
+#ifdef CONFIG_F2FS_FS_XATTR | |
+ .setxattr = generic_setxattr, | |
+ .getxattr = generic_getxattr, | |
+ .listxattr = f2fs_listxattr, | |
+ .removexattr = generic_removexattr, | |
+#endif | |
+}; | |
+ | |
+const struct inode_operations f2fs_special_inode_operations = { | |
+ .getattr = f2fs_getattr, | |
+ .setattr = f2fs_setattr, | |
+ .get_acl = f2fs_get_acl, | |
+#ifdef CONFIG_F2FS_FS_XATTR | |
+ .setxattr = generic_setxattr, | |
+ .getxattr = generic_getxattr, | |
+ .listxattr = f2fs_listxattr, | |
+ .removexattr = generic_removexattr, | |
+#endif | |
+}; | |
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c | |
new file mode 100644 | |
index 0000000..eac5122 | |
--- /dev/null | |
+++ b/fs/f2fs/node.c | |
@@ -0,0 +1,1859 @@ | |
+/* | |
+ * fs/f2fs/node.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/fs.h> | |
+#include <linux/f2fs_fs.h> | |
+#include <linux/mpage.h> | |
+#include <linux/backing-dev.h> | |
+#include <linux/blkdev.h> | |
+#include <linux/pagevec.h> | |
+#include <linux/swap.h> | |
+ | |
+#include "f2fs.h" | |
+#include "node.h" | |
+#include "segment.h" | |
+#include <trace/events/f2fs.h> | |
+ | |
+static struct kmem_cache *nat_entry_slab; | |
+static struct kmem_cache *free_nid_slab; | |
+ | |
+static void clear_node_page_dirty(struct page *page) | |
+{ | |
+ struct address_space *mapping = page->mapping; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | |
+	unsigned long flags; | |
+ | |
+ if (PageDirty(page)) { | |
+ spin_lock_irqsave(&mapping->tree_lock, flags); | |
+ radix_tree_tag_clear(&mapping->page_tree, | |
+ page_index(page), | |
+ PAGECACHE_TAG_DIRTY); | |
+ spin_unlock_irqrestore(&mapping->tree_lock, flags); | |
+ | |
+ clear_page_dirty_for_io(page); | |
+ dec_page_count(sbi, F2FS_DIRTY_NODES); | |
+ } | |
+ ClearPageUptodate(page); | |
+} | |
+ | |
+static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) | |
+{ | |
+ pgoff_t index = current_nat_addr(sbi, nid); | |
+ return get_meta_page(sbi, index); | |
+} | |
+ | |
+static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) | |
+{ | |
+ struct page *src_page; | |
+ struct page *dst_page; | |
+ pgoff_t src_off; | |
+ pgoff_t dst_off; | |
+ void *src_addr; | |
+ void *dst_addr; | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ | |
+ src_off = current_nat_addr(sbi, nid); | |
+ dst_off = next_nat_addr(sbi, src_off); | |
+ | |
+ /* get current nat block page with lock */ | |
+ src_page = get_meta_page(sbi, src_off); | |
+ | |
+ /* Dirty src_page means that it is already the new target NAT page. */ | |
+ if (PageDirty(src_page)) | |
+ return src_page; | |
+ | |
+ dst_page = grab_meta_page(sbi, dst_off); | |
+ | |
+ src_addr = page_address(src_page); | |
+ dst_addr = page_address(dst_page); | |
+ memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); | |
+ set_page_dirty(dst_page); | |
+ f2fs_put_page(src_page, 1); | |
+ | |
+ set_to_next_nat(nm_i, nid); | |
+ | |
+ return dst_page; | |
+} | |
+ | |
+/* | |
+ * Readahead NAT pages | |
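+ * to prime the page cache for a free-nid scan; FREE_NID_PAGES NAT | |
+ * blocks are queued under one blk_plug and submitted as a batch. | |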
+ */ | |
+static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) | |
+{ | |
+ struct address_space *mapping = sbi->meta_inode->i_mapping; | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ struct blk_plug plug; | |
+ struct page *page; | |
+ pgoff_t index; | |
+ int i; | |
+ | |
+ blk_start_plug(&plug); | |
+ | |
+ for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { | |
+ if (nid >= nm_i->max_nid) | |
+ nid = 0; | |
+ index = current_nat_addr(sbi, nid); | |
+ | |
+ page = grab_cache_page(mapping, index); | |
+ if (!page) | |
+ continue; | |
+ if (PageUptodate(page)) { | |
+ f2fs_put_page(page, 1); | |
+ continue; | |
+ } | |
+ if (f2fs_readpage(sbi, page, index, READ)) | |
+ continue; | |
+ | |
+ f2fs_put_page(page, 0); | |
+ } | |
+ blk_finish_plug(&plug); | |
+} | |
+ | |
+static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) | |
+{ | |
+ return radix_tree_lookup(&nm_i->nat_root, n); | |
+} | |
+ | |
+static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, | |
+ nid_t start, unsigned int nr, struct nat_entry **ep) | |
+{ | |
+ return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr); | |
+} | |
+ | |
+static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) | |
+{ | |
+ list_del(&e->list); | |
+ radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); | |
+ nm_i->nat_cnt--; | |
+ kmem_cache_free(nat_entry_slab, e); | |
+} | |
+ | |
+int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ struct nat_entry *e; | |
+ int is_cp = 1; | |
+ | |
+ read_lock(&nm_i->nat_tree_lock); | |
+ e = __lookup_nat_cache(nm_i, nid); | |
+ if (e && !e->checkpointed) | |
+ is_cp = 0; | |
+ read_unlock(&nm_i->nat_tree_lock); | |
+ return is_cp; | |
+} | |
+ | |
+static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) | |
+{ | |
+ struct nat_entry *new; | |
+ | |
+ new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); | |
+ if (!new) | |
+ return NULL; | |
+ if (radix_tree_insert(&nm_i->nat_root, nid, new)) { | |
+ kmem_cache_free(nat_entry_slab, new); | |
+ return NULL; | |
+ } | |
+ memset(new, 0, sizeof(struct nat_entry)); | |
+ nat_set_nid(new, nid); | |
+ list_add_tail(&new->list, &nm_i->nat_entries); | |
+ nm_i->nat_cnt++; | |
+ return new; | |
+} | |
+ | |
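+/* | |
+ * grab_nat_entry() allocates with GFP_ATOMIC and may fail, so the | |
+ * callers below drop nat_tree_lock and retry instead of returning an | |
+ * error. | |
+ */ | |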
+static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, | |
+ struct f2fs_nat_entry *ne) | |
+{ | |
+ struct nat_entry *e; | |
+retry: | |
+ write_lock(&nm_i->nat_tree_lock); | |
+ e = __lookup_nat_cache(nm_i, nid); | |
+ if (!e) { | |
+ e = grab_nat_entry(nm_i, nid); | |
+ if (!e) { | |
+ write_unlock(&nm_i->nat_tree_lock); | |
+ goto retry; | |
+ } | |
+ nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); | |
+ nat_set_ino(e, le32_to_cpu(ne->ino)); | |
+ nat_set_version(e, ne->version); | |
+ e->checkpointed = true; | |
+ } | |
+ write_unlock(&nm_i->nat_tree_lock); | |
+} | |
+ | |
+static int set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, | |
+ block_t new_blkaddr) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ struct nat_entry *e; | |
+retry: | |
+ write_lock(&nm_i->nat_tree_lock); | |
+ e = __lookup_nat_cache(nm_i, ni->nid); | |
+ if (!e) { | |
+ e = grab_nat_entry(nm_i, ni->nid); | |
+ if (!e) { | |
+ write_unlock(&nm_i->nat_tree_lock); | |
+ goto retry; | |
+ } | |
+ e->ni = *ni; | |
+ e->checkpointed = true; | |
+ BUG_ON(ni->blk_addr == NEW_ADDR); | |
+ } else if (new_blkaddr == NEW_ADDR) { | |
+ /* | |
+		 * when a nid is reallocated, the previous nat entry can | |
+		 * remain in the nat cache, so reinitialize it with the | |
+		 * new information. | |
+ */ | |
+ e->ni = *ni; | |
+ if (ni->blk_addr != NULL_ADDR) { | |
+ f2fs_msg(sbi->sb, KERN_ERR, "node block address is " | |
+ "already set: %u", ni->blk_addr); | |
+ f2fs_handle_error(sbi); | |
+ /* just give up on this node */ | |
+ write_unlock(&nm_i->nat_tree_lock); | |
+ return -EIO; | |
+ } | |
+ } | |
+ | |
+ if (new_blkaddr == NEW_ADDR) | |
+ e->checkpointed = false; | |
+ | |
+ /* sanity check */ | |
+ BUG_ON(nat_get_blkaddr(e) != ni->blk_addr); | |
+ BUG_ON(nat_get_blkaddr(e) == NULL_ADDR && | |
+ new_blkaddr == NULL_ADDR); | |
+ BUG_ON(nat_get_blkaddr(e) == NEW_ADDR && | |
+ new_blkaddr == NEW_ADDR); | |
+ BUG_ON(nat_get_blkaddr(e) != NEW_ADDR && | |
+ nat_get_blkaddr(e) != NULL_ADDR && | |
+ new_blkaddr == NEW_ADDR); | |
+ | |
+	/* increment the version number as the node is removed */ | |
+ if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { | |
+ unsigned char version = nat_get_version(e); | |
+ nat_set_version(e, inc_node_version(version)); | |
+ } | |
+ | |
+ /* change address */ | |
+ nat_set_blkaddr(e, new_blkaddr); | |
+ __set_nat_cache_dirty(nm_i, e); | |
+ write_unlock(&nm_i->nat_tree_lock); | |
+ return 0; | |
+} | |
+ | |
+static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ | |
+ if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) | |
+ return 0; | |
+ | |
+ write_lock(&nm_i->nat_tree_lock); | |
+ while (nr_shrink && !list_empty(&nm_i->nat_entries)) { | |
+ struct nat_entry *ne; | |
+ ne = list_first_entry(&nm_i->nat_entries, | |
+ struct nat_entry, list); | |
+ __del_from_nat_cache(nm_i, ne); | |
+ nr_shrink--; | |
+ } | |
+ write_unlock(&nm_i->nat_tree_lock); | |
+ return nr_shrink; | |
+} | |
+ | |
+/* | |
+ * This function always returns success. | |
+ */ | |
+void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | |
+ struct f2fs_summary_block *sum = curseg->sum_blk; | |
+ nid_t start_nid = START_NID(nid); | |
+ struct f2fs_nat_block *nat_blk; | |
+ struct page *page = NULL; | |
+ struct f2fs_nat_entry ne; | |
+ struct nat_entry *e; | |
+ int i; | |
+ | |
+ memset(&ne, 0, sizeof(struct f2fs_nat_entry)); | |
+ ni->nid = nid; | |
+ | |
+ /* Check nat cache */ | |
+ read_lock(&nm_i->nat_tree_lock); | |
+ e = __lookup_nat_cache(nm_i, nid); | |
+ if (e) { | |
+ ni->ino = nat_get_ino(e); | |
+ ni->blk_addr = nat_get_blkaddr(e); | |
+ ni->version = nat_get_version(e); | |
+ } | |
+ read_unlock(&nm_i->nat_tree_lock); | |
+ if (e) | |
+ return; | |
+ | |
+ /* Check current segment summary */ | |
+ mutex_lock(&curseg->curseg_mutex); | |
+ i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); | |
+ if (i >= 0) { | |
+ ne = nat_in_journal(sum, i); | |
+ node_info_from_raw_nat(ni, &ne); | |
+ } | |
+ mutex_unlock(&curseg->curseg_mutex); | |
+ if (i >= 0) | |
+ goto cache; | |
+ | |
+ /* Fill node_info from nat page */ | |
+ page = get_current_nat_page(sbi, start_nid); | |
+ nat_blk = (struct f2fs_nat_block *)page_address(page); | |
+ ne = nat_blk->entries[nid - start_nid]; | |
+ node_info_from_raw_nat(ni, &ne); | |
+ f2fs_put_page(page, 1); | |
+cache: | |
+ /* cache nat entry */ | |
+ cache_nat_entry(NM_I(sbi), nid, &ne); | |
+} | |
+ | |
+/* | |
+ * The maximum depth is four. | |
+ * Offset[0] will have raw inode offset. | |
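+ * | |
+ * The tree fans out as: ADDRS_PER_INODE direct pointers in the inode | |
+ * itself, two direct node blocks (ADDRS_PER_BLOCK data pointers | |
+ * each), two indirect blocks (ADDRS_PER_BLOCK * NIDS_PER_BLOCK | |
+ * each), and one double-indirect block. noffset[] records the | |
+ * ordinal of each node block along the chosen path. | |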
+ */ | |
+static int get_node_path(struct f2fs_inode_info *fi, long block, | |
+ int offset[4], unsigned int noffset[4]) | |
+{ | |
+ const long direct_index = ADDRS_PER_INODE(fi); | |
+ const long direct_blks = ADDRS_PER_BLOCK; | |
+ const long dptrs_per_blk = NIDS_PER_BLOCK; | |
+ const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; | |
+ const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK; | |
+ int n = 0; | |
+ int level = 0; | |
+ | |
+ noffset[0] = 0; | |
+ | |
+ if (block < direct_index) { | |
+ offset[n] = block; | |
+ goto got; | |
+ } | |
+ block -= direct_index; | |
+ if (block < direct_blks) { | |
+ offset[n++] = NODE_DIR1_BLOCK; | |
+ noffset[n] = 1; | |
+ offset[n] = block; | |
+ level = 1; | |
+ goto got; | |
+ } | |
+ block -= direct_blks; | |
+ if (block < direct_blks) { | |
+ offset[n++] = NODE_DIR2_BLOCK; | |
+ noffset[n] = 2; | |
+ offset[n] = block; | |
+ level = 1; | |
+ goto got; | |
+ } | |
+ block -= direct_blks; | |
+ if (block < indirect_blks) { | |
+ offset[n++] = NODE_IND1_BLOCK; | |
+ noffset[n] = 3; | |
+ offset[n++] = block / direct_blks; | |
+ noffset[n] = 4 + offset[n - 1]; | |
+ offset[n] = block % direct_blks; | |
+ level = 2; | |
+ goto got; | |
+ } | |
+ block -= indirect_blks; | |
+ if (block < indirect_blks) { | |
+ offset[n++] = NODE_IND2_BLOCK; | |
+ noffset[n] = 4 + dptrs_per_blk; | |
+ offset[n++] = block / direct_blks; | |
+ noffset[n] = 5 + dptrs_per_blk + offset[n - 1]; | |
+ offset[n] = block % direct_blks; | |
+ level = 2; | |
+ goto got; | |
+ } | |
+ block -= indirect_blks; | |
+ if (block < dindirect_blks) { | |
+ offset[n++] = NODE_DIND_BLOCK; | |
+ noffset[n] = 5 + (dptrs_per_blk * 2); | |
+ offset[n++] = block / indirect_blks; | |
+ noffset[n] = 6 + (dptrs_per_blk * 2) + | |
+ offset[n - 1] * (dptrs_per_blk + 1); | |
+ offset[n++] = (block / direct_blks) % dptrs_per_blk; | |
+ noffset[n] = 7 + (dptrs_per_blk * 2) + | |
+ offset[n - 2] * (dptrs_per_blk + 1) + | |
+ offset[n - 1]; | |
+ offset[n] = block % direct_blks; | |
+ level = 3; | |
+ goto got; | |
+ } else { | |
+ BUG(); | |
+ } | |
+got: | |
+ return level; | |
+} | |
+ | |
+/* | |
+ * Caller should call f2fs_put_dnode(dn). | |
+ * Also, it should grab and release a mutex by calling mutex_lock_op() and | |
+ * mutex_unlock_op() unless mode is a read-only lookup (LOOKUP_NODE*). | |
+ * In the read-only case, we don't need to care about the mutex. | |
+ */ | |
+int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | |
+ struct page *npage[4]; | |
+ struct page *parent; | |
+ int offset[4]; | |
+ unsigned int noffset[4]; | |
+ nid_t nids[4]; | |
+ int level, i; | |
+ int err = 0; | |
+ | |
+ level = get_node_path(F2FS_I(dn->inode), index, offset, noffset); | |
+ | |
+ nids[0] = dn->inode->i_ino; | |
+ npage[0] = dn->inode_page; | |
+ | |
+ if (!npage[0]) { | |
+ npage[0] = get_node_page(sbi, nids[0]); | |
+ if (IS_ERR(npage[0])) | |
+ return PTR_ERR(npage[0]); | |
+ } | |
+ parent = npage[0]; | |
+ if (level != 0) | |
+ nids[1] = get_nid(parent, offset[0], true); | |
+ dn->inode_page = npage[0]; | |
+ dn->inode_page_locked = true; | |
+ | |
+ /* get indirect or direct nodes */ | |
+ for (i = 1; i <= level; i++) { | |
+ bool done = false; | |
+ | |
+ if (!nids[i] && mode == ALLOC_NODE) { | |
+ /* alloc new node */ | |
+ if (!alloc_nid(sbi, &(nids[i]))) { | |
+ err = -ENOSPC; | |
+ goto release_pages; | |
+ } | |
+ | |
+ dn->nid = nids[i]; | |
+ npage[i] = new_node_page(dn, noffset[i], NULL); | |
+ if (IS_ERR(npage[i])) { | |
+ alloc_nid_failed(sbi, nids[i]); | |
+ err = PTR_ERR(npage[i]); | |
+ goto release_pages; | |
+ } | |
+ | |
+ set_nid(parent, offset[i - 1], nids[i], i == 1); | |
+ alloc_nid_done(sbi, nids[i]); | |
+ done = true; | |
+ } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { | |
+ npage[i] = get_node_page_ra(parent, offset[i - 1]); | |
+ if (IS_ERR(npage[i])) { | |
+ err = PTR_ERR(npage[i]); | |
+ goto release_pages; | |
+ } | |
+ done = true; | |
+ } | |
+ if (i == 1) { | |
+ dn->inode_page_locked = false; | |
+ unlock_page(parent); | |
+ } else { | |
+ f2fs_put_page(parent, 1); | |
+ } | |
+ | |
+ if (!done) { | |
+ npage[i] = get_node_page(sbi, nids[i]); | |
+ if (IS_ERR(npage[i])) { | |
+ err = PTR_ERR(npage[i]); | |
+ f2fs_put_page(npage[0], 0); | |
+ goto release_out; | |
+ } | |
+ } | |
+ if (i < level) { | |
+ parent = npage[i]; | |
+ nids[i + 1] = get_nid(parent, offset[i], false); | |
+ } | |
+ } | |
+ dn->nid = nids[level]; | |
+ dn->ofs_in_node = offset[level]; | |
+ dn->node_page = npage[level]; | |
+ dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); | |
+ return 0; | |
+ | |
+release_pages: | |
+ f2fs_put_page(parent, 1); | |
+ if (i > 1) | |
+ f2fs_put_page(npage[0], 0); | |
+release_out: | |
+ dn->inode_page = NULL; | |
+ dn->node_page = NULL; | |
+ return err; | |
+} | |
+ | |
+static void truncate_node(struct dnode_of_data *dn) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | |
+ struct node_info ni; | |
+ | |
+ get_node_info(sbi, dn->nid, &ni); | |
+ if (dn->inode->i_blocks == 0) { | |
+ if (ni.blk_addr != NULL_ADDR) { | |
+ f2fs_msg(sbi->sb, KERN_ERR, | |
+ "empty node still has block address %u ", | |
+ ni.blk_addr); | |
+ f2fs_handle_error(sbi); | |
+ } | |
+ goto invalidate; | |
+ } | |
+ BUG_ON(ni.blk_addr == NULL_ADDR); | |
+ | |
+ /* Deallocate node address */ | |
+ invalidate_blocks(sbi, ni.blk_addr); | |
+ dec_valid_node_count(sbi, dn->inode, 1); | |
+ set_node_addr(sbi, &ni, NULL_ADDR); | |
+ | |
+ if (dn->nid == dn->inode->i_ino) { | |
+ remove_orphan_inode(sbi, dn->nid); | |
+ dec_valid_inode_count(sbi); | |
+ } else { | |
+ sync_inode_page(dn); | |
+ } | |
+invalidate: | |
+ clear_node_page_dirty(dn->node_page); | |
+ F2FS_SET_SB_DIRT(sbi); | |
+ | |
+ f2fs_put_page(dn->node_page, 1); | |
+ dn->node_page = NULL; | |
+ trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); | |
+} | |
+ | |
+static int truncate_dnode(struct dnode_of_data *dn) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | |
+ struct page *page; | |
+ | |
+ if (dn->nid == 0) | |
+ return 1; | |
+ | |
+ /* get direct node */ | |
+ page = get_node_page(sbi, dn->nid); | |
+ if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) | |
+ return 1; | |
+ else if (IS_ERR(page)) | |
+ return PTR_ERR(page); | |
+ | |
+ /* Make dnode_of_data for parameter */ | |
+ dn->node_page = page; | |
+ dn->ofs_in_node = 0; | |
+ truncate_data_blocks(dn); | |
+ truncate_node(dn); | |
+ return 1; | |
+} | |
+ | |
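+/* | |
+ * Recursively free the subtree rooted at dn->nid, counting empty | |
+ * slots as freed; a return value of NIDS_PER_BLOCK + 1 means the | |
+ * whole subtree, including this node, is gone. | |
+ */ | |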
+static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, | |
+ int ofs, int depth) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | |
+ struct dnode_of_data rdn = *dn; | |
+ struct page *page; | |
+ struct f2fs_node *rn; | |
+ nid_t child_nid; | |
+ unsigned int child_nofs; | |
+ int freed = 0; | |
+ int i, ret; | |
+ | |
+ if (dn->nid == 0) | |
+ return NIDS_PER_BLOCK + 1; | |
+ | |
+ trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); | |
+ | |
+ page = get_node_page(sbi, dn->nid); | |
+ if (IS_ERR(page)) { | |
+ trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); | |
+ return PTR_ERR(page); | |
+ } | |
+ | |
+ rn = F2FS_NODE(page); | |
+ if (depth < 3) { | |
+ for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { | |
+ child_nid = le32_to_cpu(rn->in.nid[i]); | |
+ if (child_nid == 0) | |
+ continue; | |
+ rdn.nid = child_nid; | |
+ ret = truncate_dnode(&rdn); | |
+ if (ret < 0) | |
+ goto out_err; | |
+ set_nid(page, i, 0, false); | |
+ } | |
+ } else { | |
+ child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; | |
+ for (i = ofs; i < NIDS_PER_BLOCK; i++) { | |
+ child_nid = le32_to_cpu(rn->in.nid[i]); | |
+ if (child_nid == 0) { | |
+ child_nofs += NIDS_PER_BLOCK + 1; | |
+ continue; | |
+ } | |
+ rdn.nid = child_nid; | |
+ ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); | |
+ if (ret == (NIDS_PER_BLOCK + 1)) { | |
+ set_nid(page, i, 0, false); | |
+ child_nofs += ret; | |
+ } else if (ret < 0 && ret != -ENOENT) { | |
+ goto out_err; | |
+ } | |
+ } | |
+ freed = child_nofs; | |
+ } | |
+ | |
+ if (!ofs) { | |
+ /* remove current indirect node */ | |
+ dn->node_page = page; | |
+ truncate_node(dn); | |
+ freed++; | |
+ } else { | |
+ f2fs_put_page(page, 1); | |
+ } | |
+ trace_f2fs_truncate_nodes_exit(dn->inode, freed); | |
+ return freed; | |
+ | |
+out_err: | |
+ f2fs_put_page(page, 1); | |
+ trace_f2fs_truncate_nodes_exit(dn->inode, ret); | |
+ return ret; | |
+} | |
+ | |
+static int truncate_partial_nodes(struct dnode_of_data *dn, | |
+ struct f2fs_inode *ri, int *offset, int depth) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | |
+ struct page *pages[2]; | |
+ nid_t nid[3]; | |
+ nid_t child_nid; | |
+ int err = 0; | |
+ int i; | |
+ int idx = depth - 2; | |
+ | |
+ nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); | |
+ if (!nid[0]) | |
+ return 0; | |
+ | |
+ /* get indirect nodes in the path */ | |
+ for (i = 0; i < depth - 1; i++) { | |
+		/* the reference count will be increased */ | |
+ pages[i] = get_node_page(sbi, nid[i]); | |
+ if (IS_ERR(pages[i])) { | |
+ depth = i + 1; | |
+ err = PTR_ERR(pages[i]); | |
+ goto fail; | |
+ } | |
+ nid[i + 1] = get_nid(pages[i], offset[i + 1], false); | |
+ } | |
+ | |
+ /* free direct nodes linked to a partial indirect node */ | |
+ for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) { | |
+ child_nid = get_nid(pages[idx], i, false); | |
+ if (!child_nid) | |
+ continue; | |
+ dn->nid = child_nid; | |
+ err = truncate_dnode(dn); | |
+ if (err < 0) | |
+ goto fail; | |
+ set_nid(pages[idx], i, 0, false); | |
+ } | |
+ | |
+ if (offset[depth - 1] == 0) { | |
+ dn->node_page = pages[idx]; | |
+ dn->nid = nid[idx]; | |
+ truncate_node(dn); | |
+ } else { | |
+ f2fs_put_page(pages[idx], 1); | |
+ } | |
+ offset[idx]++; | |
+ offset[depth - 1] = 0; | |
+fail: | |
+ for (i = depth - 3; i >= 0; i--) | |
+ f2fs_put_page(pages[i], 1); | |
+ | |
+ trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); | |
+ | |
+ return err; | |
+} | |
+ | |
+/* | |
+ * All the block addresses of data and nodes should be nullified. | |
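+ * get_node_path() locates 'from' in the node tree; a partially | |
+ * covered indirect chain is trimmed first by truncate_partial_nodes() | |
+ * and the remaining whole subtrees are then freed in order. | |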
+ */ | |
+int truncate_inode_blocks(struct inode *inode, pgoff_t from) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct address_space *node_mapping = sbi->node_inode->i_mapping; | |
+ int err = 0, cont = 1; | |
+ int level, offset[4], noffset[4]; | |
+ unsigned int nofs = 0; | |
+ struct f2fs_node *rn; | |
+ struct dnode_of_data dn; | |
+ struct page *page; | |
+ | |
+ trace_f2fs_truncate_inode_blocks_enter(inode, from); | |
+ | |
+ level = get_node_path(F2FS_I(inode), from, offset, noffset); | |
+restart: | |
+ page = get_node_page(sbi, inode->i_ino); | |
+ if (IS_ERR(page)) { | |
+ trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); | |
+ return PTR_ERR(page); | |
+ } | |
+ | |
+ set_new_dnode(&dn, inode, page, NULL, 0); | |
+ unlock_page(page); | |
+ | |
+ rn = F2FS_NODE(page); | |
+ switch (level) { | |
+ case 0: | |
+ case 1: | |
+ nofs = noffset[1]; | |
+ break; | |
+ case 2: | |
+ nofs = noffset[1]; | |
+ if (!offset[level - 1]) | |
+ goto skip_partial; | |
+ err = truncate_partial_nodes(&dn, &rn->i, offset, level); | |
+ if (err < 0 && err != -ENOENT) | |
+ goto fail; | |
+ nofs += 1 + NIDS_PER_BLOCK; | |
+ break; | |
+ case 3: | |
+ nofs = 5 + 2 * NIDS_PER_BLOCK; | |
+ if (!offset[level - 1]) | |
+ goto skip_partial; | |
+ err = truncate_partial_nodes(&dn, &rn->i, offset, level); | |
+ if (err < 0 && err != -ENOENT) | |
+ goto fail; | |
+ break; | |
+ default: | |
+ BUG(); | |
+ } | |
+ | |
+skip_partial: | |
+ while (cont) { | |
+ dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]); | |
+ switch (offset[0]) { | |
+ case NODE_DIR1_BLOCK: | |
+ case NODE_DIR2_BLOCK: | |
+ err = truncate_dnode(&dn); | |
+ break; | |
+ | |
+ case NODE_IND1_BLOCK: | |
+ case NODE_IND2_BLOCK: | |
+ err = truncate_nodes(&dn, nofs, offset[1], 2); | |
+ break; | |
+ | |
+ case NODE_DIND_BLOCK: | |
+ err = truncate_nodes(&dn, nofs, offset[1], 3); | |
+ cont = 0; | |
+ break; | |
+ | |
+ default: | |
+ BUG(); | |
+ } | |
+ if (err < 0 && err != -ENOENT) | |
+ goto fail; | |
+ if (offset[1] == 0 && | |
+ rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { | |
+ lock_page(page); | |
+ if (page->mapping != node_mapping) { | |
+ f2fs_put_page(page, 1); | |
+ goto restart; | |
+ } | |
+ wait_on_page_writeback(page); | |
+ rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; | |
+ set_page_dirty(page); | |
+ unlock_page(page); | |
+ } | |
+ offset[1] = 0; | |
+ offset[0]++; | |
+ nofs += err; | |
+ } | |
+fail: | |
+ f2fs_put_page(page, 0); | |
+ trace_f2fs_truncate_inode_blocks_exit(inode, err); | |
+ return err > 0 ? 0 : err; | |
+} | |
+ | |
+int truncate_xattr_node(struct inode *inode, struct page *page) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ nid_t nid = F2FS_I(inode)->i_xattr_nid; | |
+ struct dnode_of_data dn; | |
+ struct page *npage; | |
+ | |
+ if (!nid) | |
+ return 0; | |
+ | |
+ npage = get_node_page(sbi, nid); | |
+ if (IS_ERR(npage)) | |
+ return PTR_ERR(npage); | |
+ | |
+ F2FS_I(inode)->i_xattr_nid = 0; | |
+ | |
+ /* need to do checkpoint during fsync */ | |
+ F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); | |
+ | |
+ set_new_dnode(&dn, inode, page, npage, nid); | |
+ | |
+ if (page) | |
+ dn.inode_page_locked = 1; | |
+ truncate_node(&dn); | |
+ return 0; | |
+} | |
+ | |
+/* | |
+ * Caller should grab and release a mutex by calling mutex_lock_op() and | |
+ * mutex_unlock_op(). | |
+ */ | |
+int remove_inode_page(struct inode *inode) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct page *page; | |
+ nid_t ino = inode->i_ino; | |
+ struct dnode_of_data dn; | |
+ int err; | |
+ | |
+ page = get_node_page(sbi, ino); | |
+ if (IS_ERR(page)) | |
+ return PTR_ERR(page); | |
+ | |
+ err = truncate_xattr_node(inode, page); | |
+ if (err) { | |
+ f2fs_put_page(page, 1); | |
+ return err; | |
+ } | |
+ | |
+	/* 0 is possible, after f2fs_new_inode() has failed */ | |
+ if (inode->i_blocks != 0 && inode->i_blocks != 1) { | |
+ f2fs_msg(sbi->sb, KERN_ERR, "inode %u still has %llu blocks", | |
+ ino, inode->i_blocks); | |
+ f2fs_handle_error(sbi); | |
+ } | |
+ set_new_dnode(&dn, inode, page, page, ino); | |
+ truncate_node(&dn); | |
+ return 0; | |
+} | |
+ | |
+struct page *new_inode_page(struct inode *inode, const struct qstr *name) | |
+{ | |
+ struct dnode_of_data dn; | |
+ | |
+ /* allocate inode page for new inode */ | |
+ set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); | |
+ | |
+ /* caller should f2fs_put_page(page, 1); */ | |
+ return new_node_page(&dn, 0, NULL); | |
+} | |
+ | |
+struct page *new_node_page(struct dnode_of_data *dn, | |
+ unsigned int ofs, struct page *ipage) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | |
+ struct address_space *mapping = sbi->node_inode->i_mapping; | |
+ struct node_info old_ni, new_ni; | |
+ struct page *page; | |
+ int err; | |
+ | |
+ if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) | |
+ return ERR_PTR(-EPERM); | |
+ | |
+ page = grab_cache_page(mapping, dn->nid); | |
+ if (!page) | |
+ return ERR_PTR(-ENOMEM); | |
+ | |
+ if (!inc_valid_node_count(sbi, dn->inode, 1)) { | |
+ err = -ENOSPC; | |
+ goto fail; | |
+ } | |
+ | |
+ get_node_info(sbi, dn->nid, &old_ni); | |
+ | |
+ /* Reinitialize old_ni with new node page */ | |
+ BUG_ON(old_ni.blk_addr != NULL_ADDR); | |
+ new_ni = old_ni; | |
+ new_ni.ino = dn->inode->i_ino; | |
+ set_node_addr(sbi, &new_ni, NEW_ADDR); | |
+ | |
+ fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); | |
+ set_cold_node(dn->inode, page); | |
+ SetPageUptodate(page); | |
+ set_page_dirty(page); | |
+ | |
+ if (ofs == XATTR_NODE_OFFSET) | |
+ F2FS_I(dn->inode)->i_xattr_nid = dn->nid; | |
+ | |
+ dn->node_page = page; | |
+ if (ipage) | |
+ update_inode(dn->inode, ipage); | |
+ else | |
+ sync_inode_page(dn); | |
+ if (ofs == 0) | |
+ inc_valid_inode_count(sbi); | |
+ | |
+ return page; | |
+ | |
+fail: | |
+ clear_node_page_dirty(page); | |
+ f2fs_put_page(page, 1); | |
+ return ERR_PTR(err); | |
+} | |
+ | |
+/* | |
+ * The caller should act on the return value as follows: | |
+ * 0: f2fs_put_page(page, 0) | |
+ * LOCKED_PAGE: f2fs_put_page(page, 1) | |
+ * error: nothing | |
+ */ | |
+static int read_node_page(struct page *page, int type) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | |
+ struct node_info ni; | |
+ | |
+ get_node_info(sbi, page->index, &ni); | |
+ | |
+ if (ni.blk_addr == NULL_ADDR) { | |
+ f2fs_put_page(page, 1); | |
+ return -ENOENT; | |
+ } | |
+ | |
+ if (PageUptodate(page)) | |
+ return LOCKED_PAGE; | |
+ | |
+ return f2fs_readpage(sbi, page, ni.blk_addr, type); | |
+} | |
+ | |
+/* | |
+ * Readahead a node page | |
+ */ | |
+void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) | |
+{ | |
+ struct address_space *mapping = sbi->node_inode->i_mapping; | |
+ struct page *apage; | |
+ int err; | |
+ | |
+ apage = find_get_page(mapping, nid); | |
+ if (apage && PageUptodate(apage)) { | |
+ f2fs_put_page(apage, 0); | |
+ return; | |
+ } | |
+ f2fs_put_page(apage, 0); | |
+ | |
+ apage = grab_cache_page(mapping, nid); | |
+ if (!apage) | |
+ return; | |
+ | |
+ err = read_node_page(apage, READA); | |
+ if (err == 0) | |
+ f2fs_put_page(apage, 0); | |
+ else if (err == LOCKED_PAGE) | |
+ f2fs_put_page(apage, 1); | |
+} | |
+ | |
+struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) | |
+{ | |
+ struct address_space *mapping = sbi->node_inode->i_mapping; | |
+ struct page *page; | |
+ int err; | |
+repeat: | |
+ page = grab_cache_page(mapping, nid); | |
+ if (!page) | |
+ return ERR_PTR(-ENOMEM); | |
+ | |
+ err = read_node_page(page, READ_SYNC); | |
+ if (err < 0) | |
+ return ERR_PTR(err); | |
+ else if (err == LOCKED_PAGE) | |
+ goto got_it; | |
+ | |
+ lock_page(page); | |
+ if (!PageUptodate(page)) { | |
+ f2fs_put_page(page, 1); | |
+ return ERR_PTR(-EIO); | |
+ } | |
+ if (page->mapping != mapping) { | |
+ f2fs_put_page(page, 1); | |
+ goto repeat; | |
+ } | |
+got_it: | |
+ if (nid != nid_of_node(page)) { | |
+ f2fs_msg(sbi->sb, KERN_ERR, "page node id does not match " | |
+ "request: %lu", nid); | |
+ f2fs_handle_error(sbi); | |
+ f2fs_put_page(page, 1); | |
+ return ERR_PTR(-EIO); | |
+ } | |
+ mark_page_accessed(page); | |
+ return page; | |
+} | |
+ | |
+/* | |
+ * Return a locked page for the desired node page. | |
+ * In addition, readahead up to MAX_RA_NODE of its sibling node pages. | |
+ */ | |
+struct page *get_node_page_ra(struct page *parent, int start) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); | |
+ struct address_space *mapping = sbi->node_inode->i_mapping; | |
+ struct blk_plug plug; | |
+ struct page *page; | |
+ int err, i, end; | |
+ nid_t nid; | |
+ | |
+ /* First, try getting the desired direct node. */ | |
+ nid = get_nid(parent, start, false); | |
+ if (!nid) | |
+ return ERR_PTR(-ENOENT); | |
+repeat: | |
+ page = grab_cache_page(mapping, nid); | |
+ if (!page) | |
+ return ERR_PTR(-ENOMEM); | |
+ | |
+ err = read_node_page(page, READ_SYNC); | |
+ if (err < 0) | |
+ return ERR_PTR(err); | |
+ else if (err == LOCKED_PAGE) | |
+ goto page_hit; | |
+ | |
+ blk_start_plug(&plug); | |
+ | |
+ /* Then, try readahead for siblings of the desired node */ | |
+ end = start + MAX_RA_NODE; | |
+ end = min(end, NIDS_PER_BLOCK); | |
+ for (i = start + 1; i < end; i++) { | |
+ nid = get_nid(parent, i, false); | |
+ if (!nid) | |
+ continue; | |
+ ra_node_page(sbi, nid); | |
+ } | |
+ | |
+ blk_finish_plug(&plug); | |
+ | |
+ lock_page(page); | |
+ if (page->mapping != mapping) { | |
+ f2fs_put_page(page, 1); | |
+ goto repeat; | |
+ } | |
+page_hit: | |
+ if (!PageUptodate(page)) { | |
+ f2fs_put_page(page, 1); | |
+ return ERR_PTR(-EIO); | |
+ } | |
+ mark_page_accessed(page); | |
+ return page; | |
+} | |
+ | |
+void sync_inode_page(struct dnode_of_data *dn) | |
+{ | |
+ if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { | |
+ update_inode(dn->inode, dn->node_page); | |
+ } else if (dn->inode_page) { | |
+ if (!dn->inode_page_locked) | |
+ lock_page(dn->inode_page); | |
+ update_inode(dn->inode, dn->inode_page); | |
+ if (!dn->inode_page_locked) | |
+ unlock_page(dn->inode_page); | |
+ } else { | |
+ update_inode_page(dn->inode); | |
+ } | |
+} | |
+ | |
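+/* | |
+ * Flush dirty node pages in three passes: indirect nodes, dentry | |
+ * dnodes, then file dnodes. When called for fsync (ino != 0), | |
+ * scanning starts at the last pass and the fsync/dentry marks are | |
+ * set on that inode's dnodes. | |
+ */ | |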
+int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, | |
+ struct writeback_control *wbc) | |
+{ | |
+ struct address_space *mapping = sbi->node_inode->i_mapping; | |
+ pgoff_t index, end; | |
+ struct pagevec pvec; | |
+ int step = ino ? 2 : 0; | |
+ int nwritten = 0, wrote = 0; | |
+ | |
+ pagevec_init(&pvec, 0); | |
+ | |
+next_step: | |
+ index = 0; | |
+ end = LONG_MAX; | |
+ | |
+ while (index <= end) { | |
+ int i, nr_pages; | |
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | |
+ PAGECACHE_TAG_DIRTY, | |
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | |
+ if (nr_pages == 0) | |
+ break; | |
+ | |
+ for (i = 0; i < nr_pages; i++) { | |
+ struct page *page = pvec.pages[i]; | |
+ | |
+ /* | |
+ * flushing sequence with step: | |
+ * 0. indirect nodes | |
+ * 1. dentry dnodes | |
+ * 2. file dnodes | |
+ */ | |
+ if (step == 0 && IS_DNODE(page)) | |
+ continue; | |
+ if (step == 1 && (!IS_DNODE(page) || | |
+ is_cold_node(page))) | |
+ continue; | |
+ if (step == 2 && (!IS_DNODE(page) || | |
+ !is_cold_node(page))) | |
+ continue; | |
+ | |
+ /* | |
+			 * In fsync mode, | |
+ * we should not skip writing node pages. | |
+ */ | |
+ if (ino && ino_of_node(page) == ino) | |
+ lock_page(page); | |
+ else if (!trylock_page(page)) | |
+ continue; | |
+ | |
+ if (unlikely(page->mapping != mapping)) { | |
+continue_unlock: | |
+ unlock_page(page); | |
+ continue; | |
+ } | |
+ if (ino && ino_of_node(page) != ino) | |
+ goto continue_unlock; | |
+ | |
+ if (!PageDirty(page)) { | |
+ /* someone wrote it for us */ | |
+ goto continue_unlock; | |
+ } | |
+ | |
+ if (!clear_page_dirty_for_io(page)) | |
+ goto continue_unlock; | |
+ | |
+ /* called by fsync() */ | |
+ if (ino && IS_DNODE(page)) { | |
+ int mark = !is_checkpointed_node(sbi, ino); | |
+ set_fsync_mark(page, 1); | |
+ if (IS_INODE(page)) | |
+ set_dentry_mark(page, mark); | |
+ nwritten++; | |
+ } else { | |
+ set_fsync_mark(page, 0); | |
+ set_dentry_mark(page, 0); | |
+ } | |
+ mapping->a_ops->writepage(page, wbc); | |
+ wrote++; | |
+ | |
+ if (--wbc->nr_to_write == 0) | |
+ break; | |
+ } | |
+ pagevec_release(&pvec); | |
+ cond_resched(); | |
+ | |
+ if (wbc->nr_to_write == 0) { | |
+ step = 2; | |
+ break; | |
+ } | |
+ } | |
+ | |
+ if (step < 2) { | |
+ step++; | |
+ goto next_step; | |
+ } | |
+ | |
+ if (wrote) | |
+ f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL); | |
+ | |
+ return nwritten; | |
+} | |
+ | |
+static int f2fs_write_node_page(struct page *page, | |
+ struct writeback_control *wbc) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | |
+ nid_t nid; | |
+ block_t new_addr; | |
+ struct node_info ni; | |
+ | |
+ if (sbi->por_doing) | |
+ goto redirty_out; | |
+ | |
+ wait_on_page_writeback(page); | |
+ | |
+ /* get old block addr of this node page */ | |
+ nid = nid_of_node(page); | |
+ BUG_ON(page->index != nid); | |
+ | |
+ get_node_info(sbi, nid, &ni); | |
+ | |
+ /* This page is already truncated */ | |
+ if (ni.blk_addr == NULL_ADDR) { | |
+ dec_page_count(sbi, F2FS_DIRTY_NODES); | |
+ unlock_page(page); | |
+ return 0; | |
+ } | |
+ | |
+ if (wbc->for_reclaim) | |
+ goto redirty_out; | |
+ | |
+ mutex_lock(&sbi->node_write); | |
+ set_page_writeback(page); | |
+ write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); | |
+ set_node_addr(sbi, &ni, new_addr); | |
+ dec_page_count(sbi, F2FS_DIRTY_NODES); | |
+ mutex_unlock(&sbi->node_write); | |
+ unlock_page(page); | |
+ return 0; | |
+ | |
+redirty_out: | |
+ dec_page_count(sbi, F2FS_DIRTY_NODES); | |
+ wbc->pages_skipped++; | |
+ set_page_dirty(page); | |
+ return AOP_WRITEPAGE_ACTIVATE; | |
+} | |
+ | |
+/* | |
+ * It is very important to gather dirty pages and write them at once, so that | |
+ * we can submit a big bio without interfering with other data writes. | |
+ * By default, 512 pages (2MB), one segment, is quite reasonable. | |
+ */ | |
+#define COLLECT_DIRTY_NODES 512 | |
+static int f2fs_write_node_pages(struct address_space *mapping, | |
+ struct writeback_control *wbc) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | |
+ long nr_to_write = wbc->nr_to_write; | |
+ | |
+ /* First check balancing cached NAT entries */ | |
+ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { | |
+ f2fs_sync_fs(sbi->sb, true); | |
+ return 0; | |
+ } | |
+ | |
+ /* collect a number of dirty node pages and write together */ | |
+ if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) | |
+ return 0; | |
+ | |
+	/* if mounting failed, skip writing node pages */ | |
+ wbc->nr_to_write = max_hw_blocks(sbi); | |
+ sync_node_pages(sbi, 0, wbc); | |
+ wbc->nr_to_write = nr_to_write - (max_hw_blocks(sbi) - wbc->nr_to_write); | |
+ return 0; | |
+} | |
+ | |
+static int f2fs_set_node_page_dirty(struct page *page) | |
+{ | |
+ struct address_space *mapping = page->mapping; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | |
+ | |
+ SetPageUptodate(page); | |
+ if (!PageDirty(page)) { | |
+ __set_page_dirty_nobuffers(page); | |
+ inc_page_count(sbi, F2FS_DIRTY_NODES); | |
+ SetPagePrivate(page); | |
+ return 1; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) | |
+{ | |
+ struct inode *inode = page->mapping->host; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ if (PageDirty(page)) | |
+ dec_page_count(sbi, F2FS_DIRTY_NODES); | |
+ ClearPagePrivate(page); | |
+} | |
+ | |
+static int f2fs_release_node_page(struct page *page, gfp_t wait) | |
+{ | |
+ ClearPagePrivate(page); | |
+ return 1; | |
+} | |
+ | |
+/* | |
+ * Structure of the f2fs node operations | |
+ */ | |
+const struct address_space_operations f2fs_node_aops = { | |
+ .writepage = f2fs_write_node_page, | |
+ .writepages = f2fs_write_node_pages, | |
+ .set_page_dirty = f2fs_set_node_page_dirty, | |
+ .invalidatepage = f2fs_invalidate_node_page, | |
+ .releasepage = f2fs_release_node_page, | |
+}; | |
+ | |
+static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) | |
+{ | |
+ struct list_head *this; | |
+ struct free_nid *i; | |
+ list_for_each(this, head) { | |
+ i = list_entry(this, struct free_nid, list); | |
+ if (i->nid == n) | |
+ return i; | |
+ } | |
+ return NULL; | |
+} | |
+ | |
+static void __del_from_free_nid_list(struct free_nid *i) | |
+{ | |
+ list_del(&i->list); | |
+ kmem_cache_free(free_nid_slab, i); | |
+} | |
+ | |
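+/* | |
+ * Returns 1 if the nid was added to the free list, 0 if it was | |
+ * skipped (nid 0, already allocated, or already listed), and -1 if | |
+ * the list already holds more than enough entries. | |
+ */ | |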
+static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) | |
+{ | |
+ struct free_nid *i; | |
+ struct nat_entry *ne; | |
+ bool allocated = false; | |
+ | |
+ if (nm_i->fcnt > 2 * MAX_FREE_NIDS) | |
+ return -1; | |
+ | |
+ /* 0 nid should not be used */ | |
+ if (nid == 0) | |
+ return 0; | |
+ | |
+ if (!build) | |
+ goto retry; | |
+ | |
+ /* do not add allocated nids */ | |
+ read_lock(&nm_i->nat_tree_lock); | |
+ ne = __lookup_nat_cache(nm_i, nid); | |
+ if (ne && nat_get_blkaddr(ne) != NULL_ADDR) | |
+ allocated = true; | |
+ read_unlock(&nm_i->nat_tree_lock); | |
+ if (allocated) | |
+ return 0; | |
+retry: | |
+ i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); | |
+ if (!i) { | |
+ cond_resched(); | |
+ goto retry; | |
+ } | |
+ i->nid = nid; | |
+ i->state = NID_NEW; | |
+ | |
+ spin_lock(&nm_i->free_nid_list_lock); | |
+ if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) { | |
+ spin_unlock(&nm_i->free_nid_list_lock); | |
+ kmem_cache_free(free_nid_slab, i); | |
+ return 0; | |
+ } | |
+ list_add_tail(&i->list, &nm_i->free_nid_list); | |
+ nm_i->fcnt++; | |
+ spin_unlock(&nm_i->free_nid_list_lock); | |
+ return 1; | |
+} | |
+ | |
+static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) | |
+{ | |
+ struct free_nid *i; | |
+ spin_lock(&nm_i->free_nid_list_lock); | |
+ i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); | |
+ if (i && i->state == NID_NEW) { | |
+ __del_from_free_nid_list(i); | |
+ nm_i->fcnt--; | |
+ } | |
+ spin_unlock(&nm_i->free_nid_list_lock); | |
+} | |
+ | |
+static void scan_nat_page(struct f2fs_nm_info *nm_i, | |
+ struct page *nat_page, nid_t start_nid) | |
+{ | |
+ struct f2fs_nat_block *nat_blk = page_address(nat_page); | |
+ block_t blk_addr; | |
+ int i; | |
+ | |
+ i = start_nid % NAT_ENTRY_PER_BLOCK; | |
+ | |
+ for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { | |
+ | |
+ if (start_nid >= nm_i->max_nid) | |
+ break; | |
+ | |
+ blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); | |
+ BUG_ON(blk_addr == NEW_ADDR); | |
+ if (blk_addr == NULL_ADDR) { | |
+ if (add_free_nid(nm_i, start_nid, true) < 0) | |
+ break; | |
+ } | |
+ } | |
+} | |
+ | |
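+/* |
+ * Refill the free nid list.  The scan is skipped while enough entries are |
+ * cached; otherwise a batch of NAT pages is read ahead and scanned from |
+ * next_scan_nid, and the result is then reconciled against the NAT entries |
+ * still sitting in the hot data summary journal. |
+ */ |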
+static void build_free_nids(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | |
+ struct f2fs_summary_block *sum = curseg->sum_blk; | |
+ int i = 0; | |
+ nid_t nid = nm_i->next_scan_nid; | |
+ | |
+ /* Enough entries */ | |
+ if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK) | |
+ return; | |
+ | |
+ /* readahead nat pages to be scanned */ | |
+ ra_nat_pages(sbi, nid); | |
+ | |
+ while (1) { | |
+ struct page *page = get_current_nat_page(sbi, nid); | |
+ | |
+ scan_nat_page(nm_i, page, nid); | |
+ f2fs_put_page(page, 1); | |
+ | |
+ nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); | |
+ if (nid >= nm_i->max_nid) | |
+ nid = 0; | |
+ | |
+ if (i++ == FREE_NID_PAGES) | |
+ break; | |
+ } | |
+ | |
+ /* remember where to resume scanning for free nids next time */ |
+ nm_i->next_scan_nid = nid; | |
+ | |
+ /* find free nids from current sum_pages */ | |
+ mutex_lock(&curseg->curseg_mutex); | |
+ for (i = 0; i < nats_in_cursum(sum); i++) { | |
+ block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); | |
+ nid = le32_to_cpu(nid_in_journal(sum, i)); | |
+ if (addr == NULL_ADDR) | |
+ add_free_nid(nm_i, nid, true); | |
+ else | |
+ remove_free_nid(nm_i, nid); | |
+ } | |
+ mutex_unlock(&curseg->curseg_mutex); | |
+} | |
+ | |
+/* | |
+ * If this function returns success, the caller can obtain a new nid |
+ * from the second parameter of this function. |
+ * The returned nid can be used as an ino as well as a nid when an inode |
+ * is created. |
+ */ | |
+bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ struct free_nid *i = NULL; | |
+ struct list_head *this; | |
+retry: | |
+ if (sbi->total_valid_node_count + 1 >= nm_i->max_nid) | |
+ return false; | |
+ | |
+ spin_lock(&nm_i->free_nid_list_lock); | |
+ | |
+ /* We should not use stale free nids created by build_free_nids */ | |
+ if (nm_i->fcnt && !sbi->on_build_free_nids) { | |
+ BUG_ON(list_empty(&nm_i->free_nid_list)); | |
+ list_for_each(this, &nm_i->free_nid_list) { | |
+ i = list_entry(this, struct free_nid, list); | |
+ if (i->state == NID_NEW) | |
+ break; | |
+ } | |
+ | |
+ BUG_ON(i->state != NID_NEW); | |
+ *nid = i->nid; | |
+ i->state = NID_ALLOC; | |
+ nm_i->fcnt--; | |
+ spin_unlock(&nm_i->free_nid_list_lock); | |
+ return true; | |
+ } | |
+ spin_unlock(&nm_i->free_nid_list_lock); | |
+ | |
+ /* Let's scan NAT pages and the NAT cache to get free nids */ |
+ mutex_lock(&nm_i->build_lock); | |
+ sbi->on_build_free_nids = 1; | |
+ build_free_nids(sbi); | |
+ sbi->on_build_free_nids = 0; | |
+ mutex_unlock(&nm_i->build_lock); | |
+ goto retry; | |
+} | |
+ | |
+/* | |
+ * alloc_nid() should be called prior to this function. | |
+ */ | |
+void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ struct free_nid *i; | |
+ | |
+ spin_lock(&nm_i->free_nid_list_lock); | |
+ i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); | |
+ BUG_ON(!i || i->state != NID_ALLOC); | |
+ __del_from_free_nid_list(i); | |
+ spin_unlock(&nm_i->free_nid_list_lock); | |
+} | |
+ | |
+/* | |
+ * alloc_nid() should be called prior to this function. | |
+ */ | |
+void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ struct free_nid *i; | |
+ | |
+ if (!nid) | |
+ return; | |
+ | |
+ spin_lock(&nm_i->free_nid_list_lock); | |
+ i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); | |
+ BUG_ON(!i || i->state != NID_ALLOC); | |
+ if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { | |
+ __del_from_free_nid_list(i); | |
+ } else { | |
+ i->state = NID_NEW; | |
+ nm_i->fcnt++; | |
+ } | |
+ spin_unlock(&nm_i->free_nid_list_lock); | |
+} | |
+ | |
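+/* |
+ * Rewrite a recovered node page in place at new_blkaddr and update its |
+ * NAT entry accordingly. |
+ */ |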
+void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, | |
+ struct f2fs_summary *sum, struct node_info *ni, | |
+ block_t new_blkaddr) | |
+{ | |
+ rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); | |
+ set_node_addr(sbi, ni, new_blkaddr); | |
+ clear_node_page_dirty(page); | |
+} | |
+ | |
+int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | |
+{ | |
+ struct address_space *mapping = sbi->node_inode->i_mapping; | |
+ struct f2fs_node *src, *dst; | |
+ nid_t ino = ino_of_node(page); | |
+ struct node_info old_ni, new_ni; | |
+ struct page *ipage; | |
+ int err; | |
+ | |
+ ipage = grab_cache_page(mapping, ino); | |
+ if (!ipage) | |
+ return -ENOMEM; | |
+ | |
+ /* make sure this ino is not reused from the free nid list */ |
+ remove_free_nid(NM_I(sbi), ino); | |
+ | |
+ get_node_info(sbi, ino, &old_ni); | |
+ SetPageUptodate(ipage); | |
+ fill_node_footer(ipage, ino, ino, 0, true); | |
+ | |
+ src = F2FS_NODE(page); | |
+ dst = F2FS_NODE(ipage); | |
+ | |
+ memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); | |
+ dst->i.i_size = 0; | |
+ dst->i.i_blocks = cpu_to_le64(1); | |
+ dst->i.i_links = cpu_to_le32(1); | |
+ dst->i.i_xattr_nid = 0; | |
+ | |
+ new_ni = old_ni; | |
+ new_ni.ino = ino; | |
+ | |
+ err = set_node_addr(sbi, &new_ni, NEW_ADDR); | |
+ if (!err) | |
+ if (!inc_valid_node_count(sbi, NULL, 1)) | |
+ err = -ENOSPC; | |
+ if (!err) | |
+ inc_valid_inode_count(sbi); | |
+ f2fs_put_page(ipage, 1); | |
+ return err; | |
+} | |
+ | |
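+/* |
+ * Rebuild the node summary block for a whole segment by reading every |
+ * node block in it and recording the nid from each node footer. |
+ */ |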
+int restore_node_summary(struct f2fs_sb_info *sbi, | |
+ unsigned int segno, struct f2fs_summary_block *sum) | |
+{ | |
+ struct f2fs_node *rn; | |
+ struct f2fs_summary *sum_entry; | |
+ struct page *page; | |
+ block_t addr; | |
+ int i, last_offset; | |
+ | |
+ /* allocate a temporary page for reading node blocks */ |
+ page = alloc_page(GFP_NOFS | __GFP_ZERO); | |
+ if (!page) | |
+ return -ENOMEM; | |
+ lock_page(page); | |
+ | |
+ /* scan the node segment */ | |
+ last_offset = sbi->blocks_per_seg; | |
+ addr = START_BLOCK(sbi, segno); | |
+ sum_entry = &sum->entries[0]; | |
+ | |
+ for (i = 0; i < last_offset; i++, sum_entry++) { | |
+ /* | |
+ * In order to read the next node page, |
+ * we must clear the PageUptodate flag. |
+ */ | |
+ ClearPageUptodate(page); | |
+ | |
+ if (f2fs_readpage(sbi, page, addr, READ_SYNC)) | |
+ goto out; | |
+ | |
+ lock_page(page); | |
+ rn = F2FS_NODE(page); | |
+ sum_entry->nid = rn->footer.nid; | |
+ sum_entry->version = 0; | |
+ sum_entry->ofs_in_node = 0; | |
+ addr++; | |
+ } | |
+ unlock_page(page); | |
+out: | |
+ __free_pages(page, 0); | |
+ return 0; | |
+} | |
+ | |
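+/* |
+ * When the NAT journal in the hot data summary block is full, move all of |
+ * its entries into the in-memory NAT cache as dirty entries, so that the |
+ * checkpoint writes them back to the NAT blocks proper.  Returns true if |
+ * the journal was drained. |
+ */ |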
+static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | |
+ struct f2fs_summary_block *sum = curseg->sum_blk; | |
+ int i; | |
+ | |
+ mutex_lock(&curseg->curseg_mutex); | |
+ | |
+ if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) { | |
+ mutex_unlock(&curseg->curseg_mutex); | |
+ return false; | |
+ } | |
+ | |
+ for (i = 0; i < nats_in_cursum(sum); i++) { | |
+ struct nat_entry *ne; | |
+ struct f2fs_nat_entry raw_ne; | |
+ nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); | |
+ | |
+ raw_ne = nat_in_journal(sum, i); | |
+retry: | |
+ write_lock(&nm_i->nat_tree_lock); | |
+ ne = __lookup_nat_cache(nm_i, nid); | |
+ if (ne) { | |
+ __set_nat_cache_dirty(nm_i, ne); | |
+ write_unlock(&nm_i->nat_tree_lock); | |
+ continue; | |
+ } | |
+ ne = grab_nat_entry(nm_i, nid); | |
+ if (!ne) { | |
+ write_unlock(&nm_i->nat_tree_lock); | |
+ goto retry; | |
+ } | |
+ nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr)); | |
+ nat_set_ino(ne, le32_to_cpu(raw_ne.ino)); | |
+ nat_set_version(ne, raw_ne.version); | |
+ __set_nat_cache_dirty(nm_i, ne); | |
+ write_unlock(&nm_i->nat_tree_lock); | |
+ } | |
+ update_nats_in_cursum(sum, -i); | |
+ mutex_unlock(&curseg->curseg_mutex); | |
+ return true; | |
+} | |
+ | |
+/* | |
+ * This function is called during the checkpointing process: each dirty |
+ * nat cache entry is written back either into the summary journal, when |
+ * there is room, or into its on-disk NAT block. |
+ */ | |
+void flush_nat_entries(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | |
+ struct f2fs_summary_block *sum = curseg->sum_blk; | |
+ struct list_head *cur, *n; | |
+ struct page *page = NULL; | |
+ struct f2fs_nat_block *nat_blk = NULL; | |
+ nid_t start_nid = 0, end_nid = 0; | |
+ bool flushed; | |
+ | |
+ flushed = flush_nats_in_journal(sbi); | |
+ | |
+ if (!flushed) | |
+ mutex_lock(&curseg->curseg_mutex); | |
+ | |
+ /* 1) flush dirty nat caches */ | |
+ list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) { | |
+ struct nat_entry *ne; | |
+ nid_t nid; | |
+ struct f2fs_nat_entry raw_ne; | |
+ int offset = -1; | |
+ block_t new_blkaddr; | |
+ | |
+ ne = list_entry(cur, struct nat_entry, list); | |
+ nid = nat_get_nid(ne); | |
+ | |
+ if (nat_get_blkaddr(ne) == NEW_ADDR) | |
+ continue; | |
+ if (flushed) | |
+ goto to_nat_page; | |
+ | |
+ /* if there is room for nat entries in curseg's summary page */ |
+ offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); | |
+ if (offset >= 0) { | |
+ raw_ne = nat_in_journal(sum, offset); | |
+ goto flush_now; | |
+ } | |
+to_nat_page: | |
+ if (!page || (start_nid > nid || nid > end_nid)) { | |
+ if (page) { | |
+ f2fs_put_page(page, 1); | |
+ page = NULL; | |
+ } | |
+ start_nid = START_NID(nid); | |
+ end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1; | |
+ | |
+ /* | |
+ * get the nat block page: dirtied, with an increased |
+ * reference count, mapped, and locked |
+ */ | |
+ page = get_next_nat_page(sbi, start_nid); | |
+ nat_blk = page_address(page); | |
+ } | |
+ | |
+ BUG_ON(!nat_blk); | |
+ raw_ne = nat_blk->entries[nid - start_nid]; | |
+flush_now: | |
+ new_blkaddr = nat_get_blkaddr(ne); | |
+ | |
+ raw_ne.ino = cpu_to_le32(nat_get_ino(ne)); | |
+ raw_ne.block_addr = cpu_to_le32(new_blkaddr); | |
+ raw_ne.version = nat_get_version(ne); | |
+ | |
+ if (offset < 0) { | |
+ nat_blk->entries[nid - start_nid] = raw_ne; | |
+ } else { | |
+ nat_in_journal(sum, offset) = raw_ne; | |
+ nid_in_journal(sum, offset) = cpu_to_le32(nid); | |
+ } | |
+ | |
+ if (nat_get_blkaddr(ne) == NULL_ADDR && | |
+ add_free_nid(NM_I(sbi), nid, false) <= 0) { | |
+ write_lock(&nm_i->nat_tree_lock); | |
+ __del_from_nat_cache(nm_i, ne); | |
+ write_unlock(&nm_i->nat_tree_lock); | |
+ } else { | |
+ write_lock(&nm_i->nat_tree_lock); | |
+ __clear_nat_cache_dirty(nm_i, ne); | |
+ ne->checkpointed = true; | |
+ write_unlock(&nm_i->nat_tree_lock); | |
+ } | |
+ } | |
+ if (!flushed) | |
+ mutex_unlock(&curseg->curseg_mutex); | |
+ f2fs_put_page(page, 1); | |
+ | |
+ /* 2) shrink nat caches if necessary */ | |
+ try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD); | |
+} | |
+ | |
+static int init_node_manager(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ unsigned char *version_bitmap; | |
+ unsigned int nat_segs, nat_blocks; | |
+ | |
+ nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); | |
+ | |
+ /* segment_count_nat includes the pair segment, so divide by 2 */ |
+ nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; | |
+ nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); | |
+ nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; | |
+ nm_i->fcnt = 0; | |
+ nm_i->nat_cnt = 0; | |
+ | |
+ INIT_LIST_HEAD(&nm_i->free_nid_list); | |
+ INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); | |
+ INIT_LIST_HEAD(&nm_i->nat_entries); | |
+ INIT_LIST_HEAD(&nm_i->dirty_nat_entries); | |
+ | |
+ mutex_init(&nm_i->build_lock); | |
+ spin_lock_init(&nm_i->free_nid_list_lock); | |
+ rwlock_init(&nm_i->nat_tree_lock); | |
+ | |
+ nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); | |
+ nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); | |
+ version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); | |
+ if (!version_bitmap) | |
+ return -EFAULT; | |
+ | |
+ nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, | |
+ GFP_KERNEL); | |
+ if (!nm_i->nat_bitmap) | |
+ return -ENOMEM; | |
+ return 0; | |
+} | |
+ | |
+int build_node_manager(struct f2fs_sb_info *sbi) | |
+{ | |
+ int err; | |
+ | |
+ sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL); | |
+ if (!sbi->nm_info) | |
+ return -ENOMEM; | |
+ | |
+ err = init_node_manager(sbi); | |
+ if (err) | |
+ return err; | |
+ | |
+ build_free_nids(sbi); | |
+ return 0; | |
+} | |
+ | |
+void destroy_node_manager(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ struct free_nid *i, *next_i; | |
+ struct nat_entry *natvec[NATVEC_SIZE]; | |
+ nid_t nid = 0; | |
+ unsigned int found; | |
+ | |
+ if (!nm_i) | |
+ return; | |
+ | |
+ /* destroy free nid list */ | |
+ spin_lock(&nm_i->free_nid_list_lock); | |
+ list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { | |
+ BUG_ON(i->state == NID_ALLOC); | |
+ __del_from_free_nid_list(i); | |
+ nm_i->fcnt--; | |
+ } | |
+ BUG_ON(nm_i->fcnt); | |
+ spin_unlock(&nm_i->free_nid_list_lock); | |
+ | |
+ /* destroy nat cache */ | |
+ write_lock(&nm_i->nat_tree_lock); | |
+ while ((found = __gang_lookup_nat_cache(nm_i, | |
+ nid, NATVEC_SIZE, natvec))) { | |
+ unsigned idx; | |
+ for (idx = 0; idx < found; idx++) { | |
+ struct nat_entry *e = natvec[idx]; | |
+ nid = nat_get_nid(e) + 1; | |
+ __del_from_nat_cache(nm_i, e); | |
+ } | |
+ } | |
+ BUG_ON(nm_i->nat_cnt); | |
+ write_unlock(&nm_i->nat_tree_lock); | |
+ | |
+ kfree(nm_i->nat_bitmap); | |
+ sbi->nm_info = NULL; | |
+ kfree(nm_i); | |
+} | |
+ | |
+int __init create_node_manager_caches(void) | |
+{ | |
+ nat_entry_slab = f2fs_kmem_cache_create("nat_entry", | |
+ sizeof(struct nat_entry), NULL); | |
+ if (!nat_entry_slab) | |
+ return -ENOMEM; | |
+ | |
+ free_nid_slab = f2fs_kmem_cache_create("free_nid", | |
+ sizeof(struct free_nid), NULL); | |
+ if (!free_nid_slab) { | |
+ kmem_cache_destroy(nat_entry_slab); | |
+ return -ENOMEM; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+void destroy_node_manager_caches(void) | |
+{ | |
+ kmem_cache_destroy(free_nid_slab); | |
+ kmem_cache_destroy(nat_entry_slab); | |
+} | |
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h | |
new file mode 100644 | |
index 0000000..3496bb3 | |
--- /dev/null | |
+++ b/fs/f2fs/node.h | |
@@ -0,0 +1,345 @@ | |
+/* | |
+ * fs/f2fs/node.h | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+/* start node id of a node block dedicated to the given node id */ | |
+#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) | |
+ | |
+/* node block offset on the NAT area dedicated to the given start node id */ | |
+#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) | |
+ | |
+/* # of pages to perform readahead before building free nids */ | |
+#define FREE_NID_PAGES 4 | |
+ | |
+/* maximum # of free node ids to produce during build_free_nids */ | |
+#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) | |
+ | |
+/* maximum readahead size for nodes when getting data blocks */ |
+#define MAX_RA_NODE 128 | |
+ | |
+/* maximum cached nat entries to manage memory footprint */ | |
+#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) | |
+ | |
+/* vector size for gang look-ups from the nat cache (a radix tree) */ |
+#define NATVEC_SIZE 64 | |
+ | |
+/* return value for read_node_page */ | |
+#define LOCKED_PAGE 1 | |
+ | |
+/* | |
+ * For node information | |
+ */ | |
+struct node_info { | |
+ nid_t nid; /* node id */ | |
+ nid_t ino; /* inode number of the node's owner */ | |
+ block_t blk_addr; /* block address of the node */ | |
+ unsigned char version; /* version of the node */ | |
+}; | |
+ | |
+struct nat_entry { | |
+ struct list_head list; /* for clean or dirty nat list */ | |
+ bool checkpointed; /* whether it is checkpointed or not */ | |
+ struct node_info ni; /* in-memory node information */ | |
+}; | |
+ | |
+#define nat_get_nid(nat) (nat->ni.nid) | |
+#define nat_set_nid(nat, n) (nat->ni.nid = n) | |
+#define nat_get_blkaddr(nat) (nat->ni.blk_addr) | |
+#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b) | |
+#define nat_get_ino(nat) (nat->ni.ino) | |
+#define nat_set_ino(nat, i) (nat->ni.ino = i) | |
+#define nat_get_version(nat) (nat->ni.version) | |
+#define nat_set_version(nat, v) (nat->ni.version = v) | |
+ | |
+#define __set_nat_cache_dirty(nm_i, ne) \ | |
+ list_move_tail(&ne->list, &nm_i->dirty_nat_entries); | |
+#define __clear_nat_cache_dirty(nm_i, ne) \ | |
+ list_move_tail(&ne->list, &nm_i->nat_entries); | |
+#define inc_node_version(version) (++version) | |
+ | |
+static inline void node_info_from_raw_nat(struct node_info *ni, | |
+ struct f2fs_nat_entry *raw_ne) | |
+{ | |
+ ni->ino = le32_to_cpu(raw_ne->ino); | |
+ ni->blk_addr = le32_to_cpu(raw_ne->block_addr); | |
+ ni->version = raw_ne->version; | |
+} | |
+ | |
+/* | |
+ * For free nid management |
+ */ | |
+enum nid_state { | |
+ NID_NEW, /* newly added to free nid list */ | |
+ NID_ALLOC /* it is allocated */ | |
+}; | |
+ | |
+struct free_nid { | |
+ struct list_head list; /* for free node id list */ | |
+ nid_t nid; /* node id */ | |
+ int state; /* in use or not: NID_NEW or NID_ALLOC */ | |
+}; | |
+ | |
+static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ struct free_nid *fnid; | |
+ | |
+ if (nm_i->fcnt <= 0) | |
+ return -1; | |
+ spin_lock(&nm_i->free_nid_list_lock); | |
+ fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); | |
+ *nid = fnid->nid; | |
+ spin_unlock(&nm_i->free_nid_list_lock); | |
+ return 0; | |
+} | |
+ | |
+/* | |
+ * inline functions | |
+ */ | |
+static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); | |
+} | |
+ | |
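+/* |
+ * Each NAT block has two on-disk copies kept in a pair of segments.  The |
+ * NAT version bitmap records which copy of a given block is currently |
+ * valid; set_to_next_nat() flips the bit so that updates alternate |
+ * between the two copies. |
+ */ |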
+static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ pgoff_t block_off; | |
+ pgoff_t block_addr; | |
+ int seg_off; | |
+ | |
+ block_off = NAT_BLOCK_OFFSET(start); | |
+ seg_off = block_off >> sbi->log_blocks_per_seg; | |
+ | |
+ block_addr = (pgoff_t)(nm_i->nat_blkaddr + | |
+ (seg_off << sbi->log_blocks_per_seg << 1) + | |
+ (block_off & ((1 << sbi->log_blocks_per_seg) - 1))); | |
+ | |
+ if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) | |
+ block_addr += sbi->blocks_per_seg; | |
+ | |
+ return block_addr; | |
+} | |
+ | |
+static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, | |
+ pgoff_t block_addr) | |
+{ | |
+ struct f2fs_nm_info *nm_i = NM_I(sbi); | |
+ | |
+ block_addr -= nm_i->nat_blkaddr; | |
+ if ((block_addr >> sbi->log_blocks_per_seg) % 2) | |
+ block_addr -= sbi->blocks_per_seg; | |
+ else | |
+ block_addr += sbi->blocks_per_seg; | |
+ | |
+ return block_addr + nm_i->nat_blkaddr; | |
+} | |
+ | |
+static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) | |
+{ | |
+ unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); | |
+ | |
+ if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) | |
+ f2fs_clear_bit(block_off, nm_i->nat_bitmap); | |
+ else | |
+ f2fs_set_bit(block_off, nm_i->nat_bitmap); | |
+} | |
+ | |
+static inline void fill_node_footer(struct page *page, nid_t nid, | |
+ nid_t ino, unsigned int ofs, bool reset) | |
+{ | |
+ struct f2fs_node *rn = F2FS_NODE(page); | |
+ if (reset) | |
+ memset(rn, 0, sizeof(*rn)); | |
+ rn->footer.nid = cpu_to_le32(nid); | |
+ rn->footer.ino = cpu_to_le32(ino); | |
+ rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); | |
+} | |
+ | |
+static inline void copy_node_footer(struct page *dst, struct page *src) | |
+{ | |
+ struct f2fs_node *src_rn = F2FS_NODE(src); | |
+ struct f2fs_node *dst_rn = F2FS_NODE(dst); | |
+ memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); | |
+} | |
+ | |
+static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | |
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | |
+ struct f2fs_node *rn = F2FS_NODE(page); | |
+ | |
+ rn->footer.cp_ver = ckpt->checkpoint_ver; | |
+ rn->footer.next_blkaddr = cpu_to_le32(blkaddr); | |
+} | |
+ | |
+static inline nid_t ino_of_node(struct page *node_page) | |
+{ | |
+ struct f2fs_node *rn = F2FS_NODE(node_page); | |
+ return le32_to_cpu(rn->footer.ino); | |
+} | |
+ | |
+static inline nid_t nid_of_node(struct page *node_page) | |
+{ | |
+ struct f2fs_node *rn = F2FS_NODE(node_page); | |
+ return le32_to_cpu(rn->footer.nid); | |
+} | |
+ | |
+static inline unsigned int ofs_of_node(struct page *node_page) | |
+{ | |
+ struct f2fs_node *rn = F2FS_NODE(node_page); | |
+ unsigned flag = le32_to_cpu(rn->footer.flag); | |
+ return flag >> OFFSET_BIT_SHIFT; | |
+} | |
+ | |
+static inline unsigned long long cpver_of_node(struct page *node_page) | |
+{ | |
+ struct f2fs_node *rn = F2FS_NODE(node_page); | |
+ return le64_to_cpu(rn->footer.cp_ver); | |
+} | |
+ | |
+static inline block_t next_blkaddr_of_node(struct page *node_page) | |
+{ | |
+ struct f2fs_node *rn = F2FS_NODE(node_page); | |
+ return le32_to_cpu(rn->footer.next_blkaddr); | |
+} | |
+ | |
+/* | |
+ * f2fs assigns the following node offsets described as (num). | |
+ * N = NIDS_PER_BLOCK | |
+ * | |
+ *    Inode block (0) |
+ *      |- direct node (1) |
+ *      |- direct node (2) |
+ *      |- indirect node (3) |
+ *      |            `- direct node (4 => 4 + N - 1) |
+ *      |- indirect node (4 + N) |
+ *      |            `- direct node (5 + N => 5 + 2N - 1) |
+ *      `- double indirect node (5 + 2N) |
+ *                   `- indirect node (6 + 2N) |
+ *                         `- direct node (x(N + 1)) |
+ */ | |
+static inline bool IS_DNODE(struct page *node_page) | |
+{ | |
+ unsigned int ofs = ofs_of_node(node_page); | |
+ | |
+ if (ofs == XATTR_NODE_OFFSET) | |
+ return false; | |
+ | |
+ if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || | |
+ ofs == 5 + 2 * NIDS_PER_BLOCK) | |
+ return false; | |
+ if (ofs >= 6 + 2 * NIDS_PER_BLOCK) { | |
+ ofs -= 6 + 2 * NIDS_PER_BLOCK; | |
+ if (!((long int)ofs % (NIDS_PER_BLOCK + 1))) | |
+ return false; | |
+ } | |
+ return true; | |
+} | |
+ | |
+static inline void set_nid(struct page *p, int off, nid_t nid, bool i) | |
+{ | |
+ struct f2fs_node *rn = F2FS_NODE(p); | |
+ | |
+ wait_on_page_writeback(p); | |
+ | |
+ if (i) | |
+ rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); | |
+ else | |
+ rn->in.nid[off] = cpu_to_le32(nid); | |
+ set_page_dirty(p); | |
+} | |
+ | |
+static inline nid_t get_nid(struct page *p, int off, bool i) | |
+{ | |
+ struct f2fs_node *rn = F2FS_NODE(p); | |
+ | |
+ if (i) | |
+ return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); | |
+ return le32_to_cpu(rn->in.nid[off]); | |
+} | |
+ | |
+/* | |
+ * Coldness identification: | |
+ * - Mark cold files in f2fs_inode_info | |
+ * - Mark cold node blocks in their node footer | |
+ * - Mark cold data pages in page cache | |
+ */ | |
+static inline int is_file(struct inode *inode, int type) | |
+{ | |
+ return F2FS_I(inode)->i_advise & type; | |
+} | |
+ | |
+static inline void set_file(struct inode *inode, int type) | |
+{ | |
+ F2FS_I(inode)->i_advise |= type; | |
+} | |
+ | |
+static inline void clear_file(struct inode *inode, int type) | |
+{ | |
+ F2FS_I(inode)->i_advise &= ~type; | |
+} | |
+ | |
+#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) | |
+#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) | |
+#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) | |
+#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) | |
+#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) | |
+#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) | |
+ | |
+static inline int is_cold_data(struct page *page) | |
+{ | |
+ return PageChecked(page); | |
+} | |
+ | |
+static inline void set_cold_data(struct page *page) | |
+{ | |
+ SetPageChecked(page); | |
+} | |
+ | |
+static inline void clear_cold_data(struct page *page) | |
+{ | |
+ ClearPageChecked(page); | |
+} | |
+ | |
+static inline int is_node(struct page *page, int type) | |
+{ | |
+ struct f2fs_node *rn = F2FS_NODE(page); | |
+ return le32_to_cpu(rn->footer.flag) & (1 << type); | |
+} | |
+ | |
+#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) | |
+#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) | |
+#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) | |
+ | |
+static inline void set_cold_node(struct inode *inode, struct page *page) | |
+{ | |
+ struct f2fs_node *rn = F2FS_NODE(page); | |
+ unsigned int flag = le32_to_cpu(rn->footer.flag); | |
+ | |
+ if (S_ISDIR(inode->i_mode)) | |
+ flag &= ~(0x1 << COLD_BIT_SHIFT); | |
+ else | |
+ flag |= (0x1 << COLD_BIT_SHIFT); | |
+ rn->footer.flag = cpu_to_le32(flag); | |
+} | |
+ | |
+static inline void set_mark(struct page *page, int mark, int type) | |
+{ | |
+ struct f2fs_node *rn = F2FS_NODE(page); | |
+ unsigned int flag = le32_to_cpu(rn->footer.flag); | |
+ if (mark) | |
+ flag |= (0x1 << type); | |
+ else | |
+ flag &= ~(0x1 << type); | |
+ rn->footer.flag = cpu_to_le32(flag); | |
+} | |
+#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) | |
+#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) | |
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c | |
new file mode 100644 | |
index 0000000..bee0034 | |
--- /dev/null | |
+++ b/fs/f2fs/recovery.c | |
@@ -0,0 +1,502 @@ | |
+/* | |
+ * fs/f2fs/recovery.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/fs.h> | |
+#include <linux/f2fs_fs.h> | |
+#include "f2fs.h" | |
+#include "node.h" | |
+#include "segment.h" | |
+ | |
+static struct kmem_cache *fsync_entry_slab; | |
+ | |
+bool space_for_roll_forward(struct f2fs_sb_info *sbi) | |
+{ | |
+ if (sbi->last_valid_block_count + sbi->alloc_valid_block_count | |
+ > sbi->user_block_count) | |
+ return false; | |
+ return true; | |
+} | |
+ | |
+static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, | |
+ nid_t ino) | |
+{ | |
+ struct list_head *this; | |
+ struct fsync_inode_entry *entry; | |
+ | |
+ list_for_each(this, head) { | |
+ entry = list_entry(this, struct fsync_inode_entry, list); | |
+ if (entry->inode->i_ino == ino) | |
+ return entry; | |
+ } | |
+ return NULL; | |
+} | |
+ | |
+static int recover_dentry(struct page *ipage, struct inode *inode) | |
+{ | |
+ struct f2fs_node *raw_node = F2FS_NODE(ipage); | |
+ struct f2fs_inode *raw_inode = &(raw_node->i); | |
+ nid_t pino = le32_to_cpu(raw_inode->i_pino); | |
+ struct f2fs_dir_entry *de; | |
+ struct qstr name; | |
+ struct page *page; | |
+ struct inode *dir, *einode; | |
+ int err = 0; | |
+ | |
+ dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); | |
+ if (!dir) { | |
+ dir = f2fs_iget(inode->i_sb, pino); | |
+ if (IS_ERR(dir)) { | |
+ f2fs_msg(inode->i_sb, KERN_INFO, | |
+ "%s: f2fs_iget failed: %ld", | |
+ __func__, PTR_ERR(dir)); | |
+ err = PTR_ERR(dir); | |
+ goto out; | |
+ } | |
+ set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); | |
+ add_dirty_dir_inode(dir); | |
+ } | |
+ | |
+ name.len = le32_to_cpu(raw_inode->i_namelen); | |
+ name.name = raw_inode->i_name; | |
+retry: | |
+ de = f2fs_find_entry(dir, &name, &page); | |
+ if (de && inode->i_ino == le32_to_cpu(de->ino)) | |
+ goto out_unmap_put; | |
+ if (de) { | |
+ einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); | |
+ if (IS_ERR(einode)) { | |
+ WARN_ON(1); | |
+ if (PTR_ERR(einode) == -ENOENT) | |
+ err = -EEXIST; | |
+ goto out_unmap_put; | |
+ } | |
+ err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); | |
+ if (err) { | |
+ iput(einode); | |
+ goto out_unmap_put; | |
+ } | |
+ f2fs_delete_entry(de, page, einode); | |
+ iput(einode); | |
+ goto retry; | |
+ } | |
+ err = __f2fs_add_link(dir, &name, inode); | |
+ goto out; | |
+ | |
+out_unmap_put: | |
+ kunmap(page); | |
+ f2fs_put_page(page, 0); | |
+out: | |
+ f2fs_msg(inode->i_sb, KERN_DEBUG, "recover_inode and its dentry: " | |
+ "ino = %x, name = %s, dir = %lx, err = %d", | |
+ ino_of_node(ipage), raw_inode->i_name, | |
+ IS_ERR(dir) ? 0 : dir->i_ino, err); | |
+ return err; | |
+} | |
+ | |
+static int recover_inode(struct inode *inode, struct page *node_page) | |
+{ | |
+ struct f2fs_node *raw_node = F2FS_NODE(node_page); | |
+ struct f2fs_inode *raw_inode = &(raw_node->i); | |
+ | |
+ if (!IS_INODE(node_page)) | |
+ return 0; | |
+ | |
+ inode->i_mode = le16_to_cpu(raw_inode->i_mode); | |
+ i_size_write(inode, le64_to_cpu(raw_inode->i_size)); | |
+ inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); | |
+ inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); | |
+ inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); | |
+ inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); | |
+ inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); | |
+ inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); | |
+ | |
+ if (is_dent_dnode(node_page)) | |
+ return recover_dentry(node_page, inode); | |
+ | |
+ f2fs_msg(inode->i_sb, KERN_DEBUG, "recover_inode: ino = %x, name = %s", | |
+ ino_of_node(node_page), raw_inode->i_name); | |
+ return 0; | |
+} | |
+ | |
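+/* |
+ * Walk the chain of warm node blocks written after the last checkpoint, |
+ * following next_blkaddr in each node footer, and collect the inodes that |
+ * were fsynced, recovering their inode pages along the way. |
+ */ |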
+static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |
+{ | |
+ unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); | |
+ struct curseg_info *curseg; | |
+ struct page *page; | |
+ block_t blkaddr; | |
+ int err = 0; | |
+ | |
+ /* get node pages in the current segment */ | |
+ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); | |
+ blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff; | |
+ | |
+ /* read node page */ | |
+ page = alloc_page(GFP_F2FS_ZERO); | |
+ if (!page) | |
+ return -ENOMEM; | |
+ lock_page(page); | |
+ | |
+ while (1) { | |
+ struct fsync_inode_entry *entry; | |
+ | |
+ err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); | |
+ if (err) | |
+ goto out; | |
+ | |
+ lock_page(page); | |
+ | |
+ if (cp_ver != cpver_of_node(page)) | |
+ break; | |
+ | |
+ if (!is_fsync_dnode(page)) | |
+ goto next; | |
+ | |
+ entry = get_fsync_inode(head, ino_of_node(page)); | |
+ if (entry) { | |
+ if (IS_INODE(page) && is_dent_dnode(page)) | |
+ set_inode_flag(F2FS_I(entry->inode), | |
+ FI_INC_LINK); | |
+ } else { | |
+ if (IS_INODE(page) && is_dent_dnode(page)) { | |
+ err = recover_inode_page(sbi, page); | |
+ if (err) { | |
+ f2fs_msg(sbi->sb, KERN_INFO, | |
+ "%s: recover_inode_page failed: %d", | |
+ __func__, err); | |
+ break; | |
+ } | |
+ } | |
+ | |
+ /* add this fsync inode to the list */ | |
+ entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); | |
+ if (!entry) { | |
+ err = -ENOMEM; | |
+ break; | |
+ } | |
+ | |
+ entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); | |
+ if (IS_ERR(entry->inode)) { | |
+ err = PTR_ERR(entry->inode); | |
+ f2fs_msg(sbi->sb, KERN_INFO, | |
+ "%s: f2fs_iget failed: %d", | |
+ __func__, err); | |
+ kmem_cache_free(fsync_entry_slab, entry); | |
+ break; | |
+ } | |
+ list_add_tail(&entry->list, head); | |
+ } | |
+ entry->blkaddr = blkaddr; | |
+ | |
+ err = recover_inode(entry->inode, page); | |
+ if (err && err != -ENOENT) { | |
+ f2fs_msg(sbi->sb, KERN_INFO, | |
+ "%s: recover_inode failed: %d", | |
+ __func__, err); | |
+ break; | |
+ } | |
+next: | |
+ /* move to the next node block in the chain */ |
+ blkaddr = next_blkaddr_of_node(page); | |
+ } | |
+ unlock_page(page); | |
+out: | |
+ __free_pages(page, 0); | |
+ return err; | |
+} | |
+ | |
+static void destroy_fsync_dnodes(struct list_head *head) | |
+{ | |
+ struct fsync_inode_entry *entry, *tmp; | |
+ | |
+ list_for_each_entry_safe(entry, tmp, head, list) { | |
+ iput(entry->inode); | |
+ list_del(&entry->list); | |
+ kmem_cache_free(fsync_entry_slab, entry); | |
+ } | |
+} | |
+ | |
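+/* |
+ * The block at blkaddr is about to be reused by recovery.  Find its |
+ * current owner through the segment summary and punch out the stale |
+ * index: either in the dnode already held by the caller, or in the node |
+ * page of another inode located via the summary entry. |
+ */ |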
+static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, | |
+ block_t blkaddr, struct dnode_of_data *dn) | |
+{ | |
+ struct seg_entry *sentry; | |
+ unsigned int segno = GET_SEGNO(sbi, blkaddr); | |
+ unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & | |
+ (sbi->blocks_per_seg - 1); | |
+ struct f2fs_summary sum; | |
+ nid_t ino, nid; | |
+ void *kaddr; | |
+ struct inode *inode; | |
+ struct page *node_page; | |
+ unsigned int offset; | |
+ block_t bidx; | |
+ int i; | |
+ | |
+ sentry = get_seg_entry(sbi, segno); | |
+ if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) | |
+ return 0; | |
+ | |
+ /* Get the previous summary */ | |
+ for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { | |
+ struct curseg_info *curseg = CURSEG_I(sbi, i); | |
+ if (curseg->segno == segno) { | |
+ sum = curseg->sum_blk->entries[blkoff]; | |
+ break; | |
+ } | |
+ } | |
+ if (i > CURSEG_COLD_DATA) { | |
+ struct page *sum_page = get_sum_page(sbi, segno); | |
+ struct f2fs_summary_block *sum_node; | |
+ kaddr = page_address(sum_page); | |
+ sum_node = (struct f2fs_summary_block *)kaddr; | |
+ sum = sum_node->entries[blkoff]; | |
+ f2fs_put_page(sum_page, 1); | |
+ } | |
+ | |
+ /* Use the locked dnode page and inode */ | |
+ nid = le32_to_cpu(sum.nid); | |
+ if (dn->inode->i_ino == nid) { | |
+ struct dnode_of_data tdn = *dn; | |
+ tdn.nid = nid; | |
+ tdn.node_page = dn->inode_page; | |
+ tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); | |
+ truncate_data_blocks_range(&tdn, 1); | |
+ return 0; | |
+ } else if (dn->nid == nid) { | |
+ struct dnode_of_data tdn = *dn; | |
+ tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); | |
+ truncate_data_blocks_range(&tdn, 1); | |
+ return 0; | |
+ } | |
+ | |
+ /* Get the node page */ | |
+ node_page = get_node_page(sbi, nid); | |
+ if (IS_ERR(node_page)) | |
+ return PTR_ERR(node_page); | |
+ | |
+ offset = ofs_of_node(node_page); | |
+ ino = ino_of_node(node_page); | |
+ f2fs_put_page(node_page, 1); | |
+ | |
+ /* Skip nodes with circular references */ | |
+ if (ino == dn->inode->i_ino) { | |
+ f2fs_msg(sbi->sb, KERN_ERR, "%s: node %x has circular inode %x", | |
+ __func__, ino, nid); | |
+ f2fs_handle_error(sbi); | |
+ return -EDEADLK; | |
+ } | |
+ | |
+ /* Deallocate previous index in the node page */ | |
+ inode = f2fs_iget(sbi->sb, ino); | |
+ if (IS_ERR(inode)) | |
+ return PTR_ERR(inode); | |
+ | |
+ bidx = start_bidx_of_node(offset, F2FS_I(inode)) + | |
+ le16_to_cpu(sum.ofs_in_node); | |
+ | |
+ truncate_hole(inode, bidx, bidx + 1); | |
+ iput(inode); | |
+ return 0; | |
+} | |
+ | |
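+/* |
+ * Roll forward one fsynced node page: for every data block index covered |
+ * by the page, if the logged block address differs from the current one, |
+ * adopt it in place via recover_data_page(), then rewrite the node page |
+ * itself at blkaddr. |
+ */ |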
+static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |
+ struct page *page, block_t blkaddr) | |
+{ | |
+ struct f2fs_inode_info *fi = F2FS_I(inode); | |
+ unsigned int start, end; | |
+ struct dnode_of_data dn; | |
+ struct f2fs_summary sum; | |
+ struct node_info ni; | |
+ int err = 0, recovered = 0; | |
+ int ilock; | |
+ | |
+ start = start_bidx_of_node(ofs_of_node(page), fi); | |
+ if (IS_INODE(page)) | |
+ end = start + ADDRS_PER_INODE(fi); | |
+ else | |
+ end = start + ADDRS_PER_BLOCK; | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ set_new_dnode(&dn, inode, NULL, NULL, 0); | |
+ | |
+ err = get_dnode_of_data(&dn, start, ALLOC_NODE); | |
+ if (err) { | |
+ mutex_unlock_op(sbi, ilock); | |
+ f2fs_msg(sbi->sb, KERN_INFO, | |
+ "%s: get_dnode_of_data failed: %d", __func__, err); | |
+ return err; | |
+ } | |
+ | |
+ wait_on_page_writeback(dn.node_page); | |
+ | |
+ get_node_info(sbi, dn.nid, &ni); | |
+ BUG_ON(ni.ino != ino_of_node(page)); | |
+ BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page)); | |
+ | |
+ for (; start < end; start++) { | |
+ block_t src, dest; | |
+ | |
+ src = datablock_addr(dn.node_page, dn.ofs_in_node); | |
+ dest = datablock_addr(page, dn.ofs_in_node); | |
+ | |
+ if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { | |
+ if (src == NULL_ADDR) { | |
+ int err = reserve_new_block(&dn); | |
+ /* We should not get -ENOSPC */ | |
+ if (err) | |
+ f2fs_msg(sbi->sb, KERN_INFO, | |
+ "%s: reserve_new_block failed: %d", | |
+ __func__, err); | |
+ BUG_ON(err); | |
+ } | |
+ | |
+ /* Check whether a previous node page still holds this index */ |
+ err = check_index_in_prev_nodes(sbi, dest, &dn); | |
+ if (err) | |
+ goto err; | |
+ | |
+ set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); | |
+ | |
+ /* write dummy data page */ | |
+ recover_data_page(sbi, NULL, &sum, src, dest); | |
+ update_extent_cache(dest, &dn); | |
+ recovered++; | |
+ } | |
+ dn.ofs_in_node++; | |
+ } | |
+ | |
+ /* write node page in place */ | |
+ set_summary(&sum, dn.nid, 0, 0); | |
+ if (IS_INODE(dn.node_page)) | |
+ sync_inode_page(&dn); | |
+ | |
+ copy_node_footer(dn.node_page, page); | |
+ fill_node_footer(dn.node_page, dn.nid, ni.ino, | |
+ ofs_of_node(page), false); | |
+ set_page_dirty(dn.node_page); | |
+ | |
+ recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); | |
+err: | |
+ f2fs_put_dnode(&dn); | |
+ mutex_unlock_op(sbi, ilock); | |
+ | |
+ f2fs_msg(sbi->sb, KERN_DEBUG, "recover_data: ino = %lx, " | |
+ "recovered_data = %d blocks, err = %d", | |
+ inode->i_ino, recovered, err); | |
+ return err; | |
+} | |
+ | |
+static int recover_data(struct f2fs_sb_info *sbi, | |
+ struct list_head *head, int type) | |
+{ | |
+ unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); | |
+ struct curseg_info *curseg; | |
+ struct page *page; | |
+ int err = 0; | |
+ block_t blkaddr; | |
+ | |
+ /* get node pages in the current segment */ | |
+ curseg = CURSEG_I(sbi, type); | |
+ blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); | |
+ | |
+ /* read node page */ | |
+ page = alloc_page(GFP_NOFS | __GFP_ZERO); | |
+ if (!page) | |
+ return -ENOMEM; | |
+ | |
+ lock_page(page); | |
+ | |
+ while (1) { | |
+ struct fsync_inode_entry *entry; | |
+ | |
+ err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); | |
+ if (err) { | |
+ f2fs_msg(sbi->sb, KERN_INFO, | |
+ "%s: f2fs_readpage failed: %d", | |
+ __func__, err); | |
+ goto out; | |
+ } | |
+ | |
+ lock_page(page); | |
+ | |
+ if (cp_ver != cpver_of_node(page)) | |
+ break; | |
+ | |
+ entry = get_fsync_inode(head, ino_of_node(page)); | |
+ if (!entry) | |
+ goto next; | |
+ | |
+ err = do_recover_data(sbi, entry->inode, page, blkaddr); | |
+ if (err) { | |
+ f2fs_msg(sbi->sb, KERN_INFO, | |
+ "%s: do_recover_data failed: %d", | |
+ __func__, err); | |
+ break; | |
+ } | |
+ | |
+ if (entry->blkaddr == blkaddr) { | |
+ iput(entry->inode); | |
+ list_del(&entry->list); | |
+ kmem_cache_free(fsync_entry_slab, entry); | |
+ } | |
+next: | |
+ /* move to the next node block in the chain */ |
+ blkaddr = next_blkaddr_of_node(page); | |
+ } | |
+ unlock_page(page); | |
+out: | |
+ __free_pages(page, 0); | |
+ | |
+ if (!err) | |
+ allocate_new_segments(sbi); | |
+ return err; | |
+} | |
+ | |
+int recover_fsync_data(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct list_head inode_list; | |
+ int err; | |
+ | |
+ fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", | |
+ sizeof(struct fsync_inode_entry), NULL); | |
+ if (unlikely(!fsync_entry_slab)) | |
+ return -ENOMEM; | |
+ | |
+ INIT_LIST_HEAD(&inode_list); | |
+ | |
+ /* step #1: find fsynced inode numbers */ | |
+ sbi->por_doing = 1; | |
+ err = find_fsync_dnodes(sbi, &inode_list); | |
+ if (err) { | |
+ f2fs_msg(sbi->sb, KERN_INFO, | |
+ "%s: find_fsync_dnodes failed: %d", __func__, err); | |
+ goto out; | |
+ } | |
+ | |
+ if (list_empty(&inode_list)) | |
+ goto out; | |
+ | |
+ /* step #2: recover data */ | |
+ err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); | |
+ if (!list_empty(&inode_list)) { | |
+ f2fs_handle_error(sbi); | |
+ err = -EIO; | |
+ } | |
+out: | |
+ destroy_fsync_dnodes(&inode_list); | |
+ kmem_cache_destroy(fsync_entry_slab); | |
+ sbi->por_doing = 0; | |
+ if (!err) { | |
+ f2fs_msg(sbi->sb, KERN_INFO, "recovery complete"); | |
+ write_checkpoint(sbi, false); | |
+ } else | |
+ f2fs_msg(sbi->sb, KERN_ERR, "recovery did not fully complete"); | |
+ | |
+ return err; | |
+} | |
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c | |
new file mode 100644 | |
index 0000000..cb8e70e | |
--- /dev/null | |
+++ b/fs/f2fs/segment.c | |
@@ -0,0 +1,1787 @@ | |
+/* | |
+ * fs/f2fs/segment.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/fs.h> | |
+#include <linux/f2fs_fs.h> | |
+#include <linux/bio.h> | |
+#include <linux/blkdev.h> | |
+#include <linux/prefetch.h> | |
+#include <linux/vmalloc.h> | |
+ | |
+#include "f2fs.h" | |
+#include "segment.h" | |
+#include "node.h" | |
+#include <trace/events/f2fs.h> | |
+ | |
+/* | |
+ * This function balances dirty node and dentry pages. | |
+ * In addition, it controls garbage collection. | |
+ */ | |
+void f2fs_balance_fs(struct f2fs_sb_info *sbi) | |
+{ | |
+ /* | |
+ * We should do GC, or end up writing a checkpoint, if there are too many |
+ * dirty dir/node pages without enough free segments. |
+ */ | |
+ if (has_not_enough_free_secs(sbi, 0)) { | |
+ mutex_lock(&sbi->gc_mutex); | |
+ f2fs_gc(sbi); | |
+ } | |
+} | |
+ | |
+static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, | |
+ enum dirty_type dirty_type) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ | |
+ /* need not be added */ | |
+ if (IS_CURSEG(sbi, segno)) | |
+ return; | |
+ | |
+ if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) | |
+ dirty_i->nr_dirty[dirty_type]++; | |
+ | |
+ if (dirty_type == DIRTY) { | |
+ struct seg_entry *sentry = get_seg_entry(sbi, segno); | |
+ enum dirty_type t = DIRTY_HOT_DATA; | |
+ | |
+ dirty_type = sentry->type; | |
+ | |
+ if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) | |
+ dirty_i->nr_dirty[dirty_type]++; | |
+ | |
+ /* Only one bitmap should be set */ | |
+ for (; t <= DIRTY_COLD_NODE; t++) { | |
+ if (t == dirty_type) | |
+ continue; | |
+ if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) | |
+ dirty_i->nr_dirty[t]--; | |
+ } | |
+ } | |
+} | |
+ | |
+static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, | |
+ enum dirty_type dirty_type) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ | |
+ if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type])) | |
+ dirty_i->nr_dirty[dirty_type]--; | |
+ | |
+ if (dirty_type == DIRTY) { | |
+ enum dirty_type t = DIRTY_HOT_DATA; | |
+ | |
+ /* clear all the bitmaps */ | |
+ for (; t <= DIRTY_COLD_NODE; t++) | |
+ if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) | |
+ dirty_i->nr_dirty[t]--; | |
+ | |
+ if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) | |
+ clear_bit(GET_SECNO(sbi, segno), | |
+ dirty_i->victim_secmap); | |
+ } | |
+} | |
+ | |
+/* | |
+ * No error such as -ENOMEM should occur here; |
+ * adding a dirty entry into the seglist is not a critical operation. |
+ * If a given segment is one of the current working segments, it won't be added. |
+ */ | |
+static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ unsigned short valid_blocks; | |
+ | |
+ if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) | |
+ return; | |
+ | |
+ mutex_lock(&dirty_i->seglist_lock); | |
+ | |
+ valid_blocks = get_valid_blocks(sbi, segno, 0); | |
+ | |
+ if (valid_blocks == 0) { | |
+ __locate_dirty_segment(sbi, segno, PRE); | |
+ __remove_dirty_segment(sbi, segno, DIRTY); | |
+ } else if (valid_blocks < sbi->blocks_per_seg) { | |
+ __locate_dirty_segment(sbi, segno, DIRTY); | |
+ } else { | |
+ /* Recovery routine with SSR needs this */ | |
+ __remove_dirty_segment(sbi, segno, DIRTY); | |
+ } | |
+ | |
+ mutex_unlock(&dirty_i->seglist_lock); | |
+} | |
+ | |
+/* | |
+ * clear_prefree_segments() should be called after the checkpoint is done. |
+ */ | |
+static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ unsigned int segno = -1; | |
+ unsigned int total_segs = TOTAL_SEGS(sbi); | |
+ | |
+ mutex_lock(&dirty_i->seglist_lock); | |
+ while (1) { | |
+ segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, | |
+ segno + 1); | |
+ if (segno >= total_segs) | |
+ break; | |
+ __set_test_and_free(sbi, segno); | |
+ } | |
+ mutex_unlock(&dirty_i->seglist_lock); | |
+} | |
+ | |
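+/* |
+ * After a checkpoint, prefree segments hold no live data any more: drop |
+ * them from the PRE dirty map and, when the discard mount option is set, |
+ * issue a TRIM for each freed segment. |
+ */ |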
+void clear_prefree_segments(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ unsigned int segno = -1; | |
+ unsigned int total_segs = TOTAL_SEGS(sbi); | |
+ | |
+ mutex_lock(&dirty_i->seglist_lock); | |
+ while (1) { | |
+ segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, | |
+ segno + 1); | |
+ if (segno >= total_segs) | |
+ break; | |
+ | |
+ if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) | |
+ dirty_i->nr_dirty[PRE]--; | |
+ | |
+ /* Let's use trim */ | |
+ if (test_opt(sbi, DISCARD)) | |
+ blkdev_issue_discard(sbi->sb->s_bdev, | |
+ START_BLOCK(sbi, segno) << | |
+ sbi->log_sectors_per_block, | |
+ 1 << (sbi->log_sectors_per_block + | |
+ sbi->log_blocks_per_seg), | |
+ GFP_NOFS, 0); | |
+ } | |
+ mutex_unlock(&dirty_i->seglist_lock); | |
+} | |
+ | |
+static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) | |
+ sit_i->dirty_sentries++; | |
+} | |
+ | |
+static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, | |
+ unsigned int segno, int modified) | |
+{ | |
+ struct seg_entry *se = get_seg_entry(sbi, segno); | |
+ se->type = type; | |
+ if (modified) | |
+ __mark_sit_entry_dirty(sbi, segno); | |
+} | |
+ | |
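+/* |
+ * Apply a +1/-1 valid-block delta to the SIT entry covering blkaddr and |
+ * keep its valid bitmap in sync.  If the delta or the bitmap looks |
+ * inconsistent, recount the bitmap and correct valid_blocks rather than |
+ * trusting the stale counter. |
+ */ |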
+static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) | |
+{ | |
+ struct seg_entry *se; | |
+ unsigned int segno, offset; | |
+ long int new_vblocks; | |
+ bool check_map = false; | |
+ | |
+ segno = GET_SEGNO(sbi, blkaddr); | |
+ | |
+ se = get_seg_entry(sbi, segno); | |
+ new_vblocks = se->valid_blocks + del; | |
+ offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); | |
+ | |
+ if (new_vblocks < 0 || new_vblocks > sbi->blocks_per_seg || | |
+ (new_vblocks >> (sizeof(unsigned short) << 3))) | |
+ if (f2fs_handle_error(sbi)) | |
+ check_map = true; | |
+ | |
+ se->mtime = get_mtime(sbi); | |
+ SIT_I(sbi)->max_mtime = se->mtime; | |
+ | |
+ /* Update valid block bitmap */ | |
+ if (del > 0) { | |
+ if (f2fs_set_bit(offset, se->cur_valid_map)) | |
+ if (f2fs_handle_error(sbi)) | |
+ check_map = true; | |
+ } else { | |
+ if (!f2fs_clear_bit(offset, se->cur_valid_map)) | |
+ if (f2fs_handle_error(sbi)) | |
+ check_map = true; | |
+ } | |
+ | |
+ if (unlikely(check_map)) { | |
+ int i; | |
+ long int vblocks = 0; | |
+ | |
+ f2fs_msg(sbi->sb, KERN_ERR, | |
+ "cannot %svalidate block %u in segment %u with %hu valid blocks", | |
+ (del < 0) ? "in" : "", | |
+ offset, segno, se->valid_blocks); | |
+ | |
+ /* assume the count was stale to start */ | |
+ del = 0; | |
+ for (i = 0; i < sbi->blocks_per_seg; i++) | |
+ if (f2fs_test_bit(i, se->cur_valid_map)) | |
+ vblocks++; | |
+ if (vblocks != se->valid_blocks) { | |
+ f2fs_msg(sbi->sb, KERN_INFO, "correcting valid block " | |
+ "counts %d -> %ld", se->valid_blocks, vblocks); | |
+ /* make accounting corrections */ | |
+ del = vblocks - se->valid_blocks; | |
+ } | |
+ } | |
+ se->valid_blocks += del; | |
+ | |
+ if (!f2fs_test_bit(offset, se->ckpt_valid_map)) | |
+ se->ckpt_valid_blocks += del; | |
+ | |
+ __mark_sit_entry_dirty(sbi, segno); | |
+ | |
+ /* update total number of valid blocks to be written in ckpt area */ | |
+ SIT_I(sbi)->written_valid_blocks += del; | |
+ | |
+ if (sbi->segs_per_sec > 1) | |
+ get_sec_entry(sbi, segno)->valid_blocks += del; | |
+} | |
+ | |
+static void refresh_sit_entry(struct f2fs_sb_info *sbi, | |
+ block_t old_blkaddr, block_t new_blkaddr) | |
+{ | |
+ update_sit_entry(sbi, new_blkaddr, 1); | |
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) | |
+ update_sit_entry(sbi, old_blkaddr, -1); | |
+} | |
+ | |
+void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) | |
+{ | |
+ unsigned int segno = GET_SEGNO(sbi, addr); | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ | |
+ BUG_ON(addr == NULL_ADDR); | |
+ if (addr == NEW_ADDR) | |
+ return; | |
+ | |
+ if (segno >= TOTAL_SEGS(sbi)) { | |
+ f2fs_msg(sbi->sb, KERN_ERR, "invalid segment number %u", segno); | |
+ if (f2fs_handle_error(sbi)) | |
+ return; | |
+ } | |
+ | |
+ /* add it into sit main buffer */ | |
+ mutex_lock(&sit_i->sentry_lock); | |
+ | |
+ update_sit_entry(sbi, addr, -1); | |
+ | |
+ /* add it into dirty seglist */ | |
+ locate_dirty_segment(sbi, segno); | |
+ | |
+ mutex_unlock(&sit_i->sentry_lock); | |
+} | |
+ | |
+/* | |
+ * This function must be called with curseg_mutex held |
+ */ | |
+static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, | |
+ struct f2fs_summary *sum) | |
+{ | |
+ struct curseg_info *curseg = CURSEG_I(sbi, type); | |
+ void *addr = curseg->sum_blk; | |
+ addr += curseg->next_blkoff * sizeof(struct f2fs_summary); | |
+ memcpy(addr, sum, sizeof(struct f2fs_summary)); | |
+} | |
+ | |
+/* | |
+ * Calculate the number of current summary pages for writing | |
+ */ | |
+int npages_for_summary_flush(struct f2fs_sb_info *sbi) | |
+{ | |
+ int total_size_bytes = 0; | |
+ int valid_sum_count = 0; | |
+ int i, sum_space; | |
+ | |
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { | |
+ if (sbi->ckpt->alloc_type[i] == SSR) | |
+ valid_sum_count += sbi->blocks_per_seg; | |
+ else | |
+ valid_sum_count += curseg_blkoff(sbi, i); | |
+ } | |
+ | |
+ total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1) | |
+ + sizeof(struct nat_journal) + 2 | |
+ + sizeof(struct sit_journal) + 2; | |
+ sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE; | |
+ if (total_size_bytes < sum_space) | |
+ return 1; | |
+ else if (total_size_bytes < 2 * sum_space) | |
+ return 2; | |
+ return 3; | |
+} | |
+ | |
+/* | |
+ * Caller should put this summary page | |
+ */ | |
+struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) | |
+{ | |
+ return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); | |
+} | |
+ | |
+static void write_sum_page(struct f2fs_sb_info *sbi, | |
+ struct f2fs_summary_block *sum_blk, block_t blk_addr) | |
+{ | |
+ struct page *page = grab_meta_page(sbi, blk_addr); | |
+ void *kaddr = page_address(page); | |
+ memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE); | |
+ set_page_dirty(page); | |
+ f2fs_put_page(page, 1); | |
+} | |
+ | |
+static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) | |
+{ | |
+ struct curseg_info *curseg = CURSEG_I(sbi, type); | |
+ unsigned int segno = curseg->segno + 1; | |
+ struct free_segmap_info *free_i = FREE_I(sbi); | |
+ | |
+ if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) | |
+ return !test_bit(segno, free_i->free_segmap); | |
+ return 0; | |
+} | |
+ | |
+/* | |
+ * Find a new segment from the free segments bitmap in the right order. |
+ * This function must succeed; otherwise it is a BUG. |
+ */ | |
+static void get_new_segment(struct f2fs_sb_info *sbi, | |
+ unsigned int *newseg, bool new_sec, int dir) | |
+{ | |
+ struct free_segmap_info *free_i = FREE_I(sbi); | |
+ unsigned int segno, secno, zoneno; | |
+ unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; | |
+ unsigned int hint = *newseg / sbi->segs_per_sec; | |
+ unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); | |
+ unsigned int left_start = hint; | |
+ bool init = true; | |
+ int go_left = 0; | |
+ int i; | |
+ | |
+ write_lock(&free_i->segmap_lock); | |
+ | |
+ if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { | |
+ segno = find_next_zero_bit(free_i->free_segmap, | |
+ TOTAL_SEGS(sbi), *newseg + 1); | |
+ if (segno - *newseg < sbi->segs_per_sec - | |
+ (*newseg % sbi->segs_per_sec)) | |
+ goto got_it; | |
+ } | |
+find_other_zone: | |
+ secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); | |
+ if (secno >= TOTAL_SECS(sbi)) { | |
+ if (dir == ALLOC_RIGHT) { | |
+ secno = find_next_zero_bit(free_i->free_secmap, | |
+ TOTAL_SECS(sbi), 0); | |
+ BUG_ON(secno >= TOTAL_SECS(sbi)); | |
+ } else { | |
+ go_left = 1; | |
+ left_start = hint - 1; | |
+ } | |
+ } | |
+ if (go_left == 0) | |
+ goto skip_left; | |
+ | |
+ while (test_bit(left_start, free_i->free_secmap)) { | |
+ if (left_start > 0) { | |
+ left_start--; | |
+ continue; | |
+ } | |
+ left_start = find_next_zero_bit(free_i->free_secmap, | |
+ TOTAL_SECS(sbi), 0); | |
+ BUG_ON(left_start >= TOTAL_SECS(sbi)); | |
+ break; | |
+ } | |
+ secno = left_start; | |
+skip_left: | |
+ hint = secno; | |
+ segno = secno * sbi->segs_per_sec; | |
+ zoneno = secno / sbi->secs_per_zone; | |
+ | |
+ /* give up on finding another zone */ | |
+ if (!init) | |
+ goto got_it; | |
+ if (sbi->secs_per_zone == 1) | |
+ goto got_it; | |
+ if (zoneno == old_zoneno) | |
+ goto got_it; | |
+ if (dir == ALLOC_LEFT) { | |
+ if (!go_left && zoneno + 1 >= total_zones) | |
+ goto got_it; | |
+ if (go_left && zoneno == 0) | |
+ goto got_it; | |
+ } | |
+ for (i = 0; i < NR_CURSEG_TYPE; i++) | |
+ if (CURSEG_I(sbi, i)->zone == zoneno) | |
+ break; | |
+ | |
+ if (i < NR_CURSEG_TYPE) { | |
+ /* zone is in use, try another */ |
+ if (go_left) | |
+ hint = zoneno * sbi->secs_per_zone - 1; | |
+ else if (zoneno + 1 >= total_zones) | |
+ hint = 0; | |
+ else | |
+ hint = (zoneno + 1) * sbi->secs_per_zone; | |
+ init = false; | |
+ goto find_other_zone; | |
+ } | |
+got_it: | |
+ /* mark the segment as in use in the free segmap */ |
+ BUG_ON(test_bit(segno, free_i->free_segmap)); | |
+ __set_inuse(sbi, segno); | |
+ *newseg = segno; | |
+ write_unlock(&free_i->segmap_lock); | |
+} | |
+ | |
+static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) | |
+{ | |
+ struct curseg_info *curseg = CURSEG_I(sbi, type); | |
+ struct summary_footer *sum_footer; | |
+ | |
+ curseg->segno = curseg->next_segno; | |
+ curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno); | |
+ curseg->next_blkoff = 0; | |
+ curseg->next_segno = NULL_SEGNO; | |
+ | |
+ sum_footer = &(curseg->sum_blk->footer); | |
+ memset(sum_footer, 0, sizeof(struct summary_footer)); | |
+ if (IS_DATASEG(type)) | |
+ SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA); | |
+ if (IS_NODESEG(type)) | |
+ SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE); | |
+ __set_sit_entry_type(sbi, type, curseg->segno, modified); | |
+} | |
+ | |
+/* | |
+ * Allocate a current working segment. | |
+ * This function always allocates a free segment in LFS manner. | |
+ */ | |
+static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) | |
+{ | |
+ struct curseg_info *curseg = CURSEG_I(sbi, type); | |
+ unsigned int segno = curseg->segno; | |
+ int dir = ALLOC_LEFT; | |
+ | |
+ write_sum_page(sbi, curseg->sum_blk, | |
+ GET_SUM_BLOCK(sbi, segno)); | |
+ if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) | |
+ dir = ALLOC_RIGHT; | |
+ | |
+ if (test_opt(sbi, NOHEAP)) | |
+ dir = ALLOC_RIGHT; | |
+ | |
+ get_new_segment(sbi, &segno, new_sec, dir); | |
+ curseg->next_segno = segno; | |
+ reset_curseg(sbi, type, 1); | |
+ curseg->alloc_type = LFS; | |
+} | |
+ | |
+static void __next_free_blkoff(struct f2fs_sb_info *sbi, | |
+ struct curseg_info *seg, block_t start) | |
+{ | |
+ struct seg_entry *se = get_seg_entry(sbi, seg->segno); | |
+ block_t ofs; | |
+ for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) { | |
+ if (!f2fs_test_bit(ofs, se->ckpt_valid_map) | |
+ && !f2fs_test_bit(ofs, se->cur_valid_map)) | |
+ break; | |
+ } | |
+ seg->next_blkoff = ofs; | |
+} | |
+ | |
+/* | |
+ * If a segment is written in LFS manner, the next block offset is simply |
+ * obtained by increasing the current block offset. However, if a segment |
+ * is written in SSR manner, the next block offset is obtained by calling |
+ * __next_free_blkoff(). |
+ */ | |
+static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, | |
+ struct curseg_info *seg) | |
+{ | |
+ if (seg->alloc_type == SSR) | |
+ __next_free_blkoff(sbi, seg, seg->next_blkoff + 1); | |
+ else | |
+ seg->next_blkoff++; | |
+} | |
+ | |
+/* | |
+ * This function always allocates a used segment (from the dirty seglist) | |
+ * in SSR manner, so it must restore the existing information of the | |
+ * segment's valid blocks. | |
+ */ | |
+static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ struct curseg_info *curseg = CURSEG_I(sbi, type); | |
+ unsigned int new_segno = curseg->next_segno; | |
+ struct f2fs_summary_block *sum_node; | |
+ struct page *sum_page; | |
+ | |
+ write_sum_page(sbi, curseg->sum_blk, | |
+ GET_SUM_BLOCK(sbi, curseg->segno)); | |
+ __set_test_and_inuse(sbi, new_segno); | |
+ | |
+ mutex_lock(&dirty_i->seglist_lock); | |
+ __remove_dirty_segment(sbi, new_segno, PRE); | |
+ __remove_dirty_segment(sbi, new_segno, DIRTY); | |
+ mutex_unlock(&dirty_i->seglist_lock); | |
+ | |
+ reset_curseg(sbi, type, 1); | |
+ curseg->alloc_type = SSR; | |
+ __next_free_blkoff(sbi, curseg, 0); | |
+ | |
+ if (reuse) { | |
+ sum_page = get_sum_page(sbi, new_segno); | |
+ sum_node = (struct f2fs_summary_block *)page_address(sum_page); | |
+ memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); | |
+ f2fs_put_page(sum_page, 1); | |
+ } | |
+} | |
+ | |
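+/* | |
+ * Ask the victim selection policy for a dirty segment to reuse in SSR | |
+ * manner. For data logs under free-section pressure, fall back to | |
+ * progressively hotter data types before giving up. | |
+ */ | |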
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) | |
+{ | |
+ struct curseg_info *curseg = CURSEG_I(sbi, type); | |
+ const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; | |
+ | |
+ if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0)) | |
+ return v_ops->get_victim(sbi, | |
+ &(curseg)->next_segno, BG_GC, type, SSR); | |
+ | |
+ /* For data segments, let's do SSR more intensively */ | |
+ for (; type >= CURSEG_HOT_DATA; type--) | |
+ if (v_ops->get_victim(sbi, &(curseg)->next_segno, | |
+ BG_GC, type, SSR)) | |
+ return 1; | |
+ return 0; | |
+} | |
+ | |
+/* | |
+ * Flush out the current segment and replace it with a new one. | |
+ * This function must always succeed; failure indicates a bug. | |
+ */ | |
+static void allocate_segment_by_default(struct f2fs_sb_info *sbi, | |
+ int type, bool force) | |
+{ | |
+ struct curseg_info *curseg = CURSEG_I(sbi, type); | |
+ | |
+ if (force) | |
+ new_curseg(sbi, type, true); | |
+ else if (type == CURSEG_WARM_NODE) | |
+ new_curseg(sbi, type, false); | |
+ else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) | |
+ new_curseg(sbi, type, false); | |
+ else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) | |
+ change_curseg(sbi, type, true); | |
+ else | |
+ new_curseg(sbi, type, false); | |
+#ifdef CONFIG_F2FS_STAT_FS | |
+ sbi->segment_count[curseg->alloc_type]++; | |
+#endif | |
+} | |
+ | |
+void allocate_new_segments(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct curseg_info *curseg; | |
+ unsigned int old_curseg; | |
+ int i; | |
+ | |
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { | |
+ curseg = CURSEG_I(sbi, i); | |
+ old_curseg = curseg->segno; | |
+ SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); | |
+ locate_dirty_segment(sbi, old_curseg); | |
+ } | |
+} | |
+ | |
+static const struct segment_allocation default_salloc_ops = { | |
+ .allocate_segment = allocate_segment_by_default, | |
+}; | |
+ | |
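+/* | |
+ * Write-side bio completion handler: on an I/O error, mark each page and | |
+ * its mapping with an error, flag the checkpoint, and force the filesystem | |
+ * read-only; in all cases end writeback on every page in the bio. | |
+ */ | |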
+static void f2fs_end_io_write(struct bio *bio, int err) | |
+{ | |
+ const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | |
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | |
+ struct bio_private *p = bio->bi_private; | |
+ | |
+ do { | |
+ struct page *page = bvec->bv_page; | |
+ | |
+ if (--bvec >= bio->bi_io_vec) | |
+ prefetchw(&bvec->bv_page->flags); | |
+ if (!uptodate) { | |
+ SetPageError(page); | |
+ if (page->mapping) | |
+ set_bit(AS_EIO, &page->mapping->flags); | |
+ set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); | |
+ p->sbi->sb->s_flags |= MS_RDONLY; | |
+ } | |
+ end_page_writeback(page); | |
+ dec_page_count(p->sbi, F2FS_WRITEBACK); | |
+ } while (bvec >= bio->bi_io_vec); | |
+ | |
+ if (p->is_sync) | |
+ complete(p->wait); | |
+ kfree(p); | |
+ bio_put(bio); | |
+} | |
+ | |
+struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages) | |
+{ | |
+ struct bio *bio; | |
+ | |
+ /* No failure on bio allocation */ | |
+ bio = bio_alloc(GFP_NOIO, npages); | |
+ bio->bi_bdev = bdev; | |
+ bio->bi_private = NULL; | |
+ | |
+ return bio; | |
+} | |
+ | |
+static void do_submit_bio(struct f2fs_sb_info *sbi, | |
+ enum page_type type, bool sync) | |
+{ | |
+ int rw = sync ? WRITE_SYNC : WRITE; | |
+ enum page_type btype = type > META ? META : type; | |
+ | |
+ if (type >= META_FLUSH) | |
+ rw = WRITE_FLUSH_FUA; | |
+ | |
+ if (btype == META) | |
+ rw |= REQ_META; | |
+ | |
+ if (sbi->bio[btype]) { | |
+ struct bio_private *p = sbi->bio[btype]->bi_private; | |
+ p->sbi = sbi; | |
+ sbi->bio[btype]->bi_end_io = f2fs_end_io_write; | |
+ | |
+ trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]); | |
+ | |
+ if (type == META_FLUSH) { | |
+ DECLARE_COMPLETION_ONSTACK(wait); | |
+ p->is_sync = true; | |
+ p->wait = &wait; | |
+ submit_bio(rw, sbi->bio[btype]); | |
+ wait_for_completion(&wait); | |
+ } else { | |
+ p->is_sync = false; | |
+ submit_bio(rw, sbi->bio[btype]); | |
+ } | |
+ sbi->bio[btype] = NULL; | |
+ } | |
+} | |
+ | |
+void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) | |
+{ | |
+ down_write(&sbi->bio_sem); | |
+ do_submit_bio(sbi, type, sync); | |
+ up_write(&sbi->bio_sem); | |
+} | |
+ | |
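+/* | |
+ * Queue one dirty page into the per-type write bio. If the page is not | |
+ * contiguous with the last block queued for this type, the pending bio | |
+ * is submitted first, so each bio covers consecutive block addresses. | |
+ */ | |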
+static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, | |
+ block_t blk_addr, enum page_type type) | |
+{ | |
+ struct block_device *bdev = sbi->sb->s_bdev; | |
+ | |
+ verify_block_addr(sbi, blk_addr); | |
+ | |
+ down_write(&sbi->bio_sem); | |
+ | |
+ inc_page_count(sbi, F2FS_WRITEBACK); | |
+ | |
+ if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1) | |
+ do_submit_bio(sbi, type, false); | |
+alloc_new: | |
+ if (sbi->bio[type] == NULL) { | |
+ struct bio_private *priv; | |
+retry: | |
+ priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); | |
+ if (!priv) { | |
+ cond_resched(); | |
+ goto retry; | |
+ } | |
+ | |
+ sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); | |
+ sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); | |
+ sbi->bio[type]->bi_private = priv; | |
+ /* | |
+ * The end_io callback will be assigned at the submission phase. | |
+ * Until then, let bio_add_page() merge consecutive IOs as much | |
+ * as possible. | |
+ */ | |
+ } | |
+ | |
+ if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) < | |
+ PAGE_CACHE_SIZE) { | |
+ do_submit_bio(sbi, type, false); | |
+ goto alloc_new; | |
+ } | |
+ | |
+ sbi->last_block_in_bio[type] = blk_addr; | |
+ | |
+ up_write(&sbi->bio_sem); | |
+ trace_f2fs_submit_write_page(page, blk_addr, type); | |
+} | |
+ | |
+void f2fs_wait_on_page_writeback(struct page *page, | |
+ enum page_type type, bool sync) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | |
+ if (PageWriteback(page)) { | |
+ f2fs_submit_bio(sbi, type, sync); | |
+ wait_on_page_writeback(page); | |
+ } | |
+} | |
+ | |
+static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) | |
+{ | |
+ struct curseg_info *curseg = CURSEG_I(sbi, type); | |
+ if (curseg->next_blkoff < sbi->blocks_per_seg) | |
+ return true; | |
+ return false; | |
+} | |
+ | |
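+/* | |
+ * The three helpers below map a page onto an active log depending on how | |
+ * many logs are configured: 2 logs only separate data from nodes, 4 logs | |
+ * add a hot/cold split, and 6 logs use the full hot/warm/cold separation. | |
+ */ | |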
+static int __get_segment_type_2(struct page *page, enum page_type p_type) | |
+{ | |
+ if (p_type == DATA) | |
+ return CURSEG_HOT_DATA; | |
+ else | |
+ return CURSEG_HOT_NODE; | |
+} | |
+ | |
+static int __get_segment_type_4(struct page *page, enum page_type p_type) | |
+{ | |
+ if (p_type == DATA) { | |
+ struct inode *inode = page->mapping->host; | |
+ | |
+ if (S_ISDIR(inode->i_mode)) | |
+ return CURSEG_HOT_DATA; | |
+ else | |
+ return CURSEG_COLD_DATA; | |
+ } else { | |
+ if (IS_DNODE(page) && !is_cold_node(page)) | |
+ return CURSEG_HOT_NODE; | |
+ else | |
+ return CURSEG_COLD_NODE; | |
+ } | |
+} | |
+ | |
+static int __get_segment_type_6(struct page *page, enum page_type p_type) | |
+{ | |
+ if (p_type == DATA) { | |
+ struct inode *inode = page->mapping->host; | |
+ | |
+ if (S_ISDIR(inode->i_mode)) | |
+ return CURSEG_HOT_DATA; | |
+ else if (is_cold_data(page) || file_is_cold(inode)) | |
+ return CURSEG_COLD_DATA; | |
+ else | |
+ return CURSEG_WARM_DATA; | |
+ } else { | |
+ if (IS_DNODE(page)) | |
+ return is_cold_node(page) ? CURSEG_WARM_NODE : | |
+ CURSEG_HOT_NODE; | |
+ else | |
+ return CURSEG_COLD_NODE; | |
+ } | |
+} | |
+ | |
+static int __get_segment_type(struct page *page, enum page_type p_type) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | |
+ switch (sbi->active_logs) { | |
+ case 2: | |
+ return __get_segment_type_2(page, p_type); | |
+ case 4: | |
+ return __get_segment_type_4(page, p_type); | |
+ } | |
+ /* NR_CURSEG_TYPE(6) logs by default */ | |
+ BUG_ON(sbi->active_logs != NR_CURSEG_TYPE); | |
+ return __get_segment_type_6(page, p_type); | |
+} | |
+ | |
+static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, | |
+ block_t old_blkaddr, block_t *new_blkaddr, | |
+ struct f2fs_summary *sum, enum page_type p_type) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ struct curseg_info *curseg; | |
+ unsigned int old_cursegno; | |
+ int type; | |
+ | |
+ type = __get_segment_type(page, p_type); | |
+ curseg = CURSEG_I(sbi, type); | |
+ | |
+ mutex_lock(&curseg->curseg_mutex); | |
+ | |
+ *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); | |
+ old_cursegno = curseg->segno; | |
+ | |
+ /* | |
+ * __add_sum_entry must be called under the curseg_mutex | |
+ * because it updates a summary entry in the current | |
+ * summary block. | |
+ */ | |
+ __add_sum_entry(sbi, type, sum); | |
+ | |
+ mutex_lock(&sit_i->sentry_lock); | |
+ __refresh_next_blkoff(sbi, curseg); | |
+#ifdef CONFIG_F2FS_STAT_FS | |
+ sbi->block_count[curseg->alloc_type]++; | |
+#endif | |
+ | |
+ /* | |
+ * SIT information should be updated before segment allocation, | |
+ * since SSR needs the latest valid block information. | |
+ */ | |
+ refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); | |
+ | |
+ if (!__has_curseg_space(sbi, type)) | |
+ sit_i->s_ops->allocate_segment(sbi, type, false); | |
+ | |
+ locate_dirty_segment(sbi, old_cursegno); | |
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); | |
+ mutex_unlock(&sit_i->sentry_lock); | |
+ | |
+ if (p_type == NODE) | |
+ fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); | |
+ | |
+ /* writeout dirty page into bdev */ | |
+ submit_write_page(sbi, page, *new_blkaddr, p_type); | |
+ | |
+ mutex_unlock(&curseg->curseg_mutex); | |
+} | |
+ | |
+void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) | |
+{ | |
+ set_page_writeback(page); | |
+ submit_write_page(sbi, page, page->index, META); | |
+} | |
+ | |
+void write_node_page(struct f2fs_sb_info *sbi, struct page *page, | |
+ unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) | |
+{ | |
+ struct f2fs_summary sum; | |
+ set_summary(&sum, nid, 0, 0); | |
+ do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE); | |
+} | |
+ | |
+void write_data_page(struct inode *inode, struct page *page, | |
+ struct dnode_of_data *dn, block_t old_blkaddr, | |
+ block_t *new_blkaddr) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct f2fs_summary sum; | |
+ struct node_info ni; | |
+ | |
+ BUG_ON(old_blkaddr == NULL_ADDR); | |
+ get_node_info(sbi, dn->nid, &ni); | |
+ set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); | |
+ | |
+ do_write_page(sbi, page, old_blkaddr, | |
+ new_blkaddr, &sum, DATA); | |
+} | |
+ | |
+void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, | |
+ block_t old_blk_addr) | |
+{ | |
+ submit_write_page(sbi, page, old_blk_addr, DATA); | |
+} | |
+ | |
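+/* | |
+ * Place a recovered data block at @new_blkaddr: pick a suitable log type, | |
+ * redirect the current segment there if needed, record the summary entry, | |
+ * and move the valid-block accounting from the old to the new address. | |
+ */ | |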
+void recover_data_page(struct f2fs_sb_info *sbi, | |
+ struct page *page, struct f2fs_summary *sum, | |
+ block_t old_blkaddr, block_t new_blkaddr) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ struct curseg_info *curseg; | |
+ unsigned int segno, old_cursegno; | |
+ struct seg_entry *se; | |
+ int type; | |
+ | |
+ segno = GET_SEGNO(sbi, new_blkaddr); | |
+ se = get_seg_entry(sbi, segno); | |
+ type = se->type; | |
+ | |
+ if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { | |
+ if (old_blkaddr == NULL_ADDR) | |
+ type = CURSEG_COLD_DATA; | |
+ else | |
+ type = CURSEG_WARM_DATA; | |
+ } | |
+ curseg = CURSEG_I(sbi, type); | |
+ | |
+ mutex_lock(&curseg->curseg_mutex); | |
+ mutex_lock(&sit_i->sentry_lock); | |
+ | |
+ old_cursegno = curseg->segno; | |
+ | |
+ /* change the current segment */ | |
+ if (segno != curseg->segno) { | |
+ curseg->next_segno = segno; | |
+ change_curseg(sbi, type, true); | |
+ } | |
+ | |
+ curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & | |
+ (sbi->blocks_per_seg - 1); | |
+ __add_sum_entry(sbi, type, sum); | |
+ | |
+ refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); | |
+ | |
+ locate_dirty_segment(sbi, old_cursegno); | |
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); | |
+ | |
+ mutex_unlock(&sit_i->sentry_lock); | |
+ mutex_unlock(&curseg->curseg_mutex); | |
+} | |
+ | |
+void rewrite_node_page(struct f2fs_sb_info *sbi, | |
+ struct page *page, struct f2fs_summary *sum, | |
+ block_t old_blkaddr, block_t new_blkaddr) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ int type = CURSEG_WARM_NODE; | |
+ struct curseg_info *curseg; | |
+ unsigned int segno, old_cursegno; | |
+ block_t next_blkaddr = next_blkaddr_of_node(page); | |
+ unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); | |
+ | |
+ curseg = CURSEG_I(sbi, type); | |
+ | |
+ mutex_lock(&curseg->curseg_mutex); | |
+ mutex_lock(&sit_i->sentry_lock); | |
+ | |
+ segno = GET_SEGNO(sbi, new_blkaddr); | |
+ old_cursegno = curseg->segno; | |
+ | |
+ /* change the current segment */ | |
+ if (segno != curseg->segno) { | |
+ curseg->next_segno = segno; | |
+ change_curseg(sbi, type, true); | |
+ } | |
+ curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & | |
+ (sbi->blocks_per_seg - 1); | |
+ __add_sum_entry(sbi, type, sum); | |
+ | |
+ /* change the current log to the next block addr in advance */ | |
+ if (next_segno != segno) { | |
+ curseg->next_segno = next_segno; | |
+ change_curseg(sbi, type, true); | |
+ } | |
+ curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & | |
+ (sbi->blocks_per_seg - 1); | |
+ | |
+ /* rewrite node page */ | |
+ set_page_writeback(page); | |
+ submit_write_page(sbi, page, new_blkaddr, NODE); | |
+ f2fs_submit_bio(sbi, NODE, true); | |
+ refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); | |
+ | |
+ locate_dirty_segment(sbi, old_cursegno); | |
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); | |
+ | |
+ mutex_unlock(&sit_i->sentry_lock); | |
+ mutex_unlock(&curseg->curseg_mutex); | |
+} | |
+ | |
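+/* | |
+ * Rebuild the active data logs from a compacted checkpoint: the NAT and | |
+ * SIT journals come first, followed by the summary entries of each data | |
+ * log packed back to back across consecutive meta pages. | |
+ */ | |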
+static int read_compacted_summaries(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | |
+ struct curseg_info *seg_i; | |
+ unsigned char *kaddr; | |
+ struct page *page; | |
+ block_t start; | |
+ int i, j, offset; | |
+ | |
+ start = start_sum_block(sbi); | |
+ | |
+ page = get_meta_page(sbi, start++); | |
+ kaddr = (unsigned char *)page_address(page); | |
+ | |
+ /* Step 1: restore nat cache */ | |
+ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); | |
+ memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE); | |
+ | |
+ /* Step 2: restore sit cache */ | |
+ seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); | |
+ memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE, | |
+ SUM_JOURNAL_SIZE); | |
+ offset = 2 * SUM_JOURNAL_SIZE; | |
+ | |
+ /* Step 3: restore summary entries */ | |
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { | |
+ unsigned short blk_off; | |
+ unsigned int segno; | |
+ | |
+ seg_i = CURSEG_I(sbi, i); | |
+ segno = le32_to_cpu(ckpt->cur_data_segno[i]); | |
+ blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); | |
+ seg_i->next_segno = segno; | |
+ reset_curseg(sbi, i, 0); | |
+ seg_i->alloc_type = ckpt->alloc_type[i]; | |
+ seg_i->next_blkoff = blk_off; | |
+ | |
+ if (seg_i->alloc_type == SSR) | |
+ blk_off = sbi->blocks_per_seg; | |
+ | |
+ for (j = 0; j < blk_off; j++) { | |
+ struct f2fs_summary *s; | |
+ s = (struct f2fs_summary *)(kaddr + offset); | |
+ seg_i->sum_blk->entries[j] = *s; | |
+ offset += SUMMARY_SIZE; | |
+ if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE - | |
+ SUM_FOOTER_SIZE) | |
+ continue; | |
+ | |
+ f2fs_put_page(page, 1); | |
+ page = NULL; | |
+ | |
+ page = get_meta_page(sbi, start++); | |
+ kaddr = (unsigned char *)page_address(page); | |
+ offset = 0; | |
+ } | |
+ } | |
+ f2fs_put_page(page, 1); | |
+ return 0; | |
+} | |
+ | |
+static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) | |
+{ | |
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | |
+ struct f2fs_summary_block *sum; | |
+ struct curseg_info *curseg; | |
+ struct page *new; | |
+ unsigned short blk_off; | |
+ unsigned int segno = 0; | |
+ block_t blk_addr = 0; | |
+ | |
+ /* get segment number and block addr */ | |
+ if (IS_DATASEG(type)) { | |
+ segno = le32_to_cpu(ckpt->cur_data_segno[type]); | |
+ blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - | |
+ CURSEG_HOT_DATA]); | |
+ if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) | |
+ blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); | |
+ else | |
+ blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); | |
+ } else { | |
+ segno = le32_to_cpu(ckpt->cur_node_segno[type - | |
+ CURSEG_HOT_NODE]); | |
+ blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - | |
+ CURSEG_HOT_NODE]); | |
+ if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) | |
+ blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, | |
+ type - CURSEG_HOT_NODE); | |
+ else | |
+ blk_addr = GET_SUM_BLOCK(sbi, segno); | |
+ } | |
+ | |
+ new = get_meta_page(sbi, blk_addr); | |
+ sum = (struct f2fs_summary_block *)page_address(new); | |
+ | |
+ if (IS_NODESEG(type)) { | |
+ if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { | |
+ struct f2fs_summary *ns = &sum->entries[0]; | |
+ int i; | |
+ for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { | |
+ ns->version = 0; | |
+ ns->ofs_in_node = 0; | |
+ } | |
+ } else { | |
+ if (restore_node_summary(sbi, segno, sum)) { | |
+ f2fs_put_page(new, 1); | |
+ return -EINVAL; | |
+ } | |
+ } | |
+ } | |
+ | |
+ /* set the uncompleted segment as curseg */ | |
+ curseg = CURSEG_I(sbi, type); | |
+ mutex_lock(&curseg->curseg_mutex); | |
+ memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE); | |
+ curseg->next_segno = segno; | |
+ reset_curseg(sbi, type, 0); | |
+ curseg->alloc_type = ckpt->alloc_type[type]; | |
+ curseg->next_blkoff = blk_off; | |
+ mutex_unlock(&curseg->curseg_mutex); | |
+ f2fs_put_page(new, 1); | |
+ return 0; | |
+} | |
+ | |
+static int restore_curseg_summaries(struct f2fs_sb_info *sbi) | |
+{ | |
+ int type = CURSEG_HOT_DATA; | |
+ | |
+ if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { | |
+ /* restore for compacted data summary */ | |
+ if (read_compacted_summaries(sbi)) | |
+ return -EINVAL; | |
+ type = CURSEG_HOT_NODE; | |
+ } | |
+ | |
+ for (; type <= CURSEG_COLD_NODE; type++) | |
+ if (read_normal_summaries(sbi, type)) | |
+ return -EINVAL; | |
+ return 0; | |
+} | |
+ | |
+static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) | |
+{ | |
+ struct page *page; | |
+ unsigned char *kaddr; | |
+ struct f2fs_summary *summary; | |
+ struct curseg_info *seg_i; | |
+ int written_size = 0; | |
+ int i, j; | |
+ | |
+ page = grab_meta_page(sbi, blkaddr++); | |
+ kaddr = (unsigned char *)page_address(page); | |
+ | |
+ /* Step 1: write nat cache */ | |
+ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); | |
+ memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE); | |
+ written_size += SUM_JOURNAL_SIZE; | |
+ | |
+ /* Step 2: write sit cache */ | |
+ seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); | |
+ memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits, | |
+ SUM_JOURNAL_SIZE); | |
+ written_size += SUM_JOURNAL_SIZE; | |
+ | |
+ set_page_dirty(page); | |
+ | |
+ /* Step 3: write summary entries */ | |
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { | |
+ unsigned short blkoff; | |
+ seg_i = CURSEG_I(sbi, i); | |
+ if (sbi->ckpt->alloc_type[i] == SSR) | |
+ blkoff = sbi->blocks_per_seg; | |
+ else | |
+ blkoff = curseg_blkoff(sbi, i); | |
+ | |
+ for (j = 0; j < blkoff; j++) { | |
+ if (!page) { | |
+ page = grab_meta_page(sbi, blkaddr++); | |
+ kaddr = (unsigned char *)page_address(page); | |
+ written_size = 0; | |
+ } | |
+ summary = (struct f2fs_summary *)(kaddr + written_size); | |
+ *summary = seg_i->sum_blk->entries[j]; | |
+ written_size += SUMMARY_SIZE; | |
+ set_page_dirty(page); | |
+ | |
+ if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE - | |
+ SUM_FOOTER_SIZE) | |
+ continue; | |
+ | |
+ f2fs_put_page(page, 1); | |
+ page = NULL; | |
+ } | |
+ } | |
+ if (page) | |
+ f2fs_put_page(page, 1); | |
+} | |
+ | |
+static void write_normal_summaries(struct f2fs_sb_info *sbi, | |
+ block_t blkaddr, int type) | |
+{ | |
+ int i, end; | |
+ if (IS_DATASEG(type)) | |
+ end = type + NR_CURSEG_DATA_TYPE; | |
+ else | |
+ end = type + NR_CURSEG_NODE_TYPE; | |
+ | |
+ for (i = type; i < end; i++) { | |
+ struct curseg_info *sum = CURSEG_I(sbi, i); | |
+ mutex_lock(&sum->curseg_mutex); | |
+ write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type)); | |
+ mutex_unlock(&sum->curseg_mutex); | |
+ } | |
+} | |
+ | |
+void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) | |
+{ | |
+ if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) | |
+ write_compacted_summaries(sbi, start_blk); | |
+ else | |
+ write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); | |
+} | |
+ | |
+void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) | |
+{ | |
+ if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) | |
+ write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); | |
+} | |
+ | |
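+/* | |
+ * Look up @val in the NAT or SIT journal of the given summary block and | |
+ * return its slot index; if it is absent and @alloc is set, allocate a | |
+ * new slot while the journal still has room. Returns -1 on failure. | |
+ */ | |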
+int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, | |
+ unsigned int val, int alloc) | |
+{ | |
+ int i; | |
+ | |
+ if (type == NAT_JOURNAL) { | |
+ for (i = 0; i < nats_in_cursum(sum); i++) { | |
+ if (le32_to_cpu(nid_in_journal(sum, i)) == val) | |
+ return i; | |
+ } | |
+ if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) | |
+ return update_nats_in_cursum(sum, 1); | |
+ } else if (type == SIT_JOURNAL) { | |
+ for (i = 0; i < sits_in_cursum(sum); i++) | |
+ if (le32_to_cpu(segno_in_journal(sum, i)) == val) | |
+ return i; | |
+ if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES) | |
+ return update_sits_in_cursum(sum, 1); | |
+ } | |
+ return -1; | |
+} | |
+ | |
+static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, | |
+ unsigned int segno) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); | |
+ block_t blk_addr = sit_i->sit_base_addr + offset; | |
+ | |
+ check_seg_range(sbi, segno); | |
+ | |
+ /* calculate sit block address */ | |
+ if (f2fs_test_bit(offset, sit_i->sit_bitmap)) | |
+ blk_addr += sit_i->sit_blocks; | |
+ | |
+ return get_meta_page(sbi, blk_addr); | |
+} | |
+ | |
+static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, | |
+ unsigned int start) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ struct page *src_page, *dst_page; | |
+ pgoff_t src_off, dst_off; | |
+ void *src_addr, *dst_addr; | |
+ | |
+ src_off = current_sit_addr(sbi, start); | |
+ dst_off = next_sit_addr(sbi, src_off); | |
+ | |
+ /* get current sit block page without lock */ | |
+ src_page = get_meta_page(sbi, src_off); | |
+ dst_page = grab_meta_page(sbi, dst_off); | |
+ BUG_ON(PageDirty(src_page)); | |
+ | |
+ src_addr = page_address(src_page); | |
+ dst_addr = page_address(dst_page); | |
+ memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); | |
+ | |
+ set_page_dirty(dst_page); | |
+ f2fs_put_page(src_page, 1); | |
+ | |
+ set_to_next_sit(sit_i, start); | |
+ | |
+ return dst_page; | |
+} | |
+ | |
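+/* | |
+ * Flush every SIT entry cached in the cold data summary journal back to | |
+ * the dirty-sentries bitmap once the journal is full, emptying it so new | |
+ * entries can be journaled. Returns whether a flush happened. | |
+ */ | |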
+static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); | |
+ struct f2fs_summary_block *sum = curseg->sum_blk; | |
+ int i; | |
+ | |
+ /* | |
+ * If the journal area in the current summary block is full of sit | |
+ * entries, flush all of them; otherwise there would be no room to | |
+ * replace them with newly hot sit entries. | |
+ */ | |
+ if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { | |
+ for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { | |
+ unsigned int segno; | |
+ segno = le32_to_cpu(segno_in_journal(sum, i)); | |
+ __mark_sit_entry_dirty(sbi, segno); | |
+ } | |
+ update_sits_in_cursum(sum, -sits_in_cursum(sum)); | |
+ return 1; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+/* | |
+ * CP calls this function, which flushes SIT entries including sit_journal, | |
+ * and moves prefree segs to free segs. | |
+ */ | |
+void flush_sit_entries(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ unsigned long *bitmap = sit_i->dirty_sentries_bitmap; | |
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); | |
+ struct f2fs_summary_block *sum = curseg->sum_blk; | |
+ unsigned long nsegs = TOTAL_SEGS(sbi); | |
+ struct page *page = NULL; | |
+ struct f2fs_sit_block *raw_sit = NULL; | |
+ unsigned int start = 0, end = 0; | |
+ unsigned int segno = -1; | |
+ bool flushed; | |
+ | |
+ mutex_lock(&curseg->curseg_mutex); | |
+ mutex_lock(&sit_i->sentry_lock); | |
+ | |
+ /* | |
+ * "flushed" indicates whether sit entries in journal are flushed | |
+ * to the SIT area or not. | |
+ */ | |
+ flushed = flush_sits_in_journal(sbi); | |
+ | |
+ while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { | |
+ struct seg_entry *se = get_seg_entry(sbi, segno); | |
+ int sit_offset, offset; | |
+ | |
+ sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); | |
+ | |
+ if (flushed) | |
+ goto to_sit_page; | |
+ | |
+ offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); | |
+ if (offset >= 0) { | |
+ segno_in_journal(sum, offset) = cpu_to_le32(segno); | |
+ seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); | |
+ goto flush_done; | |
+ } | |
+to_sit_page: | |
+ if (!page || (start > segno) || (segno > end)) { | |
+ if (page) { | |
+ f2fs_put_page(page, 1); | |
+ page = NULL; | |
+ } | |
+ | |
+ start = START_SEGNO(sit_i, segno); | |
+ end = start + SIT_ENTRY_PER_BLOCK - 1; | |
+ | |
+ /* read sit block that will be updated */ | |
+ page = get_next_sit_page(sbi, start); | |
+ raw_sit = page_address(page); | |
+ } | |
+ | |
+ /* update entry in SIT block */ | |
+ seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); | |
+flush_done: | |
+ __clear_bit(segno, bitmap); | |
+ sit_i->dirty_sentries--; | |
+ } | |
+ mutex_unlock(&sit_i->sentry_lock); | |
+ mutex_unlock(&curseg->curseg_mutex); | |
+ | |
+ /* writeout last modified SIT block */ | |
+ f2fs_put_page(page, 1); | |
+ | |
+ set_prefree_as_free_segments(sbi); | |
+} | |
+ | |
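+/* | |
+ * Build the in-memory SIT: per-segment entries, the dirty-sentries bitmap, | |
+ * optional per-section counters, and a private copy of the on-disk SIT | |
+ * bitmap taken from the checkpoint pack. | |
+ */ | |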
+static int build_sit_info(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); | |
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | |
+ struct sit_info *sit_i; | |
+ unsigned int sit_segs, start; | |
+ char *src_bitmap, *dst_bitmap; | |
+ unsigned int bitmap_size; | |
+ | |
+ /* allocate memory for SIT information */ | |
+ sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL); | |
+ if (!sit_i) | |
+ return -ENOMEM; | |
+ | |
+ SM_I(sbi)->sit_info = sit_i; | |
+ | |
+ sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); | |
+ if (!sit_i->sentries) | |
+ return -ENOMEM; | |
+ | |
+ bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); | |
+ sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); | |
+ if (!sit_i->dirty_sentries_bitmap) | |
+ return -ENOMEM; | |
+ | |
+ for (start = 0; start < TOTAL_SEGS(sbi); start++) { | |
+ sit_i->sentries[start].cur_valid_map | |
+ = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); | |
+ sit_i->sentries[start].ckpt_valid_map | |
+ = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); | |
+ if (!sit_i->sentries[start].cur_valid_map | |
+ || !sit_i->sentries[start].ckpt_valid_map) | |
+ return -ENOMEM; | |
+ } | |
+ | |
+ if (sbi->segs_per_sec > 1) { | |
+ sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * | |
+ sizeof(struct sec_entry)); | |
+ if (!sit_i->sec_entries) | |
+ return -ENOMEM; | |
+ } | |
+ | |
+ /* get information related to SIT */ | |
+ sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1; | |
+ | |
+ /* set up the SIT bitmap from the checkpoint pack */ | |
+ bitmap_size = __bitmap_size(sbi, SIT_BITMAP); | |
+ src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); | |
+ | |
+ dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); | |
+ if (!dst_bitmap) | |
+ return -ENOMEM; | |
+ | |
+ /* init SIT information */ | |
+ sit_i->s_ops = &default_salloc_ops; | |
+ | |
+ sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); | |
+ sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; | |
+ sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count); | |
+ sit_i->sit_bitmap = dst_bitmap; | |
+ sit_i->bitmap_size = bitmap_size; | |
+ sit_i->dirty_sentries = 0; | |
+ sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; | |
+ sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); | |
+ sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec; | |
+ mutex_init(&sit_i->sentry_lock); | |
+ return 0; | |
+} | |
+ | |
+static int build_free_segmap(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_sm_info *sm_info = SM_I(sbi); | |
+ struct free_segmap_info *free_i; | |
+ unsigned int bitmap_size, sec_bitmap_size; | |
+ | |
+ /* allocate memory for free segmap information */ | |
+ free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL); | |
+ if (!free_i) | |
+ return -ENOMEM; | |
+ | |
+ SM_I(sbi)->free_info = free_i; | |
+ | |
+ bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); | |
+ free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); | |
+ if (!free_i->free_segmap) | |
+ return -ENOMEM; | |
+ | |
+ sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); | |
+ free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); | |
+ if (!free_i->free_secmap) | |
+ return -ENOMEM; | |
+ | |
+ /* set all segments as dirty temporarily */ | |
+ memset(free_i->free_segmap, 0xff, bitmap_size); | |
+ memset(free_i->free_secmap, 0xff, sec_bitmap_size); | |
+ | |
+ /* init free segmap information */ | |
+ free_i->start_segno = | |
+ (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr); | |
+ free_i->free_segments = 0; | |
+ free_i->free_sections = 0; | |
+ rwlock_init(&free_i->segmap_lock); | |
+ return 0; | |
+} | |
+ | |
+static int build_curseg(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct curseg_info *array; | |
+ int i; | |
+ | |
+ array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); | |
+ if (!array) | |
+ return -ENOMEM; | |
+ | |
+ SM_I(sbi)->curseg_array = array; | |
+ | |
+ for (i = 0; i < NR_CURSEG_TYPE; i++) { | |
+ mutex_init(&array[i].curseg_mutex); | |
+ array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL); | |
+ if (!array[i].sum_blk) | |
+ return -ENOMEM; | |
+ array[i].segno = NULL_SEGNO; | |
+ array[i].next_blkoff = 0; | |
+ } | |
+ return restore_curseg_summaries(sbi); | |
+} | |
+ | |
+static void build_sit_entries(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); | |
+ struct f2fs_summary_block *sum = curseg->sum_blk; | |
+ unsigned int start; | |
+ | |
+ for (start = 0; start < TOTAL_SEGS(sbi); start++) { | |
+ struct seg_entry *se = &sit_i->sentries[start]; | |
+ struct f2fs_sit_block *sit_blk; | |
+ struct f2fs_sit_entry sit; | |
+ struct page *page; | |
+ int i; | |
+ | |
+ mutex_lock(&curseg->curseg_mutex); | |
+ for (i = 0; i < sits_in_cursum(sum); i++) { | |
+ if (le32_to_cpu(segno_in_journal(sum, i)) == start) { | |
+ sit = sit_in_journal(sum, i); | |
+ mutex_unlock(&curseg->curseg_mutex); | |
+ goto got_it; | |
+ } | |
+ } | |
+ mutex_unlock(&curseg->curseg_mutex); | |
+ page = get_current_sit_page(sbi, start); | |
+ sit_blk = (struct f2fs_sit_block *)page_address(page); | |
+ sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; | |
+ f2fs_put_page(page, 1); | |
+got_it: | |
+ check_block_count(sbi, start, &sit); | |
+ seg_info_from_raw_sit(se, &sit); | |
+ if (sbi->segs_per_sec > 1) { | |
+ struct sec_entry *e = get_sec_entry(sbi, start); | |
+ e->valid_blocks += se->valid_blocks; | |
+ } | |
+ } | |
+} | |
+ | |
+static void init_free_segmap(struct f2fs_sb_info *sbi) | |
+{ | |
+ unsigned int start; | |
+ int type; | |
+ | |
+ for (start = 0; start < TOTAL_SEGS(sbi); start++) { | |
+ struct seg_entry *sentry = get_seg_entry(sbi, start); | |
+ if (!sentry->valid_blocks) | |
+ __set_free(sbi, start); | |
+ } | |
+ | |
+ /* mark the current segments as in use */ | |
+ for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) { | |
+ struct curseg_info *curseg_t = CURSEG_I(sbi, type); | |
+ __set_test_and_inuse(sbi, curseg_t->segno); | |
+ } | |
+} | |
+ | |
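+/* | |
+ * Scan the in-use segments recorded in the free segmap and put every | |
+ * partially valid one on the DIRTY list; fully valid and empty segments | |
+ * are skipped. | |
+ */ | |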
+static void init_dirty_segmap(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ struct free_segmap_info *free_i = FREE_I(sbi); | |
+ unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); | |
+ unsigned short valid_blocks; | |
+ | |
+ while (1) { | |
+ /* find dirty segment based on free segmap */ | |
+ segno = find_next_inuse(free_i, total_segs, offset); | |
+ if (segno >= total_segs) | |
+ break; | |
+ offset = segno + 1; | |
+ valid_blocks = get_valid_blocks(sbi, segno, 0); | |
+ if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) | |
+ continue; | |
+ mutex_lock(&dirty_i->seglist_lock); | |
+ __locate_dirty_segment(sbi, segno, DIRTY); | |
+ mutex_unlock(&dirty_i->seglist_lock); | |
+ } | |
+} | |
+ | |
+static int init_victim_secmap(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); | |
+ | |
+ dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); | |
+ if (!dirty_i->victim_secmap) | |
+ return -ENOMEM; | |
+ return 0; | |
+} | |
+ | |
+static int build_dirty_segmap(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct dirty_seglist_info *dirty_i; | |
+ unsigned int bitmap_size, i; | |
+ | |
+ /* allocate memory for dirty segments list information */ | |
+ dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL); | |
+ if (!dirty_i) | |
+ return -ENOMEM; | |
+ | |
+ SM_I(sbi)->dirty_info = dirty_i; | |
+ mutex_init(&dirty_i->seglist_lock); | |
+ | |
+ bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); | |
+ | |
+ for (i = 0; i < NR_DIRTY_TYPE; i++) { | |
+ dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); | |
+ if (!dirty_i->dirty_segmap[i]) | |
+ return -ENOMEM; | |
+ } | |
+ | |
+ init_dirty_segmap(sbi); | |
+ return init_victim_secmap(sbi); | |
+} | |
+ | |
+/* | |
+ * Update min, max modified time for cost-benefit GC algorithm | |
+ */ | |
+static void init_min_max_mtime(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ unsigned int segno; | |
+ | |
+ mutex_lock(&sit_i->sentry_lock); | |
+ | |
+ sit_i->min_mtime = LLONG_MAX; | |
+ | |
+ for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { | |
+ unsigned int i; | |
+ unsigned long long mtime = 0; | |
+ | |
+ for (i = 0; i < sbi->segs_per_sec; i++) | |
+ mtime += get_seg_entry(sbi, segno + i)->mtime; | |
+ | |
+ mtime = div_u64(mtime, sbi->segs_per_sec); | |
+ | |
+ if (sit_i->min_mtime > mtime) | |
+ sit_i->min_mtime = mtime; | |
+ } | |
+ sit_i->max_mtime = get_mtime(sbi); | |
+ mutex_unlock(&sit_i->sentry_lock); | |
+} | |
+ | |
+int build_segment_manager(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); | |
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | |
+ struct f2fs_sm_info *sm_info; | |
+ int err; | |
+ | |
+ sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL); | |
+ if (!sm_info) | |
+ return -ENOMEM; | |
+ | |
+ /* init sm info */ | |
+ sbi->sm_info = sm_info; | |
+ INIT_LIST_HEAD(&sm_info->wblist_head); | |
+ spin_lock_init(&sm_info->wblist_lock); | |
+ sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); | |
+ sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); | |
+ sm_info->segment_count = le32_to_cpu(raw_super->segment_count); | |
+ sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); | |
+ sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); | |
+ sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); | |
+ sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); | |
+ | |
+ err = build_sit_info(sbi); | |
+ if (err) | |
+ return err; | |
+ err = build_free_segmap(sbi); | |
+ if (err) | |
+ return err; | |
+ err = build_curseg(sbi); | |
+ if (err) | |
+ return err; | |
+ | |
+ /* reinit free segmap based on SIT */ | |
+ build_sit_entries(sbi); | |
+ | |
+ init_free_segmap(sbi); | |
+ err = build_dirty_segmap(sbi); | |
+ if (err) | |
+ return err; | |
+ | |
+ init_min_max_mtime(sbi); | |
+ return 0; | |
+} | |
+ | |
+static void discard_dirty_segmap(struct f2fs_sb_info *sbi, | |
+ enum dirty_type dirty_type) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ | |
+ mutex_lock(&dirty_i->seglist_lock); | |
+ kfree(dirty_i->dirty_segmap[dirty_type]); | |
+ dirty_i->nr_dirty[dirty_type] = 0; | |
+ mutex_unlock(&dirty_i->seglist_lock); | |
+} | |
+ | |
+static void destroy_victim_secmap(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ kfree(dirty_i->victim_secmap); | |
+} | |
+ | |
+static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | |
+ int i; | |
+ | |
+ if (!dirty_i) | |
+ return; | |
+ | |
+ /* discard pre-free/dirty segments list */ | |
+ for (i = 0; i < NR_DIRTY_TYPE; i++) | |
+ discard_dirty_segmap(sbi, i); | |
+ | |
+ destroy_victim_secmap(sbi); | |
+ SM_I(sbi)->dirty_info = NULL; | |
+ kfree(dirty_i); | |
+} | |
+ | |
+static void destroy_curseg(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct curseg_info *array = SM_I(sbi)->curseg_array; | |
+ int i; | |
+ | |
+ if (!array) | |
+ return; | |
+ SM_I(sbi)->curseg_array = NULL; | |
+ for (i = 0; i < NR_CURSEG_TYPE; i++) | |
+ kfree(array[i].sum_blk); | |
+ kfree(array); | |
+} | |
+ | |
+static void destroy_free_segmap(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct free_segmap_info *free_i = SM_I(sbi)->free_info; | |
+ if (!free_i) | |
+ return; | |
+ SM_I(sbi)->free_info = NULL; | |
+ kfree(free_i->free_segmap); | |
+ kfree(free_i->free_secmap); | |
+ kfree(free_i); | |
+} | |
+ | |
+static void destroy_sit_info(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ unsigned int start; | |
+ | |
+ if (!sit_i) | |
+ return; | |
+ | |
+ if (sit_i->sentries) { | |
+ for (start = 0; start < TOTAL_SEGS(sbi); start++) { | |
+ kfree(sit_i->sentries[start].cur_valid_map); | |
+ kfree(sit_i->sentries[start].ckpt_valid_map); | |
+ } | |
+ } | |
+ vfree(sit_i->sentries); | |
+ vfree(sit_i->sec_entries); | |
+ kfree(sit_i->dirty_sentries_bitmap); | |
+ | |
+ SM_I(sbi)->sit_info = NULL; | |
+ kfree(sit_i->sit_bitmap); | |
+ kfree(sit_i); | |
+} | |
+ | |
+void destroy_segment_manager(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_sm_info *sm_info = SM_I(sbi); | |
+ destroy_dirty_segmap(sbi); | |
+ destroy_curseg(sbi); | |
+ destroy_free_segmap(sbi); | |
+ destroy_sit_info(sbi); | |
+ sbi->sm_info = NULL; | |
+ kfree(sm_info); | |
+} | |
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h | |
new file mode 100644 | |
index 0000000..062424a | |
--- /dev/null | |
+++ b/fs/f2fs/segment.h | |
@@ -0,0 +1,637 @@ | |
+/* | |
+ * fs/f2fs/segment.h | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/blkdev.h> | |
+ | |
+/* constant macro */ | |
+#define NULL_SEGNO ((unsigned int)(~0)) | |
+#define NULL_SECNO ((unsigned int)(~0)) | |
+ | |
+/* L: Logical segment # in volume, R: Relative segment # in main area */ | |
+#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) | |
+#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) | |
+ | |
+#define IS_DATASEG(t) \ | |
+ ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ | |
+ (t == CURSEG_WARM_DATA)) | |
+ | |
+#define IS_NODESEG(t) \ | |
+ ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ | |
+ (t == CURSEG_WARM_NODE)) | |
+ | |
+#define IS_CURSEG(sbi, seg) \ | |
+ ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ | |
+ (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ | |
+ (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ | |
+ (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ | |
+ (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ | |
+ (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) | |
+ | |
+#define IS_CURSEC(sbi, secno) \ | |
+ ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ | |
+ sbi->segs_per_sec) || \ | |
+ (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ | |
+ sbi->segs_per_sec) || \ | |
+ (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ | |
+ sbi->segs_per_sec) || \ | |
+ (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ | |
+ sbi->segs_per_sec) || \ | |
+ (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ | |
+ sbi->segs_per_sec) || \ | |
+ (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ | |
+ sbi->segs_per_sec)) \ | |
+ | |
+#define START_BLOCK(sbi, segno) \ | |
+ (SM_I(sbi)->seg0_blkaddr + \ | |
+ (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) | |
+#define NEXT_FREE_BLKADDR(sbi, curseg) \ | |
+ (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) | |
+ | |
+#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) | |
+ | |
+#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ | |
+ ((blk_addr) - SM_I(sbi)->seg0_blkaddr) | |
+#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ | |
+ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) | |
+#define GET_SEGNO(sbi, blk_addr) \ | |
+ (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ | |
+ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ | |
+ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) | |
+#define GET_SECNO(sbi, segno) \ | |
+ ((segno) / sbi->segs_per_sec) | |
+#define GET_ZONENO_FROM_SEGNO(sbi, segno) \ | |
+ ((segno / sbi->segs_per_sec) / sbi->secs_per_zone) | |
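+ | |
+/* | |
+ * For example, with 2MB segments (512 4KB blocks, log_blocks_per_seg == 9), | |
+ * a block located 0x1200 blocks past seg0 maps to relative segment | |
+ * 0x1200 >> 9 == 9, which is then divided by segs_per_sec and secs_per_zone | |
+ * to reach its section and zone. | |
+ */ | |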
+ | |
+#define GET_SUM_BLOCK(sbi, segno) \ | |
+ ((sbi->sm_info->ssa_blkaddr) + segno) | |
+ | |
+#define GET_SUM_TYPE(footer) ((footer)->entry_type) | |
+#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type) | |
+ | |
+#define SIT_ENTRY_OFFSET(sit_i, segno) \ | |
+ (segno % sit_i->sents_per_block) | |
+#define SIT_BLOCK_OFFSET(sit_i, segno) \ | |
+ (segno / SIT_ENTRY_PER_BLOCK) | |
+#define START_SEGNO(sit_i, segno) \ | |
+ (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) | |
+#define f2fs_bitmap_size(nr) \ | |
+ (BITS_TO_LONGS(nr) * sizeof(unsigned long)) | |
+#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) | |
+#define TOTAL_SECS(sbi) (sbi->total_sections) | |
+ | |
+#define SECTOR_FROM_BLOCK(sbi, blk_addr) \ | |
+ (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) | |
+#define SECTOR_TO_BLOCK(sbi, sectors) \ | |
+ (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) | |
+ | |
+/* during checkpoint, bio_private is used to synchronize the last bio */ | |
+struct bio_private { | |
+ struct f2fs_sb_info *sbi; | |
+ bool is_sync; | |
+ void *wait; | |
+}; | |
+ | |
+/* | |
+ * indicate a block allocation direction: RIGHT and LEFT. | |
+ * RIGHT means allocating new sections towards the end of the volume. | |
+ * LEFT means the opposite direction. | |
+ */ | |
+enum { | |
+ ALLOC_RIGHT = 0, | |
+ ALLOC_LEFT | |
+}; | |
+ | |
+/* | |
+ * In the victim_sel_policy->alloc_mode, there are two block allocation modes. | |
+ * LFS writes data sequentially with cleaning operations. | |
+ * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations. | |
+ */ | |
+enum { | |
+ LFS = 0, | |
+ SSR | |
+}; | |
+ | |
+/* | |
+ * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes. | |
+ * GC_CB is based on cost-benefit algorithm. | |
+ * GC_GREEDY is based on greedy algorithm. | |
+ */ | |
+enum { | |
+ GC_CB = 0, | |
+ GC_GREEDY | |
+}; | |
+ | |
+/* | |
+ * BG_GC means the background cleaning job. | |
+ * FG_GC means the on-demand cleaning job. | |
+ */ | |
+enum { | |
+ BG_GC = 0, | |
+ FG_GC | |
+}; | |
+ | |
+/* for a function parameter to select a victim segment */ | |
+struct victim_sel_policy { | |
+ int alloc_mode; /* LFS or SSR */ | |
+ int gc_mode; /* GC_CB or GC_GREEDY */ | |
+ unsigned long *dirty_segmap; /* dirty segment bitmap */ | |
+ unsigned int offset; /* last scanned bitmap offset */ | |
+ unsigned int ofs_unit; /* bitmap search unit */ | |
+ unsigned int min_cost; /* minimum cost */ | |
+ unsigned int min_segno; /* segment # having min. cost */ | |
+}; | |
+ | |
+struct seg_entry { | |
+ unsigned short valid_blocks; /* # of valid blocks */ | |
+ unsigned char *cur_valid_map; /* validity bitmap of blocks */ | |
+ /* | |
+ * # of valid blocks and the validity bitmap stored in the last | |
+ * checkpoint pack. This information is used by the SSR mode. | |
+ */ | |
+ unsigned short ckpt_valid_blocks; | |
+ unsigned char *ckpt_valid_map; | |
+ unsigned char type; /* segment type like CURSEG_XXX_TYPE */ | |
+ unsigned long long mtime; /* modification time of the segment */ | |
+}; | |
+ | |
+struct sec_entry { | |
+ unsigned int valid_blocks; /* # of valid blocks in a section */ | |
+}; | |
+ | |
+struct segment_allocation { | |
+ void (*allocate_segment)(struct f2fs_sb_info *, int, bool); | |
+}; | |
+ | |
+struct sit_info { | |
+ const struct segment_allocation *s_ops; | |
+ | |
+ block_t sit_base_addr; /* start block address of SIT area */ | |
+ block_t sit_blocks; /* # of blocks used by SIT area */ | |
+ block_t written_valid_blocks; /* # of valid blocks in main area */ | |
+ char *sit_bitmap; /* SIT bitmap pointer */ | |
+ unsigned int bitmap_size; /* SIT bitmap size */ | |
+ | |
+ unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ | |
+ unsigned int dirty_sentries; /* # of dirty sentries */ | |
+ unsigned int sents_per_block; /* # of SIT entries per block */ | |
+ struct mutex sentry_lock; /* to protect SIT cache */ | |
+ struct seg_entry *sentries; /* SIT segment-level cache */ | |
+ struct sec_entry *sec_entries; /* SIT section-level cache */ | |
+ | |
+ /* for cost-benefit algorithm in cleaning procedure */ | |
+ unsigned long long elapsed_time; /* elapsed time after mount */ | |
+ unsigned long long mounted_time; /* mount time */ | |
+ unsigned long long min_mtime; /* min. modification time */ | |
+ unsigned long long max_mtime; /* max. modification time */ | |
+}; | |
+ | |
+struct free_segmap_info { | |
+ unsigned int start_segno; /* start segment number logically */ | |
+ unsigned int free_segments; /* # of free segments */ | |
+ unsigned int free_sections; /* # of free sections */ | |
+ rwlock_t segmap_lock; /* free segmap lock */ | |
+ unsigned long *free_segmap; /* free segment bitmap */ | |
+ unsigned long *free_secmap; /* free section bitmap */ | |
+}; | |
+ | |
+/* Notice: the order of dirty types is the same as CURSEG_XXX in f2fs.h */ | |
+enum dirty_type { | |
+ DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */ | |
+ DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */ | |
+ DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */ | |
+ DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */ | |
+ DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */ | |
+ DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */ | |
+ DIRTY, /* to count # of dirty segments */ | |
+ PRE, /* to count # of entirely obsolete segments */ | |
+ NR_DIRTY_TYPE | |
+}; | |
+ | |
+struct dirty_seglist_info { | |
+ const struct victim_selection *v_ops; /* victim selection operation */ | |
+ unsigned long *dirty_segmap[NR_DIRTY_TYPE]; | |
+ struct mutex seglist_lock; /* lock for segment bitmaps */ | |
+ int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ | |
+ unsigned long *victim_secmap; /* background GC victims */ | |
+}; | |
+ | |
+/* victim selection function for cleaning and SSR */ | |
+struct victim_selection { | |
+ int (*get_victim)(struct f2fs_sb_info *, unsigned int *, | |
+ int, int, char); | |
+}; | |
+ | |
+/* for active log information */ | |
+struct curseg_info { | |
+ struct mutex curseg_mutex; /* lock for consistency */ | |
+ struct f2fs_summary_block *sum_blk; /* cached summary block */ | |
+ unsigned char alloc_type; /* current allocation type */ | |
+ unsigned int segno; /* current segment number */ | |
+ unsigned short next_blkoff; /* next block offset to write */ | |
+ unsigned int zone; /* current zone number */ | |
+ unsigned int next_segno; /* preallocated segment */ | |
+}; | |
+ | |
+/* | |
+ * inline functions | |
+ */ | |
+static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) | |
+{ | |
+ return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); | |
+} | |
+ | |
+static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, | |
+ unsigned int segno) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ return &sit_i->sentries[segno]; | |
+} | |
+ | |
+static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, | |
+ unsigned int segno) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ return &sit_i->sec_entries[GET_SECNO(sbi, segno)]; | |
+} | |
+ | |
+static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, | |
+ unsigned int segno, int section) | |
+{ | |
+ /* | |
+ * In order to get # of valid blocks in a section instantly from many | |
+ * segments, f2fs manages two counting structures separately. | |
+ */ | |
+ if (section > 1) | |
+ return get_sec_entry(sbi, segno)->valid_blocks; | |
+ else | |
+ return get_seg_entry(sbi, segno)->valid_blocks; | |
+} | |
+ | |
+static inline void seg_info_from_raw_sit(struct seg_entry *se, | |
+ struct f2fs_sit_entry *rs) | |
+{ | |
+ se->valid_blocks = GET_SIT_VBLOCKS(rs); | |
+ se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); | |
+ memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); | |
+ memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); | |
+ se->type = GET_SIT_TYPE(rs); | |
+ se->mtime = le64_to_cpu(rs->mtime); | |
+} | |
+ | |
+static inline void seg_info_to_raw_sit(struct seg_entry *se, | |
+ struct f2fs_sit_entry *rs) | |
+{ | |
+ unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) | | |
+ se->valid_blocks; | |
+ rs->vblocks = cpu_to_le16(raw_vblocks); | |
+ memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); | |
+ memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); | |
+ se->ckpt_valid_blocks = se->valid_blocks; | |
+ rs->mtime = cpu_to_le64(se->mtime); | |
+} | |
+ | |
+static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, | |
+ unsigned int max, unsigned int segno) | |
+{ | |
+ unsigned int ret; | |
+ read_lock(&free_i->segmap_lock); | |
+ ret = find_next_bit(free_i->free_segmap, max, segno); | |
+ read_unlock(&free_i->segmap_lock); | |
+ return ret; | |
+} | |
+ | |
+static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) | |
+{ | |
+ struct free_segmap_info *free_i = FREE_I(sbi); | |
+ unsigned int secno = segno / sbi->segs_per_sec; | |
+ unsigned int start_segno = secno * sbi->segs_per_sec; | |
+ unsigned int next; | |
+ | |
+ write_lock(&free_i->segmap_lock); | |
+ clear_bit(segno, free_i->free_segmap); | |
+ free_i->free_segments++; | |
+ | |
+ next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); | |
+ if (next >= start_segno + sbi->segs_per_sec) { | |
+ clear_bit(secno, free_i->free_secmap); | |
+ free_i->free_sections++; | |
+ } | |
+ write_unlock(&free_i->segmap_lock); | |
+} | |
+ | |
+static inline void __set_inuse(struct f2fs_sb_info *sbi, | |
+ unsigned int segno) | |
+{ | |
+ struct free_segmap_info *free_i = FREE_I(sbi); | |
+ unsigned int secno = segno / sbi->segs_per_sec; | |
+ set_bit(segno, free_i->free_segmap); | |
+ free_i->free_segments--; | |
+ if (!test_and_set_bit(secno, free_i->free_secmap)) | |
+ free_i->free_sections--; | |
+} | |
+ | |
+static inline void __set_test_and_free(struct f2fs_sb_info *sbi, | |
+ unsigned int segno) | |
+{ | |
+ struct free_segmap_info *free_i = FREE_I(sbi); | |
+ unsigned int secno = segno / sbi->segs_per_sec; | |
+ unsigned int start_segno = secno * sbi->segs_per_sec; | |
+ unsigned int next; | |
+ | |
+ write_lock(&free_i->segmap_lock); | |
+ if (test_and_clear_bit(segno, free_i->free_segmap)) { | |
+ free_i->free_segments++; | |
+ | |
+ next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), | |
+ start_segno); | |
+ if (next >= start_segno + sbi->segs_per_sec) { | |
+ if (test_and_clear_bit(secno, free_i->free_secmap)) | |
+ free_i->free_sections++; | |
+ } | |
+ } | |
+ write_unlock(&free_i->segmap_lock); | |
+} | |
+ | |
+static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, | |
+ unsigned int segno) | |
+{ | |
+ struct free_segmap_info *free_i = FREE_I(sbi); | |
+ unsigned int secno = segno / sbi->segs_per_sec; | |
+ write_lock(&free_i->segmap_lock); | |
+ if (!test_and_set_bit(segno, free_i->free_segmap)) { | |
+ free_i->free_segments--; | |
+ if (!test_and_set_bit(secno, free_i->free_secmap)) | |
+ free_i->free_sections--; | |
+ } | |
+ write_unlock(&free_i->segmap_lock); | |
+} | |
+ | |
+static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, | |
+ void *dst_addr) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); | |
+} | |
+ | |
+static inline block_t written_block_count(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ block_t vblocks; | |
+ | |
+ mutex_lock(&sit_i->sentry_lock); | |
+ vblocks = sit_i->written_valid_blocks; | |
+ mutex_unlock(&sit_i->sentry_lock); | |
+ | |
+ return vblocks; | |
+} | |
+ | |
+static inline unsigned int free_segments(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct free_segmap_info *free_i = FREE_I(sbi); | |
+ unsigned int free_segs; | |
+ | |
+ read_lock(&free_i->segmap_lock); | |
+ free_segs = free_i->free_segments; | |
+ read_unlock(&free_i->segmap_lock); | |
+ | |
+ return free_segs; | |
+} | |
+ | |
+static inline int reserved_segments(struct f2fs_sb_info *sbi) | |
+{ | |
+ return SM_I(sbi)->reserved_segments; | |
+} | |
+ | |
+static inline unsigned int free_sections(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct free_segmap_info *free_i = FREE_I(sbi); | |
+ unsigned int free_secs; | |
+ | |
+ read_lock(&free_i->segmap_lock); | |
+ free_secs = free_i->free_sections; | |
+ read_unlock(&free_i->segmap_lock); | |
+ | |
+ return free_secs; | |
+} | |
+ | |
+static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) | |
+{ | |
+ return DIRTY_I(sbi)->nr_dirty[PRE]; | |
+} | |
+ | |
+static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi) | |
+{ | |
+ return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] + | |
+ DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] + | |
+ DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] + | |
+ DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] + | |
+ DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] + | |
+ DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE]; | |
+} | |
+ | |
+static inline int overprovision_segments(struct f2fs_sb_info *sbi) | |
+{ | |
+ return SM_I(sbi)->ovp_segments; | |
+} | |
+ | |
+static inline int overprovision_sections(struct f2fs_sb_info *sbi) | |
+{ | |
+ return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec; | |
+} | |
+ | |
+static inline int reserved_sections(struct f2fs_sb_info *sbi) | |
+{ | |
+ return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec; | |
+} | |
+ | |
+static inline bool need_SSR(struct f2fs_sb_info *sbi) | |
+{ | |
+ return (free_sections(sbi) < overprovision_sections(sbi)); | |
+} | |
+ | |
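+/* | |
+ * Returns true when, even counting the @freed sections about to be | |
+ * released, the free sections could not cover the dirty node sections, | |
+ * twice the dirty dentry sections, and the reserved sections. Always | |
+ * false while recovery (por_doing) is in progress. | |
+ */ | |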
+static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) | |
+{ | |
+ int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); | |
+ int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); | |
+ | |
+ if (sbi->por_doing) | |
+ return false; | |
+ | |
+ return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + | |
+ reserved_sections(sbi))); | |
+} | |
+ | |
+static inline int utilization(struct f2fs_sb_info *sbi) | |
+{ | |
+ return div_u64(valid_user_blocks(sbi) * 100, sbi->user_block_count); | |
+} | |
+ | |
+/* | |
+ * Sometimes it is better for f2fs to drop its out-of-place update | 
+ * policy. So, if fs utilization goes over MIN_IPU_UTIL, f2fs tries to | 
+ * write data in place, as traditional file systems do. | 
+ * The threshold is currently set to 100 percent, which disables this. | 
+ * See need_inplace_update() below. | 
+ */ | |
+#define MIN_IPU_UTIL 100 | |
+static inline bool need_inplace_update(struct inode *inode) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ if (S_ISDIR(inode->i_mode)) | |
+ return false; | |
+ if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) | |
+ return true; | |
+ return false; | |
+} | |
+ | |
+static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, | |
+ int type) | |
+{ | |
+ struct curseg_info *curseg = CURSEG_I(sbi, type); | |
+ return curseg->segno; | |
+} | |
+ | |
+static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi, | |
+ int type) | |
+{ | |
+ struct curseg_info *curseg = CURSEG_I(sbi, type); | |
+ return curseg->alloc_type; | |
+} | |
+ | |
+static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) | |
+{ | |
+ struct curseg_info *curseg = CURSEG_I(sbi, type); | |
+ return curseg->next_blkoff; | |
+} | |
+ | |
+static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) | |
+{ | |
+ unsigned int end_segno = SM_I(sbi)->segment_count - 1; | |
+ BUG_ON(segno > end_segno); | |
+} | |
+ | |
+/* | |
+ * This function is used only for debugging. | 
+ * NOTE: it should be removed in the future. | 
+ */ | |
+static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) | |
+{ | |
+ struct f2fs_sm_info *sm_info = SM_I(sbi); | |
+ block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; | |
+ block_t start_addr = sm_info->seg0_blkaddr; | |
+ block_t end_addr = start_addr + total_blks - 1; | |
+ BUG_ON(blk_addr < start_addr); | |
+ BUG_ON(blk_addr > end_addr); | |
+} | |
+ | |
+/* | |
+ * A summary block is always treated as an invalid block. | 
+ */ | |
+static inline void check_block_count(struct f2fs_sb_info *sbi, | |
+ int segno, struct f2fs_sit_entry *raw_sit) | |
+{ | |
+ struct f2fs_sm_info *sm_info = SM_I(sbi); | |
+ unsigned int end_segno = sm_info->segment_count - 1; | |
+ int valid_blocks = 0; | |
+ int i; | |
+ | |
+ /* check segment usage */ | |
+ BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); | |
+ | |
+ /* check boundary of a given segment number */ | |
+ BUG_ON(segno > end_segno); | |
+ | |
+ /* check bitmap with valid block count */ | |
+ for (i = 0; i < sbi->blocks_per_seg; i++) | |
+ if (f2fs_test_bit(i, raw_sit->valid_map)) | |
+ valid_blocks++; | |
+ BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); | |
+} | |
+ | |
+static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, | |
+ unsigned int start) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); | |
+ block_t blk_addr = sit_i->sit_base_addr + offset; | |
+ | |
+ check_seg_range(sbi, start); | |
+ | |
+ /* calculate sit block address */ | |
+ if (f2fs_test_bit(offset, sit_i->sit_bitmap)) | |
+ blk_addr += sit_i->sit_blocks; | |
+ | |
+ return blk_addr; | |
+} | |
+ | |
+static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi, | |
+ pgoff_t block_addr) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ block_addr -= sit_i->sit_base_addr; | |
+ if (block_addr < sit_i->sit_blocks) | |
+ block_addr += sit_i->sit_blocks; | |
+ else | |
+ block_addr -= sit_i->sit_blocks; | |
+ | |
+ return block_addr + sit_i->sit_base_addr; | |
+} | |
+ | |
+static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) | |
+{ | |
+ unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); | |
+ | |
+ if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) | |
+ f2fs_clear_bit(block_off, sit_i->sit_bitmap); | |
+ else | |
+ f2fs_set_bit(block_off, sit_i->sit_bitmap); | |
+} | |
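+ | 
+/* | 
+ * The SIT lives in two on-disk copies. For each SIT block, a bit in | 
+ * sit_bitmap selects the currently valid copy: current_sit_addr() reads | 
+ * through it, next_sit_addr() returns the matching address in the other | 
+ * copy, and set_to_next_sit() flips the bit once the updated block has | 
+ * been redirected there. A sketch of one update cycle: | 
+ * | 
+ *   dst = next_sit_addr(sbi, current_sit_addr(sbi, segno)); | 
+ *   ... write the updated SIT block at dst ... | 
+ *   set_to_next_sit(sit_i, segno); | 
+ */ | 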
+ | |
+static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct sit_info *sit_i = SIT_I(sbi); | |
+ return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - | |
+ sit_i->mounted_time; | |
+} | |
+ | |
+static inline void set_summary(struct f2fs_summary *sum, nid_t nid, | |
+ unsigned int ofs_in_node, unsigned char version) | |
+{ | |
+ sum->nid = cpu_to_le32(nid); | |
+ sum->ofs_in_node = cpu_to_le16(ofs_in_node); | |
+ sum->version = version; | |
+} | |
+ | |
+static inline block_t start_sum_block(struct f2fs_sb_info *sbi) | |
+{ | |
+ return __start_cp_addr(sbi) + | |
+ le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); | |
+} | |
+ | |
+static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) | |
+{ | |
+ return __start_cp_addr(sbi) + | |
+ le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) | |
+ - (base + 1) + type; | |
+} | |
+ | |
+static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) | |
+{ | |
+ if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) | |
+ return true; | |
+ return false; | |
+} | |
+ | |
+static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct block_device *bdev = sbi->sb->s_bdev; | |
+ struct request_queue *q = bdev_get_queue(bdev); | |
+ return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); | |
+} | |
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c | |
new file mode 100644 | |
index 0000000..e8c28d1 | |
--- /dev/null | |
+++ b/fs/f2fs/super.c | |
@@ -0,0 +1,1154 @@ | |
+/* | |
+ * fs/f2fs/super.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/module.h> | |
+#include <linux/init.h> | |
+#include <linux/fs.h> | |
+#include <linux/statfs.h> | |
+#include <linux/buffer_head.h> | |
+#include <linux/backing-dev.h> | |
+#include <linux/kthread.h> | |
+#include <linux/parser.h> | |
+#include <linux/mount.h> | |
+#include <linux/seq_file.h> | |
+#include <linux/proc_fs.h> | |
+#include <linux/random.h> | |
+#include <linux/exportfs.h> | |
+#include <linux/blkdev.h> | |
+#include <linux/f2fs_fs.h> | |
+#include <linux/sysfs.h> | |
+ | |
+#include "f2fs.h" | |
+#include "node.h" | |
+#include "segment.h" | |
+#include "xattr.h" | |
+#include "gc.h" | |
+ | |
+#define CREATE_TRACE_POINTS | |
+#include <trace/events/f2fs.h> | |
+ | |
+static struct proc_dir_entry *f2fs_proc_root; | |
+static struct kmem_cache *f2fs_inode_cachep; | |
+static struct kset *f2fs_kset; | |
+ | |
+enum { | |
+ Opt_gc_background, | |
+ Opt_disable_roll_forward, | |
+ Opt_discard, | |
+ Opt_noheap, | |
+ Opt_nouser_xattr, | |
+ Opt_noacl, | |
+ Opt_active_logs, | |
+ Opt_disable_ext_identify, | |
+ Opt_inline_xattr, | |
+ Opt_android_emu, | |
+ Opt_err_continue, | |
+ Opt_err_panic, | |
+ Opt_err_recover, | |
+ Opt_err, | |
+}; | |
+ | |
+static match_table_t f2fs_tokens = { | |
+ {Opt_gc_background, "background_gc=%s"}, | |
+ {Opt_disable_roll_forward, "disable_roll_forward"}, | |
+ {Opt_discard, "discard"}, | |
+ {Opt_noheap, "no_heap"}, | |
+ {Opt_nouser_xattr, "nouser_xattr"}, | |
+ {Opt_noacl, "noacl"}, | |
+ {Opt_active_logs, "active_logs=%u"}, | |
+ {Opt_disable_ext_identify, "disable_ext_identify"}, | |
+ {Opt_inline_xattr, "inline_xattr"}, | |
+ {Opt_android_emu, "android_emu=%s"}, | |
+ {Opt_err_continue, "errors=continue"}, | |
+ {Opt_err_panic, "errors=panic"}, | |
+ {Opt_err_recover, "errors=recover"}, | |
+ {Opt_err, NULL}, | |
+}; | |
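+ | 
+/* | 
+ * The options above are passed comma-separated at mount time; for | 
+ * example (device and mount point are illustrative only): | 
+ * | 
+ *   mount -t f2fs -o background_gc=on,discard,active_logs=6 \ | 
+ *         /dev/block/mmcblk0p9 /data | 
+ */ | 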
+ | |
+/* Sysfs support for f2fs */ | |
+struct f2fs_attr { | |
+ struct attribute attr; | |
+ ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); | |
+ ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, | |
+ const char *, size_t); | |
+ int offset; | |
+}; | |
+ | |
+static ssize_t f2fs_sbi_show(struct f2fs_attr *a, | |
+ struct f2fs_sb_info *sbi, char *buf) | |
+{ | |
+ struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; | |
+ unsigned int *ui; | |
+ | |
+ if (!gc_kth) | |
+ return -EINVAL; | |
+ | |
+ ui = (unsigned int *)(((char *)gc_kth) + a->offset); | |
+ | |
+ return snprintf(buf, PAGE_SIZE, "%u\n", *ui); | |
+} | |
+ | |
+static ssize_t f2fs_sbi_store(struct f2fs_attr *a, | |
+ struct f2fs_sb_info *sbi, | |
+ const char *buf, size_t count) | |
+{ | |
+ struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; | |
+ unsigned long t; | |
+ unsigned int *ui; | |
+ ssize_t ret; | |
+ | |
+ if (!gc_kth) | |
+ return -EINVAL; | |
+ | |
+ ui = (unsigned int *)(((char *)gc_kth) + a->offset); | |
+ | |
+ ret = kstrtoul(skip_spaces(buf), 0, &t); | |
+ if (ret < 0) | |
+ return ret; | |
+ *ui = t; | |
+ return count; | |
+} | |
+ | |
+static ssize_t f2fs_attr_show(struct kobject *kobj, | |
+ struct attribute *attr, char *buf) | |
+{ | |
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, | |
+ s_kobj); | |
+ struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); | |
+ | |
+ return a->show ? a->show(a, sbi, buf) : 0; | |
+} | |
+ | |
+static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, | |
+ const char *buf, size_t len) | |
+{ | |
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, | |
+ s_kobj); | |
+ struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); | |
+ | |
+ return a->store ? a->store(a, sbi, buf, len) : 0; | |
+} | |
+ | |
+static void f2fs_sb_release(struct kobject *kobj) | |
+{ | |
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, | |
+ s_kobj); | |
+ complete(&sbi->s_kobj_unregister); | |
+} | |
+ | |
+#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \ | |
+static struct f2fs_attr f2fs_attr_##_name = { \ | |
+ .attr = {.name = __stringify(_name), .mode = _mode }, \ | |
+ .show = _show, \ | |
+ .store = _store, \ | |
+ .offset = offsetof(struct f2fs_gc_kthread, _elname), \ | |
+} | |
+ | |
+#define F2FS_RW_ATTR(name, elname) \ | |
+ F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname) | |
+ | |
+F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); | |
+F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); | |
+F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); | |
+F2FS_RW_ATTR(gc_idle, gc_idle); | |
+ | |
+#define ATTR_LIST(name) (&f2fs_attr_##name.attr) | |
+static struct attribute *f2fs_attrs[] = { | |
+ ATTR_LIST(gc_min_sleep_time), | |
+ ATTR_LIST(gc_max_sleep_time), | |
+ ATTR_LIST(gc_no_gc_sleep_time), | |
+ ATTR_LIST(gc_idle), | |
+ NULL, | |
+}; | |
+ | |
+static const struct sysfs_ops f2fs_attr_ops = { | |
+ .show = f2fs_attr_show, | |
+ .store = f2fs_attr_store, | |
+}; | |
+ | |
+static struct kobj_type f2fs_ktype = { | |
+ .default_attrs = f2fs_attrs, | |
+ .sysfs_ops = &f2fs_attr_ops, | |
+ .release = f2fs_sb_release, | |
+}; | |
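+ | 
+/* | 
+ * Each mount exports the attributes above under /sys/fs/f2fs/<dev>/, | 
+ * named after the block device. For example, to let the GC thread sleep | 
+ * longer between passes (device name illustrative; the stock GC thread | 
+ * interprets these values as milliseconds): | 
+ * | 
+ *   echo 30000 > /sys/fs/f2fs/mmcblk0p9/gc_max_sleep_time | 
+ */ | 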
+ | |
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) | |
+{ | |
+ struct va_format vaf; | |
+ va_list args; | |
+ | |
+ va_start(args, fmt); | |
+ vaf.fmt = fmt; | |
+ vaf.va = &args; | |
+ printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); | |
+ va_end(args); | |
+} | |
+ | |
+static void init_once(void *foo) | |
+{ | |
+ struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; | |
+ | |
+ inode_init_once(&fi->vfs_inode); | |
+} | |
+ | |
+static int parse_android_emu(struct f2fs_sb_info *sbi, char *args) | |
+{ | |
+ char *sep = args; | |
+ char *sepres; | |
+ int ret; | |
+ | |
+ if (!sep) | |
+ return -EINVAL; | |
+ | |
+ sepres = strsep(&sep, ":"); | |
+ if (!sep) | |
+ return -EINVAL; | |
+ ret = kstrtou32(sepres, 0, &sbi->android_emu_uid); | |
+ if (ret) | |
+ return ret; | |
+ | |
+ sepres = strsep(&sep, ":"); | |
+ if (!sep) | |
+ return -EINVAL; | |
+ ret = kstrtou32(sepres, 0, &sbi->android_emu_gid); | |
+ if (ret) | |
+ return ret; | |
+ | |
+ sepres = strsep(&sep, ":"); | |
+ ret = kstrtou16(sepres, 8, &sbi->android_emu_mode); | |
+ if (ret) | |
+ return ret; | |
+ | |
+ if (sep && strstr(sep, "nocase")) | |
+ sbi->android_emu_flags = F2FS_ANDROID_EMU_NOCASE; | |
+ | |
+ return 0; | |
+} | |
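+ | 
+/* | 
+ * parse_android_emu() expects "android_emu=<uid>:<gid>:<octal mode>" | 
+ * with an optional ":nocase" suffix, mirroring what f2fs_show_options() | 
+ * prints back. An illustrative invocation: | 
+ * | 
+ *   mount -t f2fs -o android_emu=1023:1023:0775:nocase <dev> <dir> | 
+ */ | 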
+ | |
+static int parse_options(struct super_block *sb, char *options) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ substring_t args[MAX_OPT_ARGS]; | |
+ char *p, *name; | |
+ int arg = 0; | |
+ | |
+ if (!options) | |
+ return 0; | |
+ | |
+ while ((p = strsep(&options, ",")) != NULL) { | |
+ int token; | |
+ if (!*p) | |
+ continue; | |
+ /* | |
+ * Initialize args struct so we know whether arg was | |
+ * found; some options take optional arguments. | |
+ */ | |
+ args[0].to = args[0].from = NULL; | |
+ token = match_token(p, f2fs_tokens, args); | |
+ | |
+ switch (token) { | |
+ case Opt_gc_background: | |
+ name = match_strdup(&args[0]); | |
+ | |
+ if (!name) | |
+ return -ENOMEM; | |
+ if (!strncmp(name, "on", 2)) | |
+ set_opt(sbi, BG_GC); | |
+ else if (!strncmp(name, "off", 3)) | |
+ clear_opt(sbi, BG_GC); | |
+ else { | |
+ kfree(name); | |
+ return -EINVAL; | |
+ } | |
+ kfree(name); | |
+ break; | |
+ case Opt_disable_roll_forward: | |
+ set_opt(sbi, DISABLE_ROLL_FORWARD); | |
+ break; | |
+ case Opt_discard: | |
+ set_opt(sbi, DISCARD); | |
+ break; | |
+ case Opt_noheap: | |
+ set_opt(sbi, NOHEAP); | |
+ break; | |
+#ifdef CONFIG_F2FS_FS_XATTR | |
+ case Opt_nouser_xattr: | |
+ clear_opt(sbi, XATTR_USER); | |
+ break; | |
+ case Opt_inline_xattr: | |
+ set_opt(sbi, INLINE_XATTR); | |
+ break; | |
+#else | |
+ case Opt_nouser_xattr: | |
+ f2fs_msg(sb, KERN_INFO, | |
+ "nouser_xattr options not supported"); | |
+ break; | |
+ case Opt_inline_xattr: | |
+ f2fs_msg(sb, KERN_INFO, | |
+ "inline_xattr options not supported"); | |
+ break; | |
+#endif | |
+#ifdef CONFIG_F2FS_FS_POSIX_ACL | |
+ case Opt_noacl: | |
+ clear_opt(sbi, POSIX_ACL); | |
+ break; | |
+#else | |
+ case Opt_noacl: | |
+ f2fs_msg(sb, KERN_INFO, "noacl options not supported"); | |
+ break; | |
+#endif | |
+ case Opt_active_logs: | |
+ if (args->from && match_int(args, &arg)) | |
+ return -EINVAL; | |
+ if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) | |
+ return -EINVAL; | |
+ sbi->active_logs = arg; | |
+ break; | |
+ case Opt_disable_ext_identify: | |
+ set_opt(sbi, DISABLE_EXT_IDENTIFY); | |
+ break; | |
+ case Opt_err_continue: | |
+ clear_opt(sbi, ERRORS_RECOVER); | |
+ clear_opt(sbi, ERRORS_PANIC); | |
+ break; | |
+ case Opt_err_panic: | |
+ set_opt(sbi, ERRORS_PANIC); | |
+ clear_opt(sbi, ERRORS_RECOVER); | |
+ break; | |
+ case Opt_err_recover: | |
+ set_opt(sbi, ERRORS_RECOVER); | |
+ clear_opt(sbi, ERRORS_PANIC); | |
+ break; | |
+ case Opt_android_emu: | |
+ if (args->from) { | |
+ int ret; | |
+ char *perms = match_strdup(args); | |
+ | |
+ ret = parse_android_emu(sbi, perms); | |
+ kfree(perms); | |
+ | |
+ if (ret) | |
+ return -EINVAL; | |
+ | |
+ set_opt(sbi, ANDROID_EMU); | |
+ } else | |
+ return -EINVAL; | |
+ break; | |
+ | |
+ default: | |
+ f2fs_msg(sb, KERN_ERR, | |
+ "Unrecognized mount option \"%s\" or missing value", | |
+ p); | |
+ return -EINVAL; | |
+ } | |
+ } | |
+ return 0; | |
+} | |
+ | |
+static struct inode *f2fs_alloc_inode(struct super_block *sb) | |
+{ | |
+ struct f2fs_inode_info *fi; | |
+ | |
+ fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO); | |
+ if (!fi) | |
+ return NULL; | |
+ | |
+ init_once((void *) fi); | |
+ | |
+ /* Initialize f2fs-specific inode info */ | |
+ fi->vfs_inode.i_version = 1; | |
+ atomic_set(&fi->dirty_dents, 0); | |
+ fi->i_current_depth = 1; | |
+ fi->i_advise = 0; | |
+ rwlock_init(&fi->ext.ext_lock); | |
+ | |
+ set_inode_flag(fi, FI_NEW_INODE); | |
+ | |
+ if (test_opt(F2FS_SB(sb), INLINE_XATTR)) | |
+ set_inode_flag(fi, FI_INLINE_XATTR); | |
+ | |
+ return &fi->vfs_inode; | |
+} | |
+ | |
+static int f2fs_drop_inode(struct inode *inode) | |
+{ | |
+ /* | |
+ * This is to avoid a deadlock condition like below. | |
+ * writeback_single_inode(inode) | |
+ * - f2fs_write_data_page | |
+ * - f2fs_gc -> iput -> evict | |
+ * - inode_wait_for_writeback(inode) | |
+ */ | |
+ if (!inode_unhashed(inode) && inode->i_state & I_SYNC) | |
+ return 0; | |
+ return generic_drop_inode(inode); | |
+} | |
+ | |
+/* | |
+ * f2fs_dirty_inode() is called from __mark_inode_dirty() | |
+ * | |
+ * We set FI_DIRTY_INODE so that the inode is later written back through write_inode. | 
+ */ | |
+static void f2fs_dirty_inode(struct inode *inode, int flags) | |
+{ | |
+ set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); | |
+} | |
+ | |
+static void f2fs_i_callback(struct rcu_head *head) | |
+{ | |
+ struct inode *inode = container_of(head, struct inode, i_rcu); | |
+ kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode)); | |
+} | |
+ | |
+static void f2fs_destroy_inode(struct inode *inode) | |
+{ | |
+ call_rcu(&inode->i_rcu, f2fs_i_callback); | |
+} | |
+ | |
+static void f2fs_put_super(struct super_block *sb) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ | |
+ if (sbi->s_proc) { | |
+ remove_proc_entry("segment_info", sbi->s_proc); | |
+ remove_proc_entry(sb->s_id, f2fs_proc_root); | |
+ } | |
+ kobject_del(&sbi->s_kobj); | |
+ | |
+ f2fs_destroy_stats(sbi); | |
+ stop_gc_thread(sbi); | |
+ | |
+ write_checkpoint(sbi, true); | |
+ | |
+ iput(sbi->node_inode); | |
+ iput(sbi->meta_inode); | |
+ | |
+ /* destroy f2fs internal modules */ | |
+ destroy_node_manager(sbi); | |
+ destroy_segment_manager(sbi); | |
+ | |
+ kfree(sbi->ckpt); | |
+ kobject_put(&sbi->s_kobj); | |
+ wait_for_completion(&sbi->s_kobj_unregister); | |
+ | |
+ sb->s_fs_info = NULL; | |
+ brelse(sbi->raw_super_buf); | |
+ kfree(sbi); | |
+} | |
+ | |
+int f2fs_sync_fs(struct super_block *sb, int sync) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ | |
+ trace_f2fs_sync_fs(sb, sync); | |
+ | |
+ if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) | |
+ return 0; | |
+ | |
+ if (sync) { | |
+ mutex_lock(&sbi->gc_mutex); | |
+ write_checkpoint(sbi, false); | |
+ mutex_unlock(&sbi->gc_mutex); | |
+ } else { | |
+ f2fs_balance_fs(sbi); | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) | |
+{ | |
+ struct super_block *sb = dentry->d_sb; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | |
+ block_t total_count, user_block_count, start_count, ovp_count; | |
+ | |
+ total_count = le64_to_cpu(sbi->raw_super->block_count); | |
+ user_block_count = sbi->user_block_count; | |
+ start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr); | |
+ ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; | |
+ buf->f_type = F2FS_SUPER_MAGIC; | |
+ buf->f_bsize = sbi->blocksize; | |
+ | |
+ buf->f_blocks = total_count - start_count; | |
+ buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; | |
+ buf->f_bavail = user_block_count - valid_user_blocks(sbi); | |
+ | |
+ buf->f_files = sbi->total_node_count; | |
+ buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); | |
+ | |
+ buf->f_namelen = F2FS_NAME_LEN; | |
+ buf->f_fsid.val[0] = (u32)id; | |
+ buf->f_fsid.val[1] = (u32)(id >> 32); | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int f2fs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(vfs->mnt_sb); | |
+ | |
+ if (!(vfs->mnt_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) | |
+ seq_printf(seq, ",background_gc=%s", "on"); | |
+ else | |
+ seq_printf(seq, ",background_gc=%s", "off"); | |
+ if (test_opt(sbi, DISABLE_ROLL_FORWARD)) | |
+ seq_puts(seq, ",disable_roll_forward"); | |
+ if (test_opt(sbi, DISCARD)) | |
+ seq_puts(seq, ",discard"); | |
+ if (test_opt(sbi, NOHEAP)) | |
+ seq_puts(seq, ",no_heap_alloc"); | |
+#ifdef CONFIG_F2FS_FS_XATTR | |
+ if (test_opt(sbi, XATTR_USER)) | |
+ seq_puts(seq, ",user_xattr"); | |
+ else | |
+ seq_puts(seq, ",nouser_xattr"); | |
+ if (test_opt(sbi, INLINE_XATTR)) | |
+ seq_puts(seq, ",inline_xattr"); | |
+#endif | |
+#ifdef CONFIG_F2FS_FS_POSIX_ACL | |
+ if (test_opt(sbi, POSIX_ACL)) | |
+ seq_puts(seq, ",acl"); | |
+ else | |
+ seq_puts(seq, ",noacl"); | |
+#endif | |
+ if (test_opt(sbi, ERRORS_PANIC)) | |
+ seq_puts(seq, ",errors=panic"); | |
+ else if (test_opt(sbi, ERRORS_RECOVER)) | |
+ seq_puts(seq, ",errors=recover"); | |
+ else | |
+ seq_puts(seq, ",errors=continue"); | |
+ if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) | |
+ seq_puts(seq, ",disable_ext_identify"); | |
+ | |
+ if (test_opt(sbi, ANDROID_EMU)) | |
+ seq_printf(seq, ",android_emu=%u:%u:%ho%s", | |
+ sbi->android_emu_uid, | |
+ sbi->android_emu_gid, | |
+ sbi->android_emu_mode, | |
+ (sbi->android_emu_flags & | |
+ F2FS_ANDROID_EMU_NOCASE) ? | |
+ ":nocase" : ""); | |
+ | |
+ seq_printf(seq, ",active_logs=%u", sbi->active_logs); | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int segment_info_seq_show(struct seq_file *seq, void *offset) | |
+{ | |
+ struct super_block *sb = seq->private; | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); | |
+ int i; | |
+ | |
+ for (i = 0; i < total_segs; i++) { | |
+ seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); | |
+ if (i != 0 && (i % 10) == 0) | |
+ seq_puts(seq, "\n"); | |
+ else | |
+ seq_puts(seq, " "); | |
+ } | |
+ return 0; | |
+} | |
+ | |
+static int segment_info_open_fs(struct inode *inode, struct file *file) | |
+{ | |
+ return single_open(file, segment_info_seq_show, | |
+ PROC_I(inode)->pde->data); | |
+} | |
+ | |
+static const struct file_operations f2fs_seq_segment_info_fops = { | |
+ .owner = THIS_MODULE, | |
+ .open = segment_info_open_fs, | |
+ .read = seq_read, | |
+ .llseek = seq_lseek, | |
+ .release = single_release, | |
+}; | |
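+ | 
+/* | 
+ * segment_info is registered in f2fs_fill_super() below and appears as | 
+ * /proc/fs/f2fs/<dev>/segment_info, dumping the valid-block count of | 
+ * every main-area segment, ten counts per line. For example (device | 
+ * name illustrative): | 
+ * | 
+ *   cat /proc/fs/f2fs/mmcblk0p9/segment_info | 
+ */ | 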
+ | |
+static int f2fs_remount(struct super_block *sb, int *flags, char *data) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ struct f2fs_mount_info org_mount_opt; | |
+ int err, active_logs; | |
+ | |
+ /* | |
+ * Save the old mount options in case we | |
+ * need to restore them. | |
+ */ | |
+ org_mount_opt = sbi->mount_opt; | |
+ active_logs = sbi->active_logs; | |
+ | |
+ /* parse mount options */ | |
+ err = parse_options(sb, data); | |
+ if (err) | |
+ goto restore_opts; | |
+ | |
+ /* | |
+ * Both the previous and the new state of the filesystem are RO, | 
+ * so there is no point in checking GC conditions. | 
+ */ | |
+ if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) | |
+ goto skip; | |
+ | |
+ /* | |
+ * We stop the GC thread if the FS is mounted read-only or if | 
+ * background_gc=off is passed as a mount option. | 
+ * Also sync the filesystem. | 
+ */ | |
+ if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { | |
+ if (sbi->gc_thread) { | |
+ stop_gc_thread(sbi); | |
+ f2fs_sync_fs(sb, 1); | |
+ } | |
+ } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { | |
+ err = start_gc_thread(sbi); | |
+ if (err) | |
+ goto restore_opts; | |
+ } | |
+skip: | |
+ /* Update the POSIXACL Flag */ | |
+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | |
+ (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); | |
+ return 0; | |
+ | |
+restore_opts: | |
+ sbi->mount_opt = org_mount_opt; | |
+ sbi->active_logs = active_logs; | |
+ return err; | |
+} | |
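+ | 
+/* | 
+ * Mount options can thus be changed on a live filesystem; for example, | 
+ * stopping background GC (which also syncs, per the code above) with an | 
+ * illustrative mount point: | 
+ * | 
+ *   mount -o remount,background_gc=off /data | 
+ */ | 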
+ | |
+static struct super_operations f2fs_sops = { | |
+ .alloc_inode = f2fs_alloc_inode, | |
+ .drop_inode = f2fs_drop_inode, | |
+ .destroy_inode = f2fs_destroy_inode, | |
+ .write_inode = f2fs_write_inode, | |
+ .dirty_inode = f2fs_dirty_inode, | |
+ .show_options = f2fs_show_options, | |
+ .evict_inode = f2fs_evict_inode, | |
+ .put_super = f2fs_put_super, | |
+ .sync_fs = f2fs_sync_fs, | |
+ .statfs = f2fs_statfs, | |
+ .remount_fs = f2fs_remount, | |
+}; | |
+ | |
+static struct inode *f2fs_nfs_get_inode(struct super_block *sb, | |
+ u64 ino, u32 generation) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(sb); | |
+ struct inode *inode; | |
+ | |
+ if (ino < F2FS_ROOT_INO(sbi)) | |
+ return ERR_PTR(-ESTALE); | |
+ | |
+ /* | |
+ * f2fs_iget isn't quite right if the inode is currently unallocated! | |
+ * However f2fs_iget currently does appropriate checks to handle stale | |
+ * inodes so everything is OK. | |
+ */ | |
+ inode = f2fs_iget(sb, ino); | |
+ if (IS_ERR(inode)) | |
+ return ERR_CAST(inode); | |
+ if (generation && inode->i_generation != generation) { | |
+ /* we didn't find the right inode.. */ | |
+ iput(inode); | |
+ return ERR_PTR(-ESTALE); | |
+ } | |
+ return inode; | |
+} | |
+ | |
+static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid, | |
+ int fh_len, int fh_type) | |
+{ | |
+ return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | |
+ f2fs_nfs_get_inode); | |
+} | |
+ | |
+static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid, | |
+ int fh_len, int fh_type) | |
+{ | |
+ return generic_fh_to_parent(sb, fid, fh_len, fh_type, | |
+ f2fs_nfs_get_inode); | |
+} | |
+ | |
+static const struct export_operations f2fs_export_ops = { | |
+ .fh_to_dentry = f2fs_fh_to_dentry, | |
+ .fh_to_parent = f2fs_fh_to_parent, | |
+ .get_parent = f2fs_get_parent, | |
+}; | |
+ | |
+static loff_t max_file_size(unsigned bits) | |
+{ | |
+ loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); | |
+ loff_t leaf_count = ADDRS_PER_BLOCK; | |
+ | |
+ /* two direct node blocks */ | |
+ result += (leaf_count * 2); | |
+ | |
+ /* two indirect node blocks */ | |
+ leaf_count *= NIDS_PER_BLOCK; | |
+ result += (leaf_count * 2); | |
+ | |
+ /* one double indirect node block */ | |
+ leaf_count *= NIDS_PER_BLOCK; | |
+ result += leaf_count; | |
+ | |
+ result <<= bits; | |
+ return result; | |
+} | |
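+ | 
+/* | 
+ * With 4KB blocks and the usual layout constants (923 block addresses | 
+ * in the inode, 50 of them reserved for inline xattrs, and 1018 | 
+ * addresses or nids per node block; values assumed here, defined | 
+ * elsewhere), this works out to | 
+ * | 
+ *   873 + 2*1018 + 2*1018^2 + 1018^3 = 1,057,053,389 blocks, | 
+ * | 
+ * i.e. about 3.94 TiB per file. | 
+ */ | 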
+ | |
+static int sanity_check_raw_super(struct super_block *sb, | |
+ struct f2fs_super_block *raw_super) | |
+{ | |
+ unsigned int blocksize; | |
+ | |
+ if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { | |
+ f2fs_msg(sb, KERN_INFO, | |
+ "Magic Mismatch, valid(0x%x) - read(0x%x)", | |
+ F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); | |
+ return 1; | |
+ } | |
+ | |
+ /* Currently, support only 4KB page cache size */ | |
+ if (F2FS_BLKSIZE != PAGE_CACHE_SIZE) { | |
+ f2fs_msg(sb, KERN_INFO, | |
+ "Invalid page_cache_size (%lu), supports only 4KB\n", | |
+ PAGE_CACHE_SIZE); | |
+ return 1; | |
+ } | |
+ | |
+ /* Currently, support only 4KB block size */ | |
+ blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); | |
+ if (blocksize != F2FS_BLKSIZE) { | |
+ f2fs_msg(sb, KERN_INFO, | |
+ "Invalid blocksize (%u), supports only 4KB\n", | |
+ blocksize); | |
+ return 1; | |
+ } | |
+ | |
+ if (le32_to_cpu(raw_super->log_sectorsize) != | |
+ F2FS_LOG_SECTOR_SIZE) { | |
+ f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize"); | |
+ return 1; | |
+ } | |
+ if (le32_to_cpu(raw_super->log_sectors_per_block) != | |
+ F2FS_LOG_SECTORS_PER_BLOCK) { | |
+ f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block"); | |
+ return 1; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+static int sanity_check_ckpt(struct f2fs_sb_info *sbi) | |
+{ | |
+ unsigned int total, fsmeta; | |
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); | |
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | |
+ | |
+ total = le32_to_cpu(raw_super->segment_count); | |
+ fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); | |
+ fsmeta += le32_to_cpu(raw_super->segment_count_sit); | |
+ fsmeta += le32_to_cpu(raw_super->segment_count_nat); | |
+ fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); | |
+ fsmeta += le32_to_cpu(raw_super->segment_count_ssa); | |
+ | |
+ if (fsmeta >= total) | |
+ return 1; | |
+ | |
+ if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { | |
+ f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); | |
+ return 1; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+static void init_sb_info(struct f2fs_sb_info *sbi) | |
+{ | |
+ struct f2fs_super_block *raw_super = sbi->raw_super; | |
+ int i; | |
+ | |
+ sbi->log_sectors_per_block = | |
+ le32_to_cpu(raw_super->log_sectors_per_block); | |
+ sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize); | |
+ sbi->blocksize = 1 << sbi->log_blocksize; | |
+ sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); | |
+ sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg; | |
+ sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); | |
+ sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); | |
+ sbi->total_sections = le32_to_cpu(raw_super->section_count); | |
+ sbi->total_node_count = | |
+ (le32_to_cpu(raw_super->segment_count_nat) / 2) | |
+ * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK; | |
+ sbi->root_ino_num = le32_to_cpu(raw_super->root_ino); | |
+ sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); | |
+ sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); | |
+ sbi->cur_victim_sec = NULL_SECNO; | |
+ | |
+ for (i = 0; i < NR_COUNT_TYPE; i++) | |
+ atomic_set(&sbi->nr_pages[i], 0); | |
+} | |
+ | |
+static int validate_superblock(struct super_block *sb, | |
+ struct f2fs_super_block **raw_super, | |
+ struct buffer_head **raw_super_buf, sector_t block) | |
+{ | |
+ const char *super = (block == 0 ? "first" : "second"); | |
+ | |
+ /* read f2fs raw super block */ | |
+ *raw_super_buf = sb_bread(sb, block); | |
+ if (!*raw_super_buf) { | |
+ f2fs_msg(sb, KERN_ERR, "unable to read %s superblock", | |
+ super); | |
+ return -EIO; | |
+ } | |
+ | |
+ *raw_super = (struct f2fs_super_block *) | |
+ ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET); | |
+ | |
+ /* sanity checking of raw super */ | |
+ if (!sanity_check_raw_super(sb, *raw_super)) | |
+ return 0; | |
+ | |
+ f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " | |
+ "in %s superblock", super); | |
+ return -EINVAL; | |
+} | |
+ | |
+static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |
+{ | |
+ struct f2fs_sb_info *sbi; | |
+ struct f2fs_super_block *raw_super; | |
+ struct buffer_head *raw_super_buf; | |
+ struct inode *root; | |
+ long err = -EINVAL; | |
+ int i; | |
+ const char *descr = ""; | |
+ | |
+ f2fs_msg(sb, KERN_INFO, "mounting.."); | |
+ /* allocate memory for f2fs-specific super block info */ | |
+ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); | |
+ if (!sbi) | |
+ return -ENOMEM; | |
+ | |
+ /* set a block size */ | |
+ if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) { | |
+ f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); | |
+ goto free_sbi; | |
+ } | |
+ | |
+ err = validate_superblock(sb, &raw_super, &raw_super_buf, 0); | |
+ if (err) { | |
+ brelse(raw_super_buf); | |
+ /* check secondary superblock when primary failed */ | |
+ err = validate_superblock(sb, &raw_super, &raw_super_buf, 1); | |
+ if (err) | |
+ goto free_sb_buf; | |
+ } | |
+ sb->s_fs_info = sbi; | |
+ /* init some FS parameters */ | |
+ sbi->active_logs = NR_CURSEG_TYPE; | |
+ | |
+ set_opt(sbi, BG_GC); | |
+ | |
+#ifdef CONFIG_F2FS_FS_XATTR | |
+ set_opt(sbi, XATTR_USER); | |
+#endif | |
+#ifdef CONFIG_F2FS_FS_POSIX_ACL | |
+ set_opt(sbi, POSIX_ACL); | |
+#endif | |
+ /* parse mount options */ | |
+ err = parse_options(sb, (char *)data); | |
+ if (err) | |
+ goto free_sb_buf; | |
+ | |
+ sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); | |
+ get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | |
+ | |
+ sb->s_op = &f2fs_sops; | |
+ sb->s_xattr = f2fs_xattr_handlers; | |
+ sb->s_export_op = &f2fs_export_ops; | |
+ sb->s_magic = F2FS_SUPER_MAGIC; | |
+ sb->s_time_gran = 1; | |
+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | |
+ (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); | |
+ memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); | |
+ | |
+ /* init f2fs-specific super block info */ | |
+ sbi->sb = sb; | |
+ sbi->raw_super = raw_super; | |
+ sbi->raw_super_buf = raw_super_buf; | |
+ mutex_init(&sbi->gc_mutex); | |
+ mutex_init(&sbi->writepages); | |
+ mutex_init(&sbi->cp_mutex); | |
+ for (i = 0; i < NR_GLOBAL_LOCKS; i++) | |
+ mutex_init(&sbi->fs_lock[i]); | |
+ mutex_init(&sbi->node_write); | |
+ sbi->por_doing = 0; | |
+ spin_lock_init(&sbi->stat_lock); | |
+ init_rwsem(&sbi->bio_sem); | |
+ init_sb_info(sbi); | |
+ | |
+ /* get an inode for meta space */ | |
+ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); | |
+ if (IS_ERR(sbi->meta_inode)) { | |
+ f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); | |
+ err = PTR_ERR(sbi->meta_inode); | |
+ goto free_sb_buf; | |
+ } | |
+ | |
+get_cp: | |
+ err = get_valid_checkpoint(sbi); | |
+ if (err) { | |
+ f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint"); | |
+ goto free_meta_inode; | |
+ } | |
+ | |
+ /* sanity checking of checkpoint */ | |
+ err = -EINVAL; | |
+ if (sanity_check_ckpt(sbi)) { | |
+ f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint"); | |
+ goto free_cp; | |
+ } | |
+ | |
+ sbi->total_valid_node_count = | |
+ le32_to_cpu(sbi->ckpt->valid_node_count); | |
+ sbi->total_valid_inode_count = | |
+ le32_to_cpu(sbi->ckpt->valid_inode_count); | |
+ sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count); | |
+ sbi->total_valid_block_count = | |
+ le64_to_cpu(sbi->ckpt->valid_block_count); | |
+ sbi->last_valid_block_count = sbi->total_valid_block_count; | |
+ sbi->alloc_valid_block_count = 0; | |
+ INIT_LIST_HEAD(&sbi->dir_inode_list); | |
+ spin_lock_init(&sbi->dir_inode_lock); | |
+ | |
+ init_orphan_info(sbi); | |
+ | |
+ /* setup f2fs internal modules */ | |
+ err = build_segment_manager(sbi); | |
+ if (err) { | |
+ f2fs_msg(sb, KERN_ERR, | |
+ "Failed to initialize F2FS segment manager"); | |
+ goto free_sm; | |
+ } | |
+ err = build_node_manager(sbi); | |
+ if (err) { | |
+ f2fs_msg(sb, KERN_ERR, | |
+ "Failed to initialize F2FS node manager"); | |
+ goto free_nm; | |
+ } | |
+ | |
+ build_gc_manager(sbi); | |
+ | |
+ /* get an inode for node space */ | |
+ sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); | |
+ if (IS_ERR(sbi->node_inode)) { | |
+ f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); | |
+ err = PTR_ERR(sbi->node_inode); | |
+ goto free_nm; | |
+ } | |
+ | |
+ /* if there are any orphan inodes, free them */ | 
+ err = -EINVAL; | |
+ if (recover_orphan_inodes(sbi)) | |
+ goto free_node_inode; | |
+ | |
+ /* read root inode and dentry */ | |
+ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); | |
+ if (IS_ERR(root)) { | |
+ f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); | |
+ err = PTR_ERR(root); | |
+ goto free_node_inode; | |
+ } | |
+ if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) | |
+ goto free_root_inode; | |
+ | |
+ sb->s_root = d_alloc_root(root); /* allocate root dentry */ | |
+ if (!sb->s_root) { | |
+ err = -ENOMEM; | |
+ goto free_root_inode; | |
+ } | |
+ | |
+ /* recover fsynced data */ | |
+ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { | |
+ err = recover_fsync_data(sbi); | |
+ if (err) { | |
+ if (f2fs_handle_error(sbi)) { | |
+ set_opt(sbi, DISABLE_ROLL_FORWARD); | |
+ kfree(sbi->ckpt); | |
+ f2fs_msg(sb, KERN_ERR, | |
+ "reloading last checkpoint"); | |
+ goto get_cp; | |
+ } | |
+ f2fs_msg(sb, KERN_ERR, | |
+ "cannot recover all fsync data errno=%ld", err); | |
+ /* checkpoint what we have */ | |
+ write_checkpoint(sbi, false); | |
+ } | |
+ } | |
+ | |
+ /* | |
+ * If the filesystem is not mounted read-only, | 
+ * then start the gc_thread. | 
+ */ | |
+ if (!(sb->s_flags & MS_RDONLY)) { | |
+ /* After POR, we can run background GC thread.*/ | |
+ err = start_gc_thread(sbi); | |
+ if (err) | |
+ goto fail; | |
+ } | |
+ | |
+ err = f2fs_build_stats(sbi); | |
+ if (err) | |
+ goto fail; | |
+ | |
+ if (f2fs_proc_root) | |
+ sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); | |
+ | |
+ if (sbi->s_proc) | |
+ proc_create_data("segment_info", S_IRUGO, sbi->s_proc, | |
+ &f2fs_seq_segment_info_fops, sb); | |
+ | |
+ if (test_opt(sbi, DISCARD)) { | |
+ struct request_queue *q = bdev_get_queue(sb->s_bdev); | |
+ if (!blk_queue_discard(q)) | |
+ f2fs_msg(sb, KERN_WARNING, | |
+ "mounting with \"discard\" option, but " | |
+ "the device does not support discard"); | |
+ } | |
+ | |
+ if (test_opt(sbi, ANDROID_EMU)) | |
+ descr = " with android sdcard emulation"; | |
+ f2fs_msg(sb, KERN_INFO, "mounted filesystem%s", descr); | |
+ | |
+ sbi->s_kobj.kset = f2fs_kset; | |
+ init_completion(&sbi->s_kobj_unregister); | |
+ err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, | |
+ "%s", sb->s_id); | |
+ if (err) | |
+ goto fail; | |
+ | |
+ return 0; | |
+fail: | |
+ stop_gc_thread(sbi); | |
+free_root_inode: | |
+ iput(root); | |
+free_node_inode: | |
+ iput(sbi->node_inode); | |
+free_nm: | |
+ destroy_node_manager(sbi); | |
+free_sm: | |
+ destroy_segment_manager(sbi); | |
+free_cp: | |
+ kfree(sbi->ckpt); | |
+free_meta_inode: | |
+ make_bad_inode(sbi->meta_inode); | |
+ iput(sbi->meta_inode); | |
+free_sb_buf: | |
+ brelse(raw_super_buf); | |
+free_sbi: | |
+ kfree(sbi); | |
+ f2fs_msg(sb, KERN_ERR, "mount failed"); | |
+ return err; | |
+} | |
+ | |
+static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, | |
+ const char *dev_name, void *data) | |
+{ | |
+ return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); | |
+} | |
+ | |
+static struct file_system_type f2fs_fs_type = { | |
+ .owner = THIS_MODULE, | |
+ .name = "f2fs", | |
+ .mount = f2fs_mount, | |
+ .kill_sb = kill_block_super, | |
+ .fs_flags = FS_REQUIRES_DEV, | |
+}; | |
+ | |
+static int __init init_inodecache(void) | |
+{ | |
+ f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", | |
+ sizeof(struct f2fs_inode_info), NULL); | |
+ if (f2fs_inode_cachep == NULL) | |
+ return -ENOMEM; | |
+ return 0; | |
+} | |
+ | |
+static void destroy_inodecache(void) | |
+{ | |
+ /* | |
+ * Make sure all delayed rcu free inodes are flushed before we | |
+ * destroy cache. | |
+ */ | |
+ rcu_barrier(); | |
+ kmem_cache_destroy(f2fs_inode_cachep); | |
+} | |
+ | |
+static int __init init_f2fs_fs(void) | |
+{ | |
+ int err; | |
+ | |
+ err = init_inodecache(); | |
+ if (err) | |
+ goto fail; | |
+ err = create_node_manager_caches(); | |
+ if (err) | |
+ goto free_inodecache; | |
+ err = create_gc_caches(); | |
+ if (err) | |
+ goto free_node_manager_caches; | |
+ err = create_checkpoint_caches(); | |
+ if (err) | |
+ goto free_gc_caches; | |
+ f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); | |
+ if (!f2fs_kset) { | |
+ err = -ENOMEM; | |
+ goto free_checkpoint_caches; | |
+ } | |
+ err = register_filesystem(&f2fs_fs_type); | |
+ if (err) | |
+ goto free_kset; | |
+ f2fs_create_root_stats(); | |
+ f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); | |
+ return 0; | |
+ | |
+free_kset: | |
+ kset_unregister(f2fs_kset); | |
+free_checkpoint_caches: | |
+ destroy_checkpoint_caches(); | |
+free_gc_caches: | |
+ destroy_gc_caches(); | |
+free_node_manager_caches: | |
+ destroy_node_manager_caches(); | |
+free_inodecache: | |
+ destroy_inodecache(); | |
+fail: | |
+ return err; | |
+} | |
+ | |
+static void __exit exit_f2fs_fs(void) | |
+{ | |
+ remove_proc_entry("fs/f2fs", NULL); | |
+ f2fs_destroy_root_stats(); | |
+ unregister_filesystem(&f2fs_fs_type); | |
+ destroy_checkpoint_caches(); | |
+ destroy_gc_caches(); | |
+ destroy_node_manager_caches(); | |
+ destroy_inodecache(); | |
+ kset_unregister(f2fs_kset); | |
+} | |
+ | |
+module_init(init_f2fs_fs) | |
+module_exit(exit_f2fs_fs) | |
+ | |
+MODULE_AUTHOR("Samsung Electronics's Praesto Team"); | |
+MODULE_DESCRIPTION("Flash Friendly File System"); | |
+MODULE_LICENSE("GPL"); | |
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c | |
new file mode 100644 | |
index 0000000..85b99eb | |
--- /dev/null | |
+++ b/fs/f2fs/xattr.c | |
@@ -0,0 +1,600 @@ | |
+/* | |
+ * fs/f2fs/xattr.c | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * Portions of this code from linux/fs/ext2/xattr.c | |
+ * | |
+ * Copyright (C) 2001-2003 Andreas Gruenbacher <[email protected]> | |
+ * | |
+ * Fix by Harrison Xing <[email protected]>. | |
+ * Extended attributes for symlinks and special files added per | |
+ * suggestion of Luka Renko <[email protected]>. | |
+ * xattr consolidation Copyright (c) 2004 James Morris <[email protected]>, | |
+ * Red Hat Inc. | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#include <linux/rwsem.h> | |
+#include <linux/f2fs_fs.h> | |
+#include <linux/security.h> | |
+#include "f2fs.h" | |
+#include "xattr.h" | |
+ | |
+static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, | |
+ size_t list_size, const char *name, size_t name_len, int type) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); | |
+ int total_len, prefix_len = 0; | |
+ const char *prefix = NULL; | |
+ | |
+ switch (type) { | |
+ case F2FS_XATTR_INDEX_USER: | |
+ if (!test_opt(sbi, XATTR_USER)) | |
+ return -EOPNOTSUPP; | |
+ prefix = XATTR_USER_PREFIX; | |
+ prefix_len = XATTR_USER_PREFIX_LEN; | |
+ break; | |
+ case F2FS_XATTR_INDEX_TRUSTED: | |
+ if (!capable(CAP_SYS_ADMIN)) | |
+ return -EPERM; | |
+ prefix = XATTR_TRUSTED_PREFIX; | |
+ prefix_len = XATTR_TRUSTED_PREFIX_LEN; | |
+ break; | |
+ case F2FS_XATTR_INDEX_SECURITY: | |
+ prefix = XATTR_SECURITY_PREFIX; | |
+ prefix_len = XATTR_SECURITY_PREFIX_LEN; | |
+ break; | |
+ default: | |
+ return -EINVAL; | |
+ } | |
+ | |
+ total_len = prefix_len + name_len + 1; | |
+ if (list && total_len <= list_size) { | |
+ memcpy(list, prefix, prefix_len); | |
+ memcpy(list + prefix_len, name, name_len); | |
+ list[prefix_len + name_len] = '\0'; | |
+ } | |
+ return total_len; | |
+} | |
+ | |
+static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, | |
+ void *buffer, size_t size, int type) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); | |
+ | |
+ switch (type) { | |
+ case F2FS_XATTR_INDEX_USER: | |
+ if (!test_opt(sbi, XATTR_USER)) | |
+ return -EOPNOTSUPP; | |
+ break; | |
+ case F2FS_XATTR_INDEX_TRUSTED: | |
+ if (!capable(CAP_SYS_ADMIN)) | |
+ return -EPERM; | |
+ break; | |
+ case F2FS_XATTR_INDEX_SECURITY: | |
+ break; | |
+ default: | |
+ return -EINVAL; | |
+ } | |
+ if (strcmp(name, "") == 0) | |
+ return -EINVAL; | |
+ return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); | |
+} | |
+ | |
+static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, | |
+ const void *value, size_t size, int flags, int type) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); | |
+ | |
+ switch (type) { | |
+ case F2FS_XATTR_INDEX_USER: | |
+ if (!test_opt(sbi, XATTR_USER)) | |
+ return -EOPNOTSUPP; | |
+ break; | |
+ case F2FS_XATTR_INDEX_TRUSTED: | |
+ if (!capable(CAP_SYS_ADMIN)) | |
+ return -EPERM; | |
+ break; | |
+ case F2FS_XATTR_INDEX_SECURITY: | |
+ break; | |
+ default: | |
+ return -EINVAL; | |
+ } | |
+ if (strcmp(name, "") == 0) | |
+ return -EINVAL; | |
+ | |
+ return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); | |
+} | |
+ | |
+static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, | |
+ size_t list_size, const char *name, size_t name_len, int type) | |
+{ | |
+ const char *xname = F2FS_SYSTEM_ADVISE_PREFIX; | |
+ size_t size; | |
+ | |
+ if (type != F2FS_XATTR_INDEX_ADVISE) | |
+ return 0; | |
+ | |
+ size = strlen(xname) + 1; | |
+ if (list && size <= list_size) | |
+ memcpy(list, xname, size); | |
+ return size; | |
+} | |
+ | |
+static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, | |
+ void *buffer, size_t size, int type) | |
+{ | |
+ struct inode *inode = dentry->d_inode; | |
+ | |
+ if (!name || strcmp(name, "") != 0) | |
+ return -EINVAL; | |
+ | |
+ if (buffer) | |
+ *((char *)buffer) = F2FS_I(inode)->i_advise; | |
+ return sizeof(char); | |
+} | |
+ | |
+static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, | |
+ const void *value, size_t size, int flags, int type) | |
+{ | |
+ struct inode *inode = dentry->d_inode; | |
+ | |
+ if (!name || strcmp(name, "") != 0) | |
+ return -EINVAL; | |
+ if (!inode_owner_or_capable(inode)) | |
+ return -EPERM; | |
+ if (value == NULL) | |
+ return -EINVAL; | |
+ | |
+ F2FS_I(inode)->i_advise = *(char *)value; | |
+ return 0; | |
+} | |
+ | |
+#ifdef CONFIG_F2FS_FS_SECURITY | |
+static int __f2fs_setxattr(struct inode *inode, int name_index, | |
+ const char *name, const void *value, size_t value_len, | |
+ struct page *ipage); | |
+static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, | |
+ void *page) | |
+{ | |
+ const struct xattr *xattr; | |
+ int err = 0; | |
+ | |
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) { | |
+ err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, | |
+ xattr->name, xattr->value, | |
+ xattr->value_len, (struct page *)page); | |
+ if (err < 0) | |
+ break; | |
+ } | |
+ return err; | |
+} | |
+ | |
+int f2fs_init_security(struct inode *inode, struct inode *dir, | |
+ const struct qstr *qstr, struct page *ipage) | |
+{ | |
+ return security_new_inode_init_security(inode, dir, qstr, | |
+ &f2fs_initxattrs, ipage); | |
+} | |
+#endif | |
+ | |
+const struct xattr_handler f2fs_xattr_user_handler = { | |
+ .prefix = XATTR_USER_PREFIX, | |
+ .flags = F2FS_XATTR_INDEX_USER, | |
+ .list = f2fs_xattr_generic_list, | |
+ .get = f2fs_xattr_generic_get, | |
+ .set = f2fs_xattr_generic_set, | |
+}; | |
+ | |
+const struct xattr_handler f2fs_xattr_trusted_handler = { | |
+ .prefix = XATTR_TRUSTED_PREFIX, | |
+ .flags = F2FS_XATTR_INDEX_TRUSTED, | |
+ .list = f2fs_xattr_generic_list, | |
+ .get = f2fs_xattr_generic_get, | |
+ .set = f2fs_xattr_generic_set, | |
+}; | |
+ | |
+const struct xattr_handler f2fs_xattr_advise_handler = { | |
+ .prefix = F2FS_SYSTEM_ADVISE_PREFIX, | |
+ .flags = F2FS_XATTR_INDEX_ADVISE, | |
+ .list = f2fs_xattr_advise_list, | |
+ .get = f2fs_xattr_advise_get, | |
+ .set = f2fs_xattr_advise_set, | |
+}; | |
+ | |
+const struct xattr_handler f2fs_xattr_security_handler = { | |
+ .prefix = XATTR_SECURITY_PREFIX, | |
+ .flags = F2FS_XATTR_INDEX_SECURITY, | |
+ .list = f2fs_xattr_generic_list, | |
+ .get = f2fs_xattr_generic_get, | |
+ .set = f2fs_xattr_generic_set, | |
+}; | |
+ | |
+static const struct xattr_handler *f2fs_xattr_handler_map[] = { | |
+ [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, | |
+#ifdef CONFIG_F2FS_FS_POSIX_ACL | |
+ [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler, | |
+ [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, | |
+#endif | |
+ [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, | |
+#ifdef CONFIG_F2FS_FS_SECURITY | |
+ [F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler, | |
+#endif | |
+ [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, | |
+}; | |
+ | |
+const struct xattr_handler *f2fs_xattr_handlers[] = { | |
+ &f2fs_xattr_user_handler, | |
+#ifdef CONFIG_F2FS_FS_POSIX_ACL | |
+ &f2fs_xattr_acl_access_handler, | |
+ &f2fs_xattr_acl_default_handler, | |
+#endif | |
+ &f2fs_xattr_trusted_handler, | |
+#ifdef CONFIG_F2FS_FS_SECURITY | |
+ &f2fs_xattr_security_handler, | |
+#endif | |
+ &f2fs_xattr_advise_handler, | |
+ NULL, | |
+}; | |
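+ | 
+/* | 
+ * These handlers back the regular *xattr syscalls, so the standard | 
+ * userspace tools work unchanged; an illustrative session: | 
+ * | 
+ *   setfattr -n user.note -v hello file | 
+ *   getfattr -n user.note file | 
+ */ | 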
+ | |
+static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) | |
+{ | |
+ const struct xattr_handler *handler = NULL; | |
+ | |
+ if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map)) | |
+ handler = f2fs_xattr_handler_map[name_index]; | |
+ return handler; | |
+} | |
+ | |
+static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, | |
+ size_t name_len, const char *name) | |
+{ | |
+ struct f2fs_xattr_entry *entry; | |
+ | |
+ list_for_each_xattr(entry, base_addr) { | |
+ if (entry->e_name_index != name_index) | |
+ continue; | |
+ if (entry->e_name_len != name_len) | |
+ continue; | |
+ if (!memcmp(entry->e_name, name, name_len)) | |
+ break; | |
+ } | |
+ return entry; | |
+} | |
+ | |
+static void *read_all_xattrs(struct inode *inode, struct page *ipage) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ struct f2fs_xattr_header *header; | |
+ size_t size = PAGE_SIZE, inline_size = 0; | |
+ void *txattr_addr; | |
+ | |
+ inline_size = inline_xattr_size(inode); | |
+ | |
+ txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); | |
+ if (!txattr_addr) | |
+ return NULL; | |
+ | |
+ /* read from inline xattr */ | |
+ if (inline_size) { | |
+ struct page *page = NULL; | |
+ void *inline_addr; | |
+ | |
+ if (ipage) { | |
+ inline_addr = inline_xattr_addr(ipage); | |
+ } else { | |
+ page = get_node_page(sbi, inode->i_ino); | |
+ if (IS_ERR(page)) | |
+ goto fail; | |
+ inline_addr = inline_xattr_addr(page); | |
+ } | |
+ memcpy(txattr_addr, inline_addr, inline_size); | |
+ f2fs_put_page(page, 1); | |
+ } | |
+ | |
+ /* read from xattr node block */ | |
+ if (F2FS_I(inode)->i_xattr_nid) { | |
+ struct page *xpage; | |
+ void *xattr_addr; | |
+ | |
+ /* The inode already has an extended attribute block. */ | |
+ xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); | |
+ if (IS_ERR(xpage)) | |
+ goto fail; | |
+ | |
+ xattr_addr = page_address(xpage); | |
+ memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE); | |
+ f2fs_put_page(xpage, 1); | |
+ } | |
+ | |
+ header = XATTR_HDR(txattr_addr); | |
+ | |
+ /* xattrs have never been allocated */ | 
+ if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) { | |
+ header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC); | |
+ header->h_refcount = cpu_to_le32(1); | |
+ } | |
+ return txattr_addr; | |
+fail: | |
+ kzfree(txattr_addr); | |
+ return NULL; | |
+} | |
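+ | 
+/* | 
+ * read_all_xattrs() hands back one flat buffer, the inline xattr area | 
+ * first and the xattr node block after it, so callers can walk a single | 
+ * contiguous entry list regardless of where each entry is stored. | 
+ */ | 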
+ | |
+static inline int write_all_xattrs(struct inode *inode, __u32 hsize, | |
+ void *txattr_addr, struct page *ipage) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ size_t inline_size = 0; | |
+ void *xattr_addr; | |
+ struct page *xpage; | |
+ nid_t new_nid = 0; | |
+ int err; | |
+ | |
+ inline_size = inline_xattr_size(inode); | |
+ | |
+ if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) | |
+ if (!alloc_nid(sbi, &new_nid)) | |
+ return -ENOSPC; | |
+ | |
+ /* write to inline xattr */ | |
+ if (inline_size) { | |
+ struct page *page = NULL; | |
+ void *inline_addr; | |
+ | |
+ if (ipage) { | |
+ inline_addr = inline_xattr_addr(ipage); | |
+ } else { | |
+ page = get_node_page(sbi, inode->i_ino); | |
+ if (IS_ERR(page)) { | |
+ alloc_nid_failed(sbi, new_nid); | |
+ return PTR_ERR(page); | |
+ } | |
+ inline_addr = inline_xattr_addr(page); | |
+ } | |
+ memcpy(inline_addr, txattr_addr, inline_size); | |
+ f2fs_put_page(page, 1); | |
+ | |
+ /* no need to use xattr node block */ | |
+ if (hsize <= inline_size) { | |
+ err = truncate_xattr_node(inode, ipage); | |
+ alloc_nid_failed(sbi, new_nid); | |
+ return err; | |
+ } | |
+ } | |
+ | |
+ /* write to xattr node block */ | |
+ if (F2FS_I(inode)->i_xattr_nid) { | |
+ xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); | |
+ if (IS_ERR(xpage)) { | |
+ alloc_nid_failed(sbi, new_nid); | |
+ return PTR_ERR(xpage); | |
+ } | |
+ BUG_ON(new_nid); | |
+ } else { | |
+ struct dnode_of_data dn; | |
+ set_new_dnode(&dn, inode, NULL, NULL, new_nid); | |
+ xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); | |
+ if (IS_ERR(xpage)) { | |
+ alloc_nid_failed(sbi, new_nid); | |
+ return PTR_ERR(xpage); | |
+ } | |
+ alloc_nid_done(sbi, new_nid); | |
+ } | |
+ | |
+ xattr_addr = page_address(xpage); | |
+ memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE - | |
+ sizeof(struct node_footer)); | |
+ set_page_dirty(xpage); | |
+ f2fs_put_page(xpage, 1); | |
+ | |
+ /* need to checkpoint during fsync */ | |
+ F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); | |
+ return 0; | |
+} | |
+ | |
+int f2fs_getxattr(struct inode *inode, int name_index, const char *name, | |
+ void *buffer, size_t buffer_size) | |
+{ | |
+ struct f2fs_xattr_entry *entry; | |
+ void *base_addr; | |
+ int error = 0; | |
+ size_t value_len, name_len; | |
+ | |
+ if (name == NULL) | |
+ return -EINVAL; | |
+ name_len = strlen(name); | |
+ | |
+ base_addr = read_all_xattrs(inode, NULL); | |
+ if (!base_addr) | |
+ return -ENOMEM; | |
+ | |
+ entry = __find_xattr(base_addr, name_index, name_len, name); | |
+ if (IS_XATTR_LAST_ENTRY(entry)) { | |
+ error = -ENODATA; | |
+ goto cleanup; | |
+ } | |
+ | |
+ value_len = le16_to_cpu(entry->e_value_size); | |
+ | |
+ if (buffer && value_len > buffer_size) { | |
+ error = -ERANGE; | |
+ goto cleanup; | |
+ } | |
+ | |
+ if (buffer) { | |
+ char *pval = entry->e_name + entry->e_name_len; | |
+ memcpy(buffer, pval, value_len); | |
+ } | |
+ error = value_len; | |
+ | |
+cleanup: | |
+ kzfree(base_addr); | |
+ return error; | |
+} | |
+ | |
+ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) | |
+{ | |
+ struct inode *inode = dentry->d_inode; | |
+ struct f2fs_xattr_entry *entry; | |
+ void *base_addr; | |
+ int error = 0; | |
+ size_t rest = buffer_size; | |
+ | |
+ base_addr = read_all_xattrs(inode, NULL); | |
+ if (!base_addr) | |
+ return -ENOMEM; | |
+ | |
+ list_for_each_xattr(entry, base_addr) { | |
+ const struct xattr_handler *handler = | |
+ f2fs_xattr_handler(entry->e_name_index); | |
+ size_t size; | |
+ | |
+ if (!handler) | |
+ continue; | |
+ | |
+ size = handler->list(dentry, buffer, rest, entry->e_name, | |
+ entry->e_name_len, handler->flags); | |
+ if (buffer && size > rest) { | |
+ error = -ERANGE; | |
+ goto cleanup; | |
+ } | |
+ | |
+ if (buffer) | |
+ buffer += size; | |
+ rest -= size; | |
+ } | |
+ error = buffer_size - rest; | |
+cleanup: | |
+ kzfree(base_addr); | |
+ return error; | |
+} | |
+ | |
+static int __f2fs_setxattr(struct inode *inode, int name_index, | |
+ const char *name, const void *value, size_t value_len, | |
+ struct page *ipage) | |
+{ | |
+ struct f2fs_inode_info *fi = F2FS_I(inode); | |
+ struct f2fs_xattr_entry *here, *last; | |
+ void *base_addr; | |
+ int found, newsize; | |
+ size_t name_len; | |
+ __u32 new_hsize; | |
+ int error = -ENOMEM; | |
+ | |
+ if (name == NULL) | |
+ return -EINVAL; | |
+ | |
+ if (value == NULL) | |
+ value_len = 0; | |
+ | |
+ name_len = strlen(name); | |
+ | |
+ if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) | |
+ return -ERANGE; | |
+ | |
+ base_addr = read_all_xattrs(inode, ipage); | |
+ if (!base_addr) | |
+ goto exit; | |
+ | |
+ /* find entry with wanted name. */ | |
+ here = __find_xattr(base_addr, name_index, name_len, name); | |
+ | |
+ found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; | |
+ last = here; | |
+ | |
+ while (!IS_XATTR_LAST_ENTRY(last)) | |
+ last = XATTR_NEXT_ENTRY(last); | |
+ | |
+ newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + | |
+ name_len + value_len); | |
+ | |
+ /* 1. Check space */ | |
+ if (value) { | |
+ int free; | |
+ /* | |
+ * If value is NULL, this is a remove operation. | 
+ * For an update operation, we calculate the free space. | 
+ */ | |
+ free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); | |
+ if (found) | |
+ free = free - ENTRY_SIZE(here); | |
+ | |
+ if (free < newsize) { | |
+ error = -ENOSPC; | |
+ goto exit; | |
+ } | |
+ } | |
+ | |
+ /* 2. Remove old entry */ | |
+ if (found) { | |
+ /* | |
+ * If the entry was found, remove the old entry. | 
+ * If it was not found, no removal is needed. | 
+ */ | |
+ struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here); | |
+ int oldsize = ENTRY_SIZE(here); | |
+ | |
+ memmove(here, next, (char *)last - (char *)next); | |
+ last = (struct f2fs_xattr_entry *)((char *)last - oldsize); | |
+ memset(last, 0, oldsize); | |
+ } | |
+ | |
+ new_hsize = (char *)last - (char *)base_addr; | |
+ | |
+ /* 3. Write new entry */ | |
+ if (value) { | |
+ char *pval; | |
+ /* | |
+ * The old entry has already been removed above, | 
+ * so we just write the new entry. | 
+ */ | |
+ memset(last, 0, newsize); | |
+ last->e_name_index = name_index; | |
+ last->e_name_len = name_len; | |
+ memcpy(last->e_name, name, name_len); | |
+ pval = last->e_name + name_len; | |
+ memcpy(pval, value, value_len); | |
+ last->e_value_size = cpu_to_le16(value_len); | |
+ new_hsize += newsize; | |
+ } | |
+ | |
+ error = write_all_xattrs(inode, new_hsize, base_addr, ipage); | |
+ if (error) | |
+ goto exit; | |
+ | |
+ if (is_inode_flag_set(fi, FI_ACL_MODE)) { | |
+ inode->i_mode = fi->i_acl_mode; | |
+ inode->i_ctime = CURRENT_TIME; | |
+ clear_inode_flag(fi, FI_ACL_MODE); | |
+ } | |
+ | |
+ if (ipage) | |
+ update_inode(inode, ipage); | |
+ else | |
+ update_inode_page(inode); | |
+exit: | |
+ kzfree(base_addr); | |
+ return error; | |
+} | |
+ | |
+int f2fs_setxattr(struct inode *inode, int name_index, const char *name, | |
+ const void *value, size_t value_len, struct page *ipage) | |
+{ | |
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | |
+ int ilock; | |
+ int err; | |
+ | |
+ f2fs_balance_fs(sbi); | |
+ | |
+ ilock = mutex_lock_op(sbi); | |
+ | |
+ err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); | |
+ | |
+ mutex_unlock_op(sbi, ilock); | |
+ | |
+ return err; | |
+} | |
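
How these paths are reached in practice: the VFS routes the setxattr(2)/getxattr(2)/listxattr(2) system calls through the handlers registered in f2fs_xattr_handlers, and f2fs_setxattr() serializes the update under the per-SB operation lock as shown above. A minimal userspace sketch, assuming a file on an already-mounted f2fs volume (the path below is hypothetical):

#include <stdio.h>
#include <sys/xattr.h>

int main(void)
{
	/* Hypothetical path; any file on a mounted f2fs volume works. */
	const char *path = "/data/local/tmp/testfile";
	const char value[] = "1";

	/* setxattr(2) ends up in __f2fs_setxattr() via the VFS xattr
	 * handlers; flags = 0 means create-or-replace. */
	if (setxattr(path, "user.test", value, sizeof(value) - 1, 0) != 0)
		perror("setxattr");
	return 0;
}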
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h | |
new file mode 100644 | |
index 0000000..02a08fb | |
--- /dev/null | |
+++ b/fs/f2fs/xattr.h | |
@@ -0,0 +1,152 @@ | |
+/* | |
+ * fs/f2fs/xattr.h | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * Portions of this code from linux/fs/ext2/xattr.h | |
+ * | |
+ * On-disk format of extended attributes for the ext2 filesystem. | |
+ * | |
+ * (C) 2001 Andreas Gruenbacher, <[email protected]> | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#ifndef __F2FS_XATTR_H__ | |
+#define __F2FS_XATTR_H__ | |
+ | |
+#include <linux/init.h> | |
+#include <linux/xattr.h> | |
+ | |
+/* Magic value in attribute blocks */ | |
+#define F2FS_XATTR_MAGIC 0xF2F52011 | |
+ | |
+/* Maximum number of references to one attribute block */ | |
+#define F2FS_XATTR_REFCOUNT_MAX 1024 | |
+ | |
+/* Name indexes */ | |
+#define F2FS_SYSTEM_ADVISE_PREFIX "system.advise" | |
+#define F2FS_XATTR_INDEX_USER 1 | |
+#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS 2 | |
+#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT 3 | |
+#define F2FS_XATTR_INDEX_TRUSTED 4 | |
+#define F2FS_XATTR_INDEX_LUSTRE 5 | |
+#define F2FS_XATTR_INDEX_SECURITY 6 | |
+#define F2FS_XATTR_INDEX_ADVISE 7 | |
+ | |
+struct f2fs_xattr_header { | |
+ __le32 h_magic; /* magic number for identification */ | |
+ __le32 h_refcount; /* reference count */ | |
+ __u32 h_reserved[4]; /* zero right now */ | |
+}; | |
+ | |
+struct f2fs_xattr_entry { | |
+ __u8 e_name_index; | |
+ __u8 e_name_len; | |
+ __le16 e_value_size; /* size of attribute value */ | |
+ char e_name[0]; /* attribute name */ | |
+}; | |
+ | |
+#define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr)) | |
+#define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr)) | |
+#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1)) | |
+#define XATTR_ROUND (3) | |
+ | |
+#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND) | |
+ | |
+#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \ | |
+ entry->e_name_len + le16_to_cpu(entry->e_value_size))) | |
+ | |
+#define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\ | |
+ ENTRY_SIZE(entry))) | |
+ | |
+#define IS_XATTR_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) | |
+ | |
+#define list_for_each_xattr(entry, addr) \ | |
+ for (entry = XATTR_FIRST_ENTRY(addr);\ | |
+ !IS_XATTR_LAST_ENTRY(entry);\ | |
+ entry = XATTR_NEXT_ENTRY(entry)) | |
+ | |
+#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \ | |
+ sizeof(struct node_footer) - sizeof(__u32)) | |
+ | |
+#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \ | |
+ sizeof(struct f2fs_xattr_header) - \ | |
+ sizeof(struct f2fs_xattr_entry)) | |
+ | |
+/* | |
+ * On-disk structure of f2fs_xattr | |
+ * We use inline xattrs space + 1 block for xattr. | |
+ * | |
+ * +--------------------+ | |
+ * | f2fs_xattr_header | | |
+ * | | | |
+ * +--------------------+ | |
+ * | f2fs_xattr_entry | | |
+ * | .e_name_index = 1 | | |
+ * | .e_name_len = 3 | | |
+ * | .e_value_size = 14 | | |
+ * | .e_name = "foo" | | |
+ * | "value_of_xattr" |<- value_offs = e_name + e_name_len | |
+ * +--------------------+ | |
+ * | f2fs_xattr_entry | | |
+ * | .e_name_index = 4 | | |
+ * | .e_name = "bar" | | |
+ * +--------------------+ | |
+ * | | | |
+ * | Free | | |
+ * | | | |
+ * +--------------------+<- MIN_OFFSET | |
+ * | node_footer | | |
+ * | (nid, ino, offset) | | |
+ * +--------------------+ | |
+ * | |
+ **/ | |
+ | |
+#ifdef CONFIG_F2FS_FS_XATTR | |
+extern const struct xattr_handler f2fs_xattr_user_handler; | |
+extern const struct xattr_handler f2fs_xattr_trusted_handler; | |
+extern const struct xattr_handler f2fs_xattr_acl_access_handler; | |
+extern const struct xattr_handler f2fs_xattr_acl_default_handler; | |
+extern const struct xattr_handler f2fs_xattr_advise_handler; | |
+extern const struct xattr_handler f2fs_xattr_security_handler; | |
+ | |
+extern const struct xattr_handler *f2fs_xattr_handlers[]; | |
+ | |
+extern int f2fs_setxattr(struct inode *, int, const char *, | |
+ const void *, size_t, struct page *); | |
+extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); | |
+extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); | |
+#else | |
+ | |
+#define f2fs_xattr_handlers NULL | |
+static inline int f2fs_setxattr(struct inode *inode, int name_index, | |
+ const char *name, const void *value, size_t value_len, struct page *ipage) | |
+{ | |
+ return -EOPNOTSUPP; | |
+} | |
+static inline int f2fs_getxattr(struct inode *inode, int name_index, | |
+ const char *name, void *buffer, size_t buffer_size) | |
+{ | |
+ return -EOPNOTSUPP; | |
+} | |
+static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, | |
+ size_t buffer_size) | |
+{ | |
+ return -EOPNOTSUPP; | |
+} | |
+#endif | |
+ | |
+#ifdef CONFIG_F2FS_FS_SECURITY | |
+extern int f2fs_init_security(struct inode *, struct inode *, | |
+ const struct qstr *, struct page *); | |
+#else | |
+static inline int f2fs_init_security(struct inode *inode, struct inode *dir, | |
+ const struct qstr *qstr, struct page *ipage) | |
+{ | |
+ return 0; | |
+} | |
+#endif | |
+#endif /* __F2FS_XATTR_H__ */ | |
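
The size and alignment macros above fully determine how entries pack into the inline-xattr area plus one node block. A standalone sketch that mirrors the arithmetic (the 4-byte entry header and 4-byte alignment come from the structures above; the values match the "foo"/"value_of_xattr" example in the layout diagram, and nothing here is kernel code):

#include <stdio.h>
#include <string.h>

#define XATTR_ROUND	(3)
#define XATTR_ALIGN(size)	(((size) + XATTR_ROUND) & ~XATTR_ROUND)
#define ENTRY_HDR	4	/* sizeof(struct f2fs_xattr_entry) */

int main(void)
{
	size_t name_len = strlen("foo");		/* e_name_len = 3 */
	size_t value_len = strlen("value_of_xattr");	/* e_value_size = 14 */

	/* 4 + 3 + 14 = 21 bytes, padded up to the 4-byte boundary: 24 */
	printf("entry size = %zu bytes\n",
	       XATTR_ALIGN(ENTRY_HDR + name_len + value_len));
	return 0;
}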
diff --git a/include/linux/dcache.h b/include/linux/dcache.h | |
index 713c7c6..9327888 100644 | |
--- a/include/linux/dcache.h | |
+++ b/include/linux/dcache.h | |
@@ -239,6 +239,7 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); | |
extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); | |
extern struct dentry * d_splice_alias(struct inode *, struct dentry *); | |
extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); | |
+extern struct dentry * d_find_any_alias(struct inode *inode); | |
extern struct dentry * d_obtain_alias(struct inode *); | |
extern void shrink_dcache_sb(struct super_block *); | |
extern void shrink_dcache_parent(struct dentry *); | |
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h | |
new file mode 100644 | |
index 0000000..bb942f6 | |
--- /dev/null | |
+++ b/include/linux/f2fs_fs.h | |
@@ -0,0 +1,424 @@ | |
+/** | |
+ * include/linux/f2fs_fs.h | |
+ * | |
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
+ * http://www.samsung.com/ | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+#ifndef _LINUX_F2FS_FS_H | |
+#define _LINUX_F2FS_FS_H | |
+ | |
+#include <linux/pagemap.h> | |
+#include <linux/types.h> | |
+ | |
+#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ | |
+#define F2FS_LOG_SECTOR_SIZE 9 /* 9 bits for 512 byte */ | |
+#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ | |
+#define F2FS_BLKSIZE 4096 /* support only 4KB block */ | |
+#define F2FS_MAX_EXTENSION 64 /* # of extension entries */ | |
+ | |
+#define NULL_ADDR ((block_t)0) /* used as block_t addresses */ | |
+#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ | |
+ | |
+#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) | |
+#define F2FS_NODE_INO(sbi) (sbi->node_ino_num) | |
+#define F2FS_META_INO(sbi) (sbi->meta_ino_num) | |
+ | |
+/* This flag is used by node and meta inodes, and by recovery */ | |
+#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) | |
+ | |
+/* | |
+ * For further optimization of multi-head logs, the on-disk layout supports a | |
+ * maximum of 16 logs by default; 16 is expected to cover all cases | |
+ * comfortably. The implementation currently uses no more than 6 logs. | |
+ * Half the logs are used for nodes, and the other half are used for data. | |
+ */ | |
+#define MAX_ACTIVE_LOGS 16 | |
+#define MAX_ACTIVE_NODE_LOGS 8 | |
+#define MAX_ACTIVE_DATA_LOGS 8 | |
+ | |
+/* | |
+ * For superblock | |
+ */ | |
+struct f2fs_super_block { | |
+ __le32 magic; /* Magic Number */ | |
+ __le16 major_ver; /* Major Version */ | |
+ __le16 minor_ver; /* Minor Version */ | |
+ __le32 log_sectorsize; /* log2 sector size in bytes */ | |
+ __le32 log_sectors_per_block; /* log2 # of sectors per block */ | |
+ __le32 log_blocksize; /* log2 block size in bytes */ | |
+ __le32 log_blocks_per_seg; /* log2 # of blocks per segment */ | |
+ __le32 segs_per_sec; /* # of segments per section */ | |
+ __le32 secs_per_zone; /* # of sections per zone */ | |
+ __le32 checksum_offset; /* checksum offset inside super block */ | |
+ __le64 block_count; /* total # of user blocks */ | |
+ __le32 section_count; /* total # of sections */ | |
+ __le32 segment_count; /* total # of segments */ | |
+ __le32 segment_count_ckpt; /* # of segments for checkpoint */ | |
+ __le32 segment_count_sit; /* # of segments for SIT */ | |
+ __le32 segment_count_nat; /* # of segments for NAT */ | |
+ __le32 segment_count_ssa; /* # of segments for SSA */ | |
+ __le32 segment_count_main; /* # of segments for main area */ | |
+ __le32 segment0_blkaddr; /* start block address of segment 0 */ | |
+ __le32 cp_blkaddr; /* start block address of checkpoint */ | |
+ __le32 sit_blkaddr; /* start block address of SIT */ | |
+ __le32 nat_blkaddr; /* start block address of NAT */ | |
+ __le32 ssa_blkaddr; /* start block address of SSA */ | |
+ __le32 main_blkaddr; /* start block address of main area */ | |
+ __le32 root_ino; /* root inode number */ | |
+ __le32 node_ino; /* node inode number */ | |
+ __le32 meta_ino; /* meta inode number */ | |
+ __u8 uuid[16]; /* 128-bit uuid for volume */ | |
+ __le16 volume_name[512]; /* volume name */ | |
+ __le32 extension_count; /* # of extensions below */ | |
+ __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ | |
+} __packed; | |
+ | |
+/* | |
+ * For checkpoint | |
+ */ | |
+#define CP_ERROR_FLAG 0x00000008 | |
+#define CP_COMPACT_SUM_FLAG 0x00000004 | |
+#define CP_ORPHAN_PRESENT_FLAG 0x00000002 | |
+#define CP_UMOUNT_FLAG 0x00000001 | |
+ | |
+struct f2fs_checkpoint { | |
+ __le64 checkpoint_ver; /* checkpoint block version number */ | |
+ __le64 user_block_count; /* # of user blocks */ | |
+ __le64 valid_block_count; /* # of valid blocks in main area */ | |
+ __le32 rsvd_segment_count; /* # of reserved segments for gc */ | |
+ __le32 overprov_segment_count; /* # of overprovision segments */ | |
+ __le32 free_segment_count; /* # of free segments in main area */ | |
+ | |
+ /* information of current node segments */ | |
+ __le32 cur_node_segno[MAX_ACTIVE_NODE_LOGS]; | |
+ __le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS]; | |
+ /* information of current data segments */ | |
+ __le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS]; | |
+ __le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS]; | |
+ __le32 ckpt_flags; /* Flags : umount and journal_present */ | |
+ __le32 cp_pack_total_block_count; /* total # of one cp pack */ | |
+ __le32 cp_pack_start_sum; /* start block number of data summary */ | |
+ __le32 valid_node_count; /* Total number of valid nodes */ | |
+ __le32 valid_inode_count; /* Total number of valid inodes */ | |
+ __le32 next_free_nid; /* Next free node number */ | |
+ __le32 sit_ver_bitmap_bytesize; /* Default value 64 */ | |
+ __le32 nat_ver_bitmap_bytesize; /* Default value 256 */ | |
+ __le32 checksum_offset; /* checksum offset inside cp block */ | |
+ __le64 elapsed_time; /* mounted time */ | |
+ /* allocation type of current segment */ | |
+ unsigned char alloc_type[MAX_ACTIVE_LOGS]; | |
+ | |
+ /* SIT and NAT version bitmap */ | |
+ unsigned char sit_nat_version_bitmap[1]; | |
+} __packed; | |
+ | |
+/* | |
+ * For orphan inode management | |
+ */ | |
+#define F2FS_ORPHANS_PER_BLOCK 1020 | |
+ | |
+struct f2fs_orphan_block { | |
+ __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */ | |
+ __le32 reserved; /* reserved */ | |
+ __le16 blk_addr; /* block index in current CP */ | |
+ __le16 blk_count; /* Number of orphan inode blocks in CP */ | |
+ __le32 entry_count; /* Total number of orphan nodes in current CP */ | |
+ __le32 check_sum; /* CRC32 for orphan inode block */ | |
+} __packed; | |
+ | |
+/* | |
+ * For NODE structure | |
+ */ | |
+struct f2fs_extent { | |
+ __le32 fofs; /* start file offset of the extent */ | |
+ __le32 blk_addr; /* start block address of the extent */ | |
+ __le32 len; /* length of the extent */ | |
+} __packed; | |
+ | |
+#define F2FS_NAME_LEN 255 | |
+#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ | |
+#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ | |
+#define ADDRS_PER_INODE(fi) addrs_per_inode(fi) | |
+#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ | |
+#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ | |
+ | |
+#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) | |
+#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) | |
+#define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3) | |
+#define NODE_IND2_BLOCK (DEF_ADDRS_PER_INODE + 4) | |
+#define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5) | |
+ | |
+#define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ | |
+ | |
+struct f2fs_inode { | |
+ __le16 i_mode; /* file mode */ | |
+ __u8 i_advise; /* file hints */ | |
+ __u8 i_inline; /* file inline flags */ | |
+ __le32 i_uid; /* user ID */ | |
+ __le32 i_gid; /* group ID */ | |
+ __le32 i_links; /* links count */ | |
+ __le64 i_size; /* file size in bytes */ | |
+ __le64 i_blocks; /* file size in blocks */ | |
+ __le64 i_atime; /* access time */ | |
+ __le64 i_ctime; /* change time */ | |
+ __le64 i_mtime; /* modification time */ | |
+ __le32 i_atime_nsec; /* access time in nano scale */ | |
+ __le32 i_ctime_nsec; /* change time in nano scale */ | |
+ __le32 i_mtime_nsec; /* modification time in nano scale */ | |
+ __le32 i_generation; /* file version (for NFS) */ | |
+ __le32 i_current_depth; /* only for directory depth */ | |
+ __le32 i_xattr_nid; /* nid to save xattr */ | |
+ __le32 i_flags; /* file attributes */ | |
+ __le32 i_pino; /* parent inode number */ | |
+ __le32 i_namelen; /* file name length */ | |
+ __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */ | |
+ __u8 i_reserved2; /* for backward compatibility */ | |
+ | |
+ struct f2fs_extent i_ext; /* caching a largest extent */ | |
+ | |
+ __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ | |
+ | |
+ __le32 i_nid[5]; /* direct(2), indirect(2), | |
+ double_indirect(1) node id */ | |
+} __packed; | |
+ | |
+struct direct_node { | |
+ __le32 addr[ADDRS_PER_BLOCK]; /* array of data block address */ | |
+} __packed; | |
+ | |
+struct indirect_node { | |
+ __le32 nid[NIDS_PER_BLOCK]; /* array of data block address */ | |
+} __packed; | |
+ | |
+enum { | |
+ COLD_BIT_SHIFT = 0, | |
+ FSYNC_BIT_SHIFT, | |
+ DENT_BIT_SHIFT, | |
+ OFFSET_BIT_SHIFT | |
+}; | |
+ | |
+struct node_footer { | |
+ __le32 nid; /* node id */ | |
+ __le32 ino; /* inode number */ | |
+ __le32 flag; /* include cold/fsync/dentry marks and offset */ | |
+ __le64 cp_ver; /* checkpoint version */ | |
+ __le32 next_blkaddr; /* next node page block address */ | |
+} __packed; | |
+ | |
+struct f2fs_node { | |
+ /* can be one of three types: inode, direct, and indirect types */ | |
+ union { | |
+ struct f2fs_inode i; | |
+ struct direct_node dn; | |
+ struct indirect_node in; | |
+ }; | |
+ struct node_footer footer; | |
+} __packed; | |
+ | |
+/* | |
+ * For NAT entries | |
+ */ | |
+#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry)) | |
+ | |
+struct f2fs_nat_entry { | |
+ __u8 version; /* latest version of cached nat entry */ | |
+ __le32 ino; /* inode number */ | |
+ __le32 block_addr; /* block address */ | |
+} __packed; | |
+ | |
+struct f2fs_nat_block { | |
+ struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK]; | |
+} __packed; | |
+ | |
+/* | |
+ * For SIT entries | |
+ * | |
+ * Each segment is 2MB in size by default, so the validity bitmap for the | |
+ * blocks therein occupies 64 bytes (512 bits). | |
+ * This size is not allowed to change. | |
+ */ | |
+#define SIT_VBLOCK_MAP_SIZE 64 | |
+#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry)) | |
+ | |
+/* | |
+ * Note that f2fs_sit_entry->vblocks has the following bit-field information. | |
+ * [15:10] : allocation type such as CURSEG_XXXX_TYPE | |
+ * [9:0] : valid block count | |
+ */ | |
+#define SIT_VBLOCKS_SHIFT 10 | |
+#define SIT_VBLOCKS_MASK ((1 << SIT_VBLOCKS_SHIFT) - 1) | |
+#define GET_SIT_VBLOCKS(raw_sit) \ | |
+ (le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK) | |
+#define GET_SIT_TYPE(raw_sit) \ | |
+ ((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK) \ | |
+ >> SIT_VBLOCKS_SHIFT) | |
+ | |
+struct f2fs_sit_entry { | |
+ __le16 vblocks; /* reference above */ | |
+ __u8 valid_map[SIT_VBLOCK_MAP_SIZE]; /* bitmap for valid blocks */ | |
+ __le64 mtime; /* segment age for cleaning */ | |
+} __packed; | |
+ | |
+struct f2fs_sit_block { | |
+ struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK]; | |
+} __packed; | |
+ | |
+/* | |
+ * For segment summary | |
+ * | |
+ * One summary block contains exactly 512 summary entries, representing | |
+ * exactly one 2MB segment by default. The basic units cannot be changed. | |
+ * | |
+ * NOTE: For initializing fields, you must use set_summary | |
+ * | |
+ * - For a data page, nid represents the dnode's nid. | |
+ * - For a node page, nid represents the node page's own nid. | |
+ * | |
+ * ofs_in_node is used only for data pages. It is the offset from the | |
+ * beginning of the node page at which the data block address is found. | |
+ * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) | |
+ */ | |
+#define ENTRIES_IN_SUM 512 | |
+#define SUMMARY_SIZE (7) /* sizeof(struct summary) */ | |
+#define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ | |
+#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) | |
+ | |
+/* a summary entry for a 4KB-sized block in a segment */ | |
+struct f2fs_summary { | |
+ __le32 nid; /* parent node id */ | |
+ union { | |
+ __u8 reserved[3]; | |
+ struct { | |
+ __u8 version; /* node version number */ | |
+ __le16 ofs_in_node; /* block index in parent node */ | |
+ } __packed; | |
+ }; | |
+} __packed; | |
+ | |
+/* summary block type, node or data, is stored to the summary_footer */ | |
+#define SUM_TYPE_NODE (1) | |
+#define SUM_TYPE_DATA (0) | |
+ | |
+struct summary_footer { | |
+ unsigned char entry_type; /* SUM_TYPE_XXX */ | |
+ __u32 check_sum; /* summary checksum */ | |
+} __packed; | |
+ | |
+#define SUM_JOURNAL_SIZE (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\ | |
+ SUM_ENTRY_SIZE) | |
+#define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ | |
+ sizeof(struct nat_journal_entry)) | |
+#define NAT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ | |
+ sizeof(struct nat_journal_entry)) | |
+#define SIT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ | |
+ sizeof(struct sit_journal_entry)) | |
+#define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ | |
+ sizeof(struct sit_journal_entry)) | |
+/* | |
+ * frequently updated NAT/SIT entries can be stored in the spare area in | |
+ * summary blocks | |
+ */ | |
+enum { | |
+ NAT_JOURNAL = 0, | |
+ SIT_JOURNAL | |
+}; | |
+ | |
+struct nat_journal_entry { | |
+ __le32 nid; | |
+ struct f2fs_nat_entry ne; | |
+} __packed; | |
+ | |
+struct nat_journal { | |
+ struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES]; | |
+ __u8 reserved[NAT_JOURNAL_RESERVED]; | |
+} __packed; | |
+ | |
+struct sit_journal_entry { | |
+ __le32 segno; | |
+ struct f2fs_sit_entry se; | |
+} __packed; | |
+ | |
+struct sit_journal { | |
+ struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES]; | |
+ __u8 reserved[SIT_JOURNAL_RESERVED]; | |
+} __packed; | |
+ | |
+/* 4KB-sized summary block structure */ | |
+struct f2fs_summary_block { | |
+ struct f2fs_summary entries[ENTRIES_IN_SUM]; | |
+ union { | |
+ __le16 n_nats; | |
+ __le16 n_sits; | |
+ }; | |
+ /* spare area is used by NAT or SIT journals */ | |
+ union { | |
+ struct nat_journal nat_j; | |
+ struct sit_journal sit_j; | |
+ }; | |
+ struct summary_footer footer; | |
+} __packed; | |
+ | |
+/* | |
+ * For directory operations | |
+ */ | |
+#define F2FS_DOT_HASH 0 | |
+#define F2FS_DDOT_HASH F2FS_DOT_HASH | |
+#define F2FS_MAX_HASH (~((0x3ULL) << 62)) | |
+#define F2FS_HASH_COL_BIT ((0x1ULL) << 63) | |
+ | |
+typedef __le32 f2fs_hash_t; | |
+ | |
+/* One directory entry slot covers an 8-byte-long piece of the file name */ | |
+#define F2FS_SLOT_LEN 8 | |
+#define F2FS_SLOT_LEN_BITS 3 | |
+ | |
+#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) | |
+ | |
+/* the number of dentries in a block */ | |
+#define NR_DENTRY_IN_BLOCK 214 | |
+ | |
+/* MAX level for dir lookup */ | |
+#define MAX_DIR_HASH_DEPTH 63 | |
+ | |
+#define SIZE_OF_DIR_ENTRY 11 /* in bytes */ | |
+#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ | |
+ BITS_PER_BYTE) | |
+#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ | |
+ F2FS_SLOT_LEN) * \ | |
+ NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) | |
+ | |
+/* One directory entry slot representing F2FS_SLOT_LEN-sized file name */ | |
+struct f2fs_dir_entry { | |
+ __le32 hash_code; /* hash code of file name */ | |
+ __le32 ino; /* inode number */ | |
+ __le16 name_len; /* length of file name */ | |
+ __u8 file_type; /* file type */ | |
+} __packed; | |
+ | |
+/* 4KB-sized directory entry block */ | |
+struct f2fs_dentry_block { | |
+ /* validity bitmap for directory entries in each block */ | |
+ __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; | |
+ __u8 reserved[SIZE_OF_RESERVED]; | |
+ struct f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK]; | |
+ __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; | |
+} __packed; | |
+ | |
+/* file types used in inode_info->flags */ | |
+enum { | |
+ F2FS_FT_UNKNOWN, | |
+ F2FS_FT_REG_FILE, | |
+ F2FS_FT_DIR, | |
+ F2FS_FT_CHRDEV, | |
+ F2FS_FT_BLKDEV, | |
+ F2FS_FT_FIFO, | |
+ F2FS_FT_SOCK, | |
+ F2FS_FT_SYMLINK, | |
+ F2FS_FT_MAX | |
+}; | |
+ | |
+#endif /* _LINUX_F2FS_FS_H */ | |
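
Because every on-disk field above has a fixed, little-endian layout, the superblock can be inspected straight from userspace. A minimal sketch that reads the first superblock copy at F2FS_SUPER_OFFSET from an image file and checks the magic number (0xF2F52010, added to magic.h below); only the leading fields are declared, and a little-endian host is assumed:

#include <stdio.h>
#include <stdint.h>

#define F2FS_SUPER_OFFSET	1024
#define F2FS_SUPER_MAGIC	0xF2F52010

struct sb_head {
	uint32_t magic;		/* leading fields of f2fs_super_block */
	uint16_t major_ver;
	uint16_t minor_ver;
} __attribute__((packed));

int main(int argc, char **argv)
{
	struct sb_head sb;
	FILE *f;

	if (argc != 2 || !(f = fopen(argv[1], "rb")))
		return 1;
	if (fseek(f, F2FS_SUPER_OFFSET, SEEK_SET) != 0 ||
	    fread(&sb, sizeof(sb), 1, f) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("magic 0x%08x (%s), version %u.%u\n", sb.magic,
	       sb.magic == F2FS_SUPER_MAGIC ? "f2fs" : "not f2fs",
	       sb.major_ver, sb.minor_ver);
	return 0;
}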
diff --git a/include/linux/fs.h b/include/linux/fs.h | |
index cf7bc25..86f027d 100644 | |
--- a/include/linux/fs.h | |
+++ b/include/linux/fs.h | |
@@ -1744,6 +1744,19 @@ static inline void mark_inode_dirty_sync(struct inode *inode) | |
} | |
/** | |
+ * set_nlink - directly set an inode's link count | |
+ * @inode: inode | |
+ * @nlink: new nlink (should be non-zero) | |
+ * | |
+ * This is a low-level filesystem helper to replace any | |
+ * direct filesystem manipulation of i_nlink. | |
+ */ | |
+static inline void set_nlink(struct inode *inode, unsigned int nlink) | |
+{ | |
+ inode->i_nlink = nlink; | |
+} | |
+ | |
+/** | |
* inc_nlink - directly increment an inode's link count | |
* @inode: inode | |
* | |
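
set_nlink() gives filesystems one choke point for link-count updates in place of open-coded i_nlink stores. A sketch of the intended call pattern in an inode-read path, using the f2fs_inode layout from f2fs_fs.h above (illustrative in-tree fragment, not standalone code; the function name is hypothetical):

static void example_read_nlink(struct inode *inode, struct f2fs_inode *ri)
{
	/* Convert the on-disk little-endian link count and store it
	 * through the helper instead of assigning i_nlink directly. */
	set_nlink(inode, le32_to_cpu(ri->i_links));
}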
diff --git a/include/linux/magic.h b/include/linux/magic.h | |
index 1e5df2a..2616b54 100644 | |
--- a/include/linux/magic.h | |
+++ b/include/linux/magic.h | |
@@ -24,6 +24,7 @@ | |
#define EXT4_SUPER_MAGIC 0xEF53 | |
#define BTRFS_SUPER_MAGIC 0x9123683E | |
#define NILFS_SUPER_MAGIC 0x3434 | |
+#define F2FS_SUPER_MAGIC 0xF2F52010 | |
#define HPFS_SUPER_MAGIC 0xf995e849 | |
#define ISOFS_SUPER_MAGIC 0x9660 | |
#define JFFS2_SUPER_MAGIC 0x72b6 | |
diff --git a/include/linux/security.h b/include/linux/security.h | |
index 95a6d8e..7de9c15 100644 | |
--- a/include/linux/security.h | |
+++ b/include/linux/security.h | |
@@ -36,6 +36,7 @@ | |
#include <linux/key.h> | |
#include <linux/xfrm.h> | |
#include <linux/slab.h> | |
+#include <linux/xattr.h> | |
#include <net/flow.h> | |
/* Maximum number of letters for an LSM name string */ | |
@@ -147,6 +148,10 @@ extern int mmap_min_addr_handler(struct ctl_table *table, int write, | |
void __user *buffer, size_t *lenp, loff_t *ppos); | |
#endif | |
+/* security_inode_init_security callback function to write xattrs */ | |
+typedef int (*initxattrs) (struct inode *inode, | |
+ const struct xattr *xattr_array, void *fs_data); | |
+ | |
#ifdef CONFIG_SECURITY | |
struct security_mnt_opts { | |
@@ -1715,6 +1720,9 @@ void security_inode_free(struct inode *inode); | |
int security_inode_init_security(struct inode *inode, struct inode *dir, | |
const struct qstr *qstr, char **name, | |
void **value, size_t *len); | |
+int security_new_inode_init_security(struct inode *inode, struct inode *dir, | |
+ const struct qstr *qstr, | |
+ initxattrs initxattrs, void *fs_data); | |
int security_inode_create(struct inode *dir, struct dentry *dentry, int mode); | |
int security_inode_link(struct dentry *old_dentry, struct inode *dir, | |
struct dentry *new_dentry); | |
@@ -2067,7 +2075,16 @@ static inline int security_inode_init_security(struct inode *inode, | |
void **value, | |
size_t *len) | |
{ | |
- return -EOPNOTSUPP; | |
+ return 0; | |
+} | |
+ | |
+static inline int security_new_inode_init_security(struct inode *inode, | |
+ struct inode *dir, | |
+ const struct qstr *qstr, | |
+ initxattrs initxattrs, | |
+ void *fs_data) | |
+{ | |
+ return 0; | |
} | |
static inline int security_inode_create(struct inode *dir, | |
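
The initxattrs callback type above is the filesystem's half of the contract: security_new_inode_init_security() obtains the security xattr from the LSM, then hands the callback an array of struct xattr terminated by an entry with a NULL name. A sketch of the shape such a callback takes (the function name is hypothetical; the real consumer added by this patch lives in fs/f2fs/xattr.c):

static int example_initxattrs(struct inode *inode,
		const struct xattr *xattr_array, void *fs_data)
{
	const struct xattr *xattr;
	struct page *ipage = fs_data;
	int err = 0;

	/* Walk the zero-terminated array and persist each attribute
	 * in the security name index. */
	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
		err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
				xattr->name, xattr->value,
				xattr->value_len, ipage);
		if (err)
			break;
	}
	return err;
}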
diff --git a/include/linux/xattr.h b/include/linux/xattr.h | |
index aed54c5..7a37866 100644 | |
--- a/include/linux/xattr.h | |
+++ b/include/linux/xattr.h | |
@@ -67,6 +67,12 @@ struct xattr_handler { | |
size_t size, int flags, int handler_flags); | |
}; | |
+struct xattr { | |
+ char *name; | |
+ void *value; | |
+ size_t value_len; | |
+}; | |
+ | |
ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); | |
ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); | |
ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); | |
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h | |
new file mode 100644 | |
index 0000000..52ae548 | |
--- /dev/null | |
+++ b/include/trace/events/f2fs.h | |
@@ -0,0 +1,682 @@ | |
+#undef TRACE_SYSTEM | |
+#define TRACE_SYSTEM f2fs | |
+ | |
+#if !defined(_TRACE_F2FS_H) || defined(TRACE_HEADER_MULTI_READ) | |
+#define _TRACE_F2FS_H | |
+ | |
+#include <linux/tracepoint.h> | |
+ | |
+#define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev) | |
+#define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino | |
+ | |
+#define show_block_type(type) \ | |
+ __print_symbolic(type, \ | |
+ { NODE, "NODE" }, \ | |
+ { DATA, "DATA" }, \ | |
+ { META, "META" }, \ | |
+ { META_FLUSH, "META_FLUSH" }) | |
+ | |
+#define show_bio_type(type) \ | |
+ __print_symbolic(type, \ | |
+ { READ, "READ" }, \ | |
+ { READA, "READAHEAD" }, \ | |
+ { READ_SYNC, "READ_SYNC" }, \ | |
+ { WRITE, "WRITE" }, \ | |
+ { WRITE_SYNC, "WRITE_SYNC" }, \ | |
+ { WRITE_FLUSH, "WRITE_FLUSH" }, \ | |
+ { WRITE_FUA, "WRITE_FUA" }) | |
+ | |
+#define show_data_type(type) \ | |
+ __print_symbolic(type, \ | |
+ { CURSEG_HOT_DATA, "Hot DATA" }, \ | |
+ { CURSEG_WARM_DATA, "Warm DATA" }, \ | |
+ { CURSEG_COLD_DATA, "Cold DATA" }, \ | |
+ { CURSEG_HOT_NODE, "Hot NODE" }, \ | |
+ { CURSEG_WARM_NODE, "Warm NODE" }, \ | |
+ { CURSEG_COLD_NODE, "Cold NODE" }, \ | |
+ { NO_CHECK_TYPE, "No TYPE" }) | |
+ | |
+#define show_gc_type(type) \ | |
+ __print_symbolic(type, \ | |
+ { FG_GC, "Foreground GC" }, \ | |
+ { BG_GC, "Background GC" }) | |
+ | |
+#define show_alloc_mode(type) \ | |
+ __print_symbolic(type, \ | |
+ { LFS, "LFS-mode" }, \ | |
+ { SSR, "SSR-mode" }) | |
+ | |
+#define show_victim_policy(type) \ | |
+ __print_symbolic(type, \ | |
+ { GC_GREEDY, "Greedy" }, \ | |
+ { GC_CB, "Cost-Benefit" }) | |
+ | |
+struct victim_sel_policy; | |
+ | |
+DECLARE_EVENT_CLASS(f2fs__inode, | |
+ | |
+ TP_PROTO(struct inode *inode), | |
+ | |
+ TP_ARGS(inode), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(ino_t, ino) | |
+ __field(ino_t, pino) | |
+ __field(umode_t, mode) | |
+ __field(loff_t, size) | |
+ __field(unsigned int, nlink) | |
+ __field(blkcnt_t, blocks) | |
+ __field(__u8, advise) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = inode->i_sb->s_dev; | |
+ __entry->ino = inode->i_ino; | |
+ __entry->pino = F2FS_I(inode)->i_pino; | |
+ __entry->mode = inode->i_mode; | |
+ __entry->nlink = inode->i_nlink; | |
+ __entry->size = inode->i_size; | |
+ __entry->blocks = inode->i_blocks; | |
+ __entry->advise = F2FS_I(inode)->i_advise; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), ino = %lu, pino = %lu, i_mode = 0x%hx, " | |
+ "i_size = %lld, i_nlink = %u, i_blocks = %llu, i_advise = 0x%x", | |
+ show_dev_ino(__entry), | |
+ (unsigned long)__entry->pino, | |
+ __entry->mode, | |
+ __entry->size, | |
+ (unsigned int)__entry->nlink, | |
+ (unsigned long long)__entry->blocks, | |
+ (unsigned char)__entry->advise) | |
+); | |
+ | |
+DECLARE_EVENT_CLASS(f2fs__inode_exit, | |
+ | |
+ TP_PROTO(struct inode *inode, int ret), | |
+ | |
+ TP_ARGS(inode, ret), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(ino_t, ino) | |
+ __field(int, ret) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = inode->i_sb->s_dev; | |
+ __entry->ino = inode->i_ino; | |
+ __entry->ret = ret; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), ino = %lu, ret = %d", | |
+ show_dev_ino(__entry), | |
+ __entry->ret) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter, | |
+ | |
+ TP_PROTO(struct inode *inode), | |
+ | |
+ TP_ARGS(inode) | |
+); | |
+ | |
+TRACE_EVENT(f2fs_sync_file_exit, | |
+ | |
+ TP_PROTO(struct inode *inode, bool need_cp, int datasync, int ret), | |
+ | |
+ TP_ARGS(inode, need_cp, datasync, ret), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(ino_t, ino) | |
+ __field(bool, need_cp) | |
+ __field(int, datasync) | |
+ __field(int, ret) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = inode->i_sb->s_dev; | |
+ __entry->ino = inode->i_ino; | |
+ __entry->need_cp = need_cp; | |
+ __entry->datasync = datasync; | |
+ __entry->ret = ret; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, " | |
+ "datasync = %d, ret = %d", | |
+ show_dev_ino(__entry), | |
+ __entry->need_cp ? "needed" : "not needed", | |
+ __entry->datasync, | |
+ __entry->ret) | |
+); | |
+ | |
+TRACE_EVENT(f2fs_sync_fs, | |
+ | |
+ TP_PROTO(struct super_block *sb, int wait), | |
+ | |
+ TP_ARGS(sb, wait), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(int, dirty) | |
+ __field(int, wait) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = sb->s_dev; | |
+ __entry->dirty = F2FS_SB(sb)->s_dirty; | |
+ __entry->wait = wait; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), superblock is %s, wait = %d", | |
+ show_dev(__entry), | |
+ __entry->dirty ? "dirty" : "not dirty", | |
+ __entry->wait) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__inode, f2fs_iget, | |
+ | |
+ TP_PROTO(struct inode *inode), | |
+ | |
+ TP_ARGS(inode) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__inode_exit, f2fs_iget_exit, | |
+ | |
+ TP_PROTO(struct inode *inode, int ret), | |
+ | |
+ TP_ARGS(inode, ret) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__inode, f2fs_evict_inode, | |
+ | |
+ TP_PROTO(struct inode *inode), | |
+ | |
+ TP_ARGS(inode) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__inode_exit, f2fs_new_inode, | |
+ | |
+ TP_PROTO(struct inode *inode, int ret), | |
+ | |
+ TP_ARGS(inode, ret) | |
+); | |
+ | |
+TRACE_EVENT(f2fs_unlink_enter, | |
+ | |
+ TP_PROTO(struct inode *dir, struct dentry *dentry), | |
+ | |
+ TP_ARGS(dir, dentry), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(ino_t, ino) | |
+ __field(loff_t, size) | |
+ __field(blkcnt_t, blocks) | |
+ __field(const char *, name) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = dir->i_sb->s_dev; | |
+ __entry->ino = dir->i_ino; | |
+ __entry->size = dir->i_size; | |
+ __entry->blocks = dir->i_blocks; | |
+ __entry->name = dentry->d_name.name; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), dir ino = %lu, i_size = %lld, " | |
+ "i_blocks = %llu, name = %s", | |
+ show_dev_ino(__entry), | |
+ __entry->size, | |
+ (unsigned long long)__entry->blocks, | |
+ __entry->name) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit, | |
+ | |
+ TP_PROTO(struct inode *inode, int ret), | |
+ | |
+ TP_ARGS(inode, ret) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__inode, f2fs_truncate, | |
+ | |
+ TP_PROTO(struct inode *inode), | |
+ | |
+ TP_ARGS(inode) | |
+); | |
+ | |
+TRACE_EVENT(f2fs_truncate_data_blocks_range, | |
+ | |
+ TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs, int free), | |
+ | |
+ TP_ARGS(inode, nid, ofs, free), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(ino_t, ino) | |
+ __field(nid_t, nid) | |
+ __field(unsigned int, ofs) | |
+ __field(int, free) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = inode->i_sb->s_dev; | |
+ __entry->ino = inode->i_ino; | |
+ __entry->nid = nid; | |
+ __entry->ofs = ofs; | |
+ __entry->free = free; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), ino = %lu, nid = %u, offset = %u, freed = %d", | |
+ show_dev_ino(__entry), | |
+ (unsigned int)__entry->nid, | |
+ __entry->ofs, | |
+ __entry->free) | |
+); | |
+ | |
+DECLARE_EVENT_CLASS(f2fs__truncate_op, | |
+ | |
+ TP_PROTO(struct inode *inode, u64 from), | |
+ | |
+ TP_ARGS(inode, from), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(ino_t, ino) | |
+ __field(loff_t, size) | |
+ __field(blkcnt_t, blocks) | |
+ __field(u64, from) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = inode->i_sb->s_dev; | |
+ __entry->ino = inode->i_ino; | |
+ __entry->size = inode->i_size; | |
+ __entry->blocks = inode->i_blocks; | |
+ __entry->from = from; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), ino = %lu, i_size = %lld, i_blocks = %llu, " | |
+ "start file offset = %llu", | |
+ show_dev_ino(__entry), | |
+ __entry->size, | |
+ (unsigned long long)__entry->blocks, | |
+ (unsigned long long)__entry->from) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_blocks_enter, | |
+ | |
+ TP_PROTO(struct inode *inode, u64 from), | |
+ | |
+ TP_ARGS(inode, from) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_blocks_exit, | |
+ | |
+ TP_PROTO(struct inode *inode, int ret), | |
+ | |
+ TP_ARGS(inode, ret) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_inode_blocks_enter, | |
+ | |
+ TP_PROTO(struct inode *inode, u64 from), | |
+ | |
+ TP_ARGS(inode, from) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_inode_blocks_exit, | |
+ | |
+ TP_PROTO(struct inode *inode, int ret), | |
+ | |
+ TP_ARGS(inode, ret) | |
+); | |
+ | |
+DECLARE_EVENT_CLASS(f2fs__truncate_node, | |
+ | |
+ TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), | |
+ | |
+ TP_ARGS(inode, nid, blk_addr), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(ino_t, ino) | |
+ __field(nid_t, nid) | |
+ __field(block_t, blk_addr) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = inode->i_sb->s_dev; | |
+ __entry->ino = inode->i_ino; | |
+ __entry->nid = nid; | |
+ __entry->blk_addr = blk_addr; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), ino = %lu, nid = %u, block_address = 0x%llx", | |
+ show_dev_ino(__entry), | |
+ (unsigned int)__entry->nid, | |
+ (unsigned long long)__entry->blk_addr) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_nodes_enter, | |
+ | |
+ TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), | |
+ | |
+ TP_ARGS(inode, nid, blk_addr) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_nodes_exit, | |
+ | |
+ TP_PROTO(struct inode *inode, int ret), | |
+ | |
+ TP_ARGS(inode, ret) | |
+); | |
+ | |
+DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node, | |
+ | |
+ TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), | |
+ | |
+ TP_ARGS(inode, nid, blk_addr) | |
+); | |
+ | |
+TRACE_EVENT(f2fs_truncate_partial_nodes, | |
+ | |
+ TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err), | |
+ | |
+ TP_ARGS(inode, nid, depth, err), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(ino_t, ino) | |
+ __field(nid_t, nid[3]) | |
+ __field(int, depth) | |
+ __field(int, err) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = inode->i_sb->s_dev; | |
+ __entry->ino = inode->i_ino; | |
+ __entry->nid[0] = nid[0]; | |
+ __entry->nid[1] = nid[1]; | |
+ __entry->nid[2] = nid[2]; | |
+ __entry->depth = depth; | |
+ __entry->err = err; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), ino = %lu, " | |
+ "nid[0] = %u, nid[1] = %u, nid[2] = %u, depth = %d, err = %d", | |
+ show_dev_ino(__entry), | |
+ (unsigned int)__entry->nid[0], | |
+ (unsigned int)__entry->nid[1], | |
+ (unsigned int)__entry->nid[2], | |
+ __entry->depth, | |
+ __entry->err) | |
+); | |
+ | |
+TRACE_EVENT_CONDITION(f2fs_readpage, | |
+ | |
+ TP_PROTO(struct page *page, sector_t blkaddr, int type), | |
+ | |
+ TP_ARGS(page, blkaddr, type), | |
+ | |
+ TP_CONDITION(page->mapping), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(ino_t, ino) | |
+ __field(pgoff_t, index) | |
+ __field(sector_t, blkaddr) | |
+ __field(int, type) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = page->mapping->host->i_sb->s_dev; | |
+ __entry->ino = page->mapping->host->i_ino; | |
+ __entry->index = page->index; | |
+ __entry->blkaddr = blkaddr; | |
+ __entry->type = type; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " | |
+ "blkaddr = 0x%llx, bio_type = %s", | |
+ show_dev_ino(__entry), | |
+ (unsigned long)__entry->index, | |
+ (unsigned long long)__entry->blkaddr, | |
+ show_bio_type(__entry->type)) | |
+); | |
+ | |
+TRACE_EVENT(f2fs_get_data_block, | |
+ TP_PROTO(struct inode *inode, sector_t iblock, | |
+ struct buffer_head *bh, int ret), | |
+ | |
+ TP_ARGS(inode, iblock, bh, ret), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(ino_t, ino) | |
+ __field(sector_t, iblock) | |
+ __field(sector_t, bh_start) | |
+ __field(size_t, bh_size) | |
+ __field(int, ret) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = inode->i_sb->s_dev; | |
+ __entry->ino = inode->i_ino; | |
+ __entry->iblock = iblock; | |
+ __entry->bh_start = bh->b_blocknr; | |
+ __entry->bh_size = bh->b_size; | |
+ __entry->ret = ret; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " | |
+ "start blkaddr = 0x%llx, len = 0x%llx bytes, err = %d", | |
+ show_dev_ino(__entry), | |
+ (unsigned long long)__entry->iblock, | |
+ (unsigned long long)__entry->bh_start, | |
+ (unsigned long long)__entry->bh_size, | |
+ __entry->ret) | |
+); | |
+ | |
+TRACE_EVENT(f2fs_get_victim, | |
+ | |
+ TP_PROTO(struct super_block *sb, int type, int gc_type, | |
+ struct victim_sel_policy *p, unsigned int pre_victim, | |
+ unsigned int prefree, unsigned int free), | |
+ | |
+ TP_ARGS(sb, type, gc_type, p, pre_victim, prefree, free), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(int, type) | |
+ __field(int, gc_type) | |
+ __field(int, alloc_mode) | |
+ __field(int, gc_mode) | |
+ __field(unsigned int, victim) | |
+ __field(unsigned int, ofs_unit) | |
+ __field(unsigned int, pre_victim) | |
+ __field(unsigned int, prefree) | |
+ __field(unsigned int, free) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = sb->s_dev; | |
+ __entry->type = type; | |
+ __entry->gc_type = gc_type; | |
+ __entry->alloc_mode = p->alloc_mode; | |
+ __entry->gc_mode = p->gc_mode; | |
+ __entry->victim = p->min_segno; | |
+ __entry->ofs_unit = p->ofs_unit; | |
+ __entry->pre_victim = pre_victim; | |
+ __entry->prefree = prefree; | |
+ __entry->free = free; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u " | |
+ "ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u", | |
+ show_dev(__entry), | |
+ show_data_type(__entry->type), | |
+ show_gc_type(__entry->gc_type), | |
+ show_alloc_mode(__entry->alloc_mode), | |
+ show_victim_policy(__entry->gc_mode), | |
+ __entry->victim, | |
+ __entry->ofs_unit, | |
+ (int)__entry->pre_victim, | |
+ __entry->prefree, | |
+ __entry->free) | |
+); | |
+ | |
+TRACE_EVENT(f2fs_fallocate, | |
+ | |
+ TP_PROTO(struct inode *inode, int mode, | |
+ loff_t offset, loff_t len, int ret), | |
+ | |
+ TP_ARGS(inode, mode, offset, len, ret), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(ino_t, ino) | |
+ __field(int, mode) | |
+ __field(loff_t, offset) | |
+ __field(loff_t, len) | |
+ __field(loff_t, size) | |
+ __field(blkcnt_t, blocks) | |
+ __field(int, ret) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = inode->i_sb->s_dev; | |
+ __entry->ino = inode->i_ino; | |
+ __entry->mode = mode; | |
+ __entry->offset = offset; | |
+ __entry->len = len; | |
+ __entry->size = inode->i_size; | |
+ __entry->blocks = inode->i_blocks; | |
+ __entry->ret = ret; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), ino = %lu, mode = %x, offset = %lld, " | |
+ "len = %lld, i_size = %lld, i_blocks = %llu, ret = %d", | |
+ show_dev_ino(__entry), | |
+ __entry->mode, | |
+ (unsigned long long)__entry->offset, | |
+ (unsigned long long)__entry->len, | |
+ (unsigned long long)__entry->size, | |
+ (unsigned long long)__entry->blocks, | |
+ __entry->ret) | |
+); | |
+ | |
+TRACE_EVENT(f2fs_reserve_new_block, | |
+ | |
+ TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), | |
+ | |
+ TP_ARGS(inode, nid, ofs_in_node), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(nid_t, nid) | |
+ __field(unsigned int, ofs_in_node) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = inode->i_sb->s_dev; | |
+ __entry->nid = nid; | |
+ __entry->ofs_in_node = ofs_in_node; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u", | |
+ show_dev(__entry), | |
+ (unsigned int)__entry->nid, | |
+ __entry->ofs_in_node) | |
+); | |
+ | |
+TRACE_EVENT(f2fs_do_submit_bio, | |
+ | |
+ TP_PROTO(struct super_block *sb, int btype, bool sync, struct bio *bio), | |
+ | |
+ TP_ARGS(sb, btype, sync, bio), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(int, btype) | |
+ __field(bool, sync) | |
+ __field(sector_t, sector) | |
+ __field(unsigned int, size) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = sb->s_dev; | |
+ __entry->btype = btype; | |
+ __entry->sync = sync; | |
+ __entry->sector = bio->bi_sector; | |
+ __entry->size = bio->bi_size; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), type = %s, io = %s, sector = %lld, size = %u", | |
+ show_dev(__entry), | |
+ show_block_type(__entry->btype), | |
+ __entry->sync ? "sync" : "no sync", | |
+ (unsigned long long)__entry->sector, | |
+ __entry->size) | |
+); | |
+ | |
+TRACE_EVENT(f2fs_submit_write_page, | |
+ | |
+ TP_PROTO(struct page *page, block_t blk_addr, int type), | |
+ | |
+ TP_ARGS(page, blk_addr, type), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(ino_t, ino) | |
+ __field(int, type) | |
+ __field(pgoff_t, index) | |
+ __field(block_t, block) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = page->mapping->host->i_sb->s_dev; | |
+ __entry->ino = page->mapping->host->i_ino; | |
+ __entry->type = type; | |
+ __entry->index = page->index; | |
+ __entry->block = blk_addr; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), ino = %lu, %s, index = %lu, blkaddr = 0x%llx", | |
+ show_dev_ino(__entry), | |
+ show_block_type(__entry->type), | |
+ (unsigned long)__entry->index, | |
+ (unsigned long long)__entry->block) | |
+); | |
+ | |
+TRACE_EVENT(f2fs_write_checkpoint, | |
+ | |
+ TP_PROTO(struct super_block *sb, bool is_umount, char *msg), | |
+ | |
+ TP_ARGS(sb, is_umount, msg), | |
+ | |
+ TP_STRUCT__entry( | |
+ __field(dev_t, dev) | |
+ __field(bool, is_umount) | |
+ __field(char *, msg) | |
+ ), | |
+ | |
+ TP_fast_assign( | |
+ __entry->dev = sb->s_dev; | |
+ __entry->is_umount = is_umount; | |
+ __entry->msg = msg; | |
+ ), | |
+ | |
+ TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", | |
+ show_dev(__entry), | |
+ __entry->is_umount ? "clean umount" : "consistency", | |
+ __entry->msg) | |
+); | |
+ | |
+#endif /* _TRACE_F2FS_H */ | |
+ | |
+ /* This part must be outside protection */ | |
+#include <trace/define_trace.h> | |
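
Each TRACE_EVENT()/DEFINE_EVENT() above generates a trace_<name>() wrapper for the fs code to call, and the events surface at runtime under the f2fs directory of the tracing events tree (tracing/events/f2fs/ in debugfs). A sketch of a call site (in-tree fragment; the wrapper is generated, not hand-written, and the function name is hypothetical):

static void example_trace_readpage(struct page *page, sector_t blkaddr)
{
	/* Generated by TRACE_EVENT_CONDITION(f2fs_readpage, ...) above;
	 * TP_CONDITION(page->mapping) silently drops the event for
	 * pages without a mapping. */
	trace_f2fs_readpage(page, blkaddr, READ);
}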
diff --git a/security/security.c b/security/security.c | |
index 420198e..0dc0009 100644 | |
--- a/security/security.c | |
+++ b/security/security.c | |
@@ -18,6 +18,8 @@ | |
#include <linux/security.h> | |
#include <linux/ima.h> | |
+#define MAX_LSM_XATTR 1 | |
+ | |
/* Boot-time LSM user choice */ | |
static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = | |
CONFIG_DEFAULT_SECURITY; | |
@@ -369,6 +371,37 @@ int security_inode_init_security(struct inode *inode, struct inode *dir, | |
} | |
EXPORT_SYMBOL(security_inode_init_security); | |
+int security_new_inode_init_security(struct inode *inode, struct inode *dir, | |
+ const struct qstr *qstr, | |
+ const initxattrs initxattrs, void *fs_data) | |
+{ | |
+ struct xattr new_xattrs[MAX_LSM_XATTR + 1]; | |
+ struct xattr *lsm_xattr; | |
+ int ret; | |
+ | |
+ if (unlikely(IS_PRIVATE(inode))) | |
+ return -EOPNOTSUPP; | |
+ | |
+ memset(new_xattrs, 0, sizeof new_xattrs); | |
+ if (!initxattrs) | |
+ return security_ops->inode_init_security(inode, dir, qstr, | |
+ NULL, NULL, NULL); | |
+ lsm_xattr = new_xattrs; | |
+ ret = security_ops->inode_init_security(inode, dir, qstr, | |
+ &lsm_xattr->name, | |
+ &lsm_xattr->value, | |
+ &lsm_xattr->value_len); | |
+ if (ret) | |
+ goto out; | |
+ ret = initxattrs(inode, new_xattrs, fs_data); | |
+out: | |
+ kfree(lsm_xattr->name); | |
+ kfree(lsm_xattr->value); | |
+ | |
+ return (ret == -EOPNOTSUPP) ? 0 : ret; | |
+} | |
+EXPORT_SYMBOL(security_new_inode_init_security); | |
+ | |
#ifdef CONFIG_SECURITY_PATH | |
int security_path_mknod(struct path *dir, struct dentry *dentry, int mode, | |
unsigned int dev) | |
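
From the filesystem side the whole sequence is a single call at inode-creation time, with the inode page passed through fs_data so the callback can write into it under the caller's lock. A sketch of that call (hypothetical wrapper name, pairing with the example_initxattrs sketch earlier; compare the f2fs_init_security() declaration in fs/f2fs/xattr.h above):

static int example_init_security(struct inode *inode, struct inode *dir,
		const struct qstr *qstr, struct page *ipage)
{
	/* When the LSM declines (-EOPNOTSUPP) the helper returns 0,
	 * so inode creation proceeds without a security xattr. */
	return security_new_inode_init_security(inode, dir, qstr,
				example_initxattrs, ipage);
}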
-- | |
1.7.9.5 |