From 13a96baefc0ff5d8262c4bc8c797bee4b157443c Mon Sep 17 00:00:00 2001
From: Frans Kaashoek <kaashoek@Frans-Kaashoeks-MacBook-Pro.local>
Date: Wed, 27 Jul 2011 20:35:46 -0400
Subject: [PATCH] Dirt simple logging Passes usertests and stressfs Seems to
 recover correctly in a number of simple cases

---
 Makefile   |   1 +
 defs.h     |   8 +++
 fs.c       |  12 ++--
 fs.h       |   1 +
 initcode.S |   3 +
 log.c      | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 main.c     |   2 +-
 mkfs.c     |  13 +++--
 param.h    |   2 +
 syscall.c  |  49 ++++++++++------
 syscall.h  |  36 ++++++------
 11 files changed, 244 insertions(+), 47 deletions(-)
 create mode 100644 log.c

diff --git a/Makefile b/Makefile
index 3487aa4..f67c88c 100644
--- a/Makefile
+++ b/Makefile
@@ -26,6 +26,7 @@ OBJS = \
 	uart.o\
 	vectors.o\
 	vm.o\
+	log.o\
 
 # Cross-compiling (e.g., on Mac OS X)
 #TOOLPREFIX = i386-jos-elf-
diff --git a/defs.h b/defs.h
index 8ea46d6..bbe4ae4 100644
--- a/defs.h
+++ b/defs.h
@@ -6,6 +6,7 @@ struct pipe;
 struct proc;
 struct spinlock;
 struct stat;
+struct superblock;
 
 // bio.c
 void            binit(void);
@@ -32,6 +33,7 @@ int             filestat(struct file*, struct stat*);
 int             filewrite(struct file*, char*, int n);
 
 // fs.c
+void            readsb(int dev, struct superblock *sb);
 int             dirlink(struct inode*, char*, uint);
 struct inode*   dirlookup(struct inode*, char*, uint*);
 struct inode*   ialloc(uint, short);
@@ -75,6 +77,12 @@ void            lapicinit(int);
 void            lapicstartap(uchar, uint);
 void            microdelay(int);
 
+// log.c
+void            initlog(void);
+void            log_write(struct buf*);
+void            begin_trans(void);
+void            commit_trans(void);
+
 // mp.c
 extern int      ismp;
 int             mpbcpu(void);
diff --git a/fs.c b/fs.c
index 7c6d904..a414b65 100644
--- a/fs.c
+++ b/fs.c
@@ -25,7 +25,7 @@
 static void itrunc(struct inode*);
 
 // Read the super block.
-static void
+void
 readsb(int dev, struct superblock *sb)
 {
   struct buf *bp;
@@ -65,7 +65,7 @@ balloc(uint dev)
       m = 1 << (bi % 8);
       if((bp->data[bi/8] & m) == 0){  // Is block free?
         bp->data[bi/8] |= m;  // Mark block in use on disk.
-        bwrite(bp);
+        log_write(bp);
         brelse(bp);
         return b + bi;
       }
@@ -92,7 +92,7 @@ bfree(int dev, uint b)
   if((bp->data[bi/8] & m) == 0)
     panic("freeing free block");
   bp->data[bi/8] &= ~m;  // Mark block free on disk.
-  bwrite(bp);
+  log_write(bp);
   brelse(bp);
 }
 
@@ -159,7 +159,7 @@ ialloc(uint dev, short type)
     if(dip->type == 0){  // a free inode
       memset(dip, 0, sizeof(*dip));
       dip->type = type;
-      bwrite(bp);   // mark it allocated on the disk
+      log_write(bp);   // mark it allocated on the disk
       brelse(bp);
       return iget(dev, inum);
     }
@@ -183,7 +183,7 @@ iupdate(struct inode *ip)
   dip->nlink = ip->nlink;
   dip->size = ip->size;
   memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
-  bwrite(bp);
+  log_write(bp);
   brelse(bp);
 }
 
@@ -339,7 +339,7 @@ bmap(struct inode *ip, uint bn)
     a = (uint*)bp->data;
     if((addr = a[bn]) == 0){
       a[bn] = addr = balloc(ip->dev);
-      bwrite(bp);
+      log_write(bp);
     }
     brelse(bp);
     return addr;
diff --git a/fs.h b/fs.h
index 1e6137b..c9e34bf 100644
--- a/fs.h
+++ b/fs.h
@@ -13,6 +13,7 @@ struct superblock {
   uint size;         // Size of file system image (blocks)
   uint nblocks;      // Number of data blocks
   uint ninodes;      // Number of inodes.
+  uint nlog;         // Number of log blocks
 };
 
 #define NDIRECT 12
diff --git a/initcode.S b/initcode.S
index 41e84f4..d86660a 100644
--- a/initcode.S
+++ b/initcode.S
@@ -3,9 +3,12 @@
 #include "syscall.h"
 #include "traps.h"
 
+
 # exec(init, argv)
 .globl start
 start:
+  movl $SYS_init, %eax
+  int $T_SYSCALL
   pushl $argv
   pushl $init
   pushl $0  // where caller pc would be
diff --git a/log.c b/log.c
new file mode 100644
index 0000000..72a0367
--- /dev/null
+++ b/log.c
@@ -0,0 +1,164 @@
+#include "types.h"
+#include "defs.h"
+#include "param.h"
+#include "mmu.h"
+#include "proc.h"
+#include "x86.h"
+#include "spinlock.h"
+#include "fs.h"
+#include "buf.h"
+
+// Dirt simple "logging" supporting only one transaction at a time.  All file system
+// calls that potentially write a block should be wrapped in begin_trans and
+// commit_trans, so that there is never more than one transaction in progress.  This
+// serializes all file system operations that potentially write, but simplifies recovery
+// (at most one transaction to replay) and concurrency (no need to worry about reading a
+// modified block from a transaction that hasn't committed yet).
+
+// The header of the log, stored in the first log block (log.start).  If head == 0, there
+// are no log entries.  Once the header is on disk, entries 0..head-1 are committed.
+// sector[] records the home sector for each block in the log (i.e., physical logging).
+struct logheader {
+  int head;             // number of valid entries in sector[]
+  int sector[LOGSIZE];  // sector[i]: home sector of the block in log slot i (log.start+i+1)
+};
+
+struct {                 // in-memory state of the single on-disk log
+  struct spinlock lock;  // guards intrans (taken in begin_trans/commit_trans)
+  int start;             // block number of the log header; set to sb.size - sb.nlog
+  int size;              // number of log blocks (sb.nlog)
+  int intrans;           // 1 while a transaction is open, else 0
+  int dev;               // device holding the log (ROOTDEV)
+  struct logheader lh;   // in-memory copy of the log header
+} log;
+
+static void recover_from_log(void);
+
+void
+initlog(void)
+{
+  if (sizeof(struct logheader) >= BSIZE)   // the header must be smaller than one block
+    panic("initlog: too big logheader");
+  // The log occupies the last sb.nlog blocks of the file system image.
+  struct superblock sb;
+  initlock(&log.lock, "log");
+  readsb(ROOTDEV, &sb);
+  log.start = sb.size - sb.nlog;
+  log.size = sb.nlog;
+  log.dev = ROOTDEV;
+  recover_from_log();   // install whatever the on-disk header says is committed, then clear it
+}
+
+// Copy the blocks listed in the in-memory header (log.lh) from the log to their home sectors
+static void
+install_trans(void)
+{
+  int tail;
+  // Debug trace; silent when the log is empty.
+  if (log.lh.head > 0)
+    cprintf("install_trans %d\n", log.lh.head);
+  for (tail = 0; tail < log.lh.head; tail++) {
+    cprintf("put entry %d to disk block %d\n", tail, log.lh.sector[tail]);
+    struct buf *lbuf = bread(log.dev, log.start+tail+1);   // log slot tail (+1 skips the header block)
+    struct buf *dbuf = bread(log.dev, log.lh.sector[tail]);  // the block at its home location
+    memmove(dbuf->data, lbuf->data, BSIZE);
+    bwrite(dbuf);   // overwrite the home block with the logged contents
+    brelse(lbuf);
+    brelse(dbuf);
+  }
+}
+
+// Read the log header from disk into the in-memory log header.
+// Panics if the on-disk entry count cannot fit in log.lh.sector[].
+static void
+read_head(void)
+{
+  struct buf *buf = bread(log.dev, log.start);
+  struct logheader *lh = (struct logheader *) (buf->data);
+  if (lh->head < 0 || lh->head > LOGSIZE)
+    panic("read_head: bad log header");
+  log.lh.head = lh->head;
+  memmove(log.lh.sector, lh->sector, log.lh.head * sizeof(lh->sector[0]));
+  brelse(buf);
+  if (log.lh.head > 0)
+    cprintf("read_head: %d\n", log.lh.head);
+}
+
+// Write the in-memory log header to disk: head > 0 commits the log, head == 0 empties it
+static void
+write_head(void)
+{
+  if (log.lh.head > 0)
+    cprintf("write_head: %d\n", log.lh.head);
+  // Update the header block in place; only the first head sector[] entries are copied.
+  struct buf *buf = bread(log.dev, log.start);
+  struct logheader *hb = (struct logheader *) (buf->data);
+  int i;
+  hb->head = log.lh.head;
+  for (i = 0; i < log.lh.head; i++) {
+    hb->sector[i] = log.lh.sector[i];
+  }
+  bwrite(buf);   // a header with head > 0 on disk is what makes the entries committed
+  brelse(buf);
+}
+
+static void
+recover_from_log(void)
+{
+  read_head();      // load the on-disk header; head > 0 means committed entries remain
+  install_trans();  // copy entries 0..head-1 to their home sectors
+  log.lh.head = 0;
+  write_head();     // reclaim the log: write an empty header back to disk
+}
+
+void
+begin_trans(void)
+{
+  acquire(&log.lock);
+  while (log.intrans) {        // another transaction is open: wait for its commit_trans
+    sleep(&log, &log.lock);    // commit_trans calls wakeup(&log) when it finishes
+  }
+  log.intrans = 1;             // claim the single transaction slot
+  release(&log.lock);
+}
+
+void
+commit_trans(void)
+{
+  write_head();        // Commit point: the header with the current head goes to disk
+  install_trans();     // Copy the logged blocks to their home sectors
+  log.lh.head = 0;
+  write_head();        // Reclaim log: an empty header goes to disk
+  // Transaction finished: clear intrans and let sleepers in begin_trans retry.
+  acquire(&log.lock);
+  log.intrans = 0;
+  wakeup(&log);
+  release(&log.lock);
+}
+
+// Copy buffer b into the on-disk log and record its home sector in log.lh.sector[],
+// but don't write the log header (which would commit the write).
+void
+log_write(struct buf *b)
+{
+  int i;
+
+  // Block log.start holds the header, so only log.size-1 data blocks fit on disk.
+  if (log.lh.head >= LOGSIZE || log.lh.head >= log.size - 1)
+    panic("too big a transaction");
+  if (!log.intrans)
+    panic("write outside of trans");
+  cprintf("log_write: %d %d\n", b->sector, log.lh.head);
+
+  for (i = 0; i < log.lh.head; i++) {
+    if (log.lh.sector[i] == b->sector)   // log absorption: reuse the existing slot
+      break;
+  }
+  log.lh.sector[i] = b->sector;
+  struct buf *lbuf = bread(log.dev, log.start+i+1);
+  memmove(lbuf->data, b->data, BSIZE);
+  bwrite(lbuf);
+  brelse(lbuf);
+  if (i == log.lh.head)
+    log.lh.head++;
+}
diff --git a/main.c b/main.c
index e6d81f3..a27c4ff 100644
--- a/main.c
+++ b/main.c
@@ -20,7 +20,7 @@ main(void)
   lapicinit(mpbcpu());
   seginit();       // set up segments
   kinit();         // initialize memory allocator
-  jmpkstack();       // call mainc() on a properly-allocated stack 
+  jmpkstack();     // call mainc() on a properly-allocated stack 
 }
 
 void
diff --git a/mkfs.c b/mkfs.c
index 20b9649..f015edd 100644
--- a/mkfs.c
+++ b/mkfs.c
@@ -9,8 +9,10 @@
 #include "types.h"
 #include "fs.h"
 #include "stat.h"
+#include "param.h"
 
-int nblocks = 995;
+int nblocks = 985;
+int nlog = LOGSIZE;
 int ninodes = 200;
 int size = 1024;
 
@@ -79,17 +81,18 @@ main(int argc, char *argv[])
   sb.size = xint(size);
   sb.nblocks = xint(nblocks); // so whole disk is size sectors
   sb.ninodes = xint(ninodes);
+  sb.nlog = xint(nlog);
 
   bitblocks = size/(512*8) + 1;
   usedblocks = ninodes / IPB + 3 + bitblocks;
   freeblock = usedblocks;
 
-  printf("used %d (bit %d ninode %zu) free %u total %d\n", usedblocks,
-         bitblocks, ninodes/IPB + 1, freeblock, nblocks+usedblocks);
+  printf("used %d (bit %d ninode %zu) free %u log %u total %d\n", usedblocks,
+         bitblocks, ninodes/IPB + 1, freeblock, nlog, nblocks+usedblocks+nlog);
 
-  assert(nblocks + usedblocks == size);
+  assert(nblocks + usedblocks + nlog == size);
 
-  for(i = 0; i < nblocks + usedblocks; i++)
+  for(i = 0; i < nblocks + usedblocks + nlog; i++)
     wsect(i, zeroes);
 
   memset(buf, 0, sizeof(buf));
diff --git a/param.h b/param.h
index 70f88e8..ab1b9fe 100644
--- a/param.h
+++ b/param.h
@@ -10,3 +10,5 @@
 #define USERTOP  0xA0000 // end of user address space
 #define PHYSTOP  0x1000000 // use phys mem up to here as free pool
 #define MAXARG       32  // max exec arguments
+#define LOGSIZE      10  // size of log in disk blocks, including the header block
+
diff --git a/syscall.c b/syscall.c
index f6550a1..ce50a59 100644
--- a/syscall.c
+++ b/syscall.c
@@ -98,39 +98,52 @@ extern int sys_wait(void);
 extern int sys_write(void);
 extern int sys_uptime(void);
 
+int
+sys_init(void)
+{
+  static int done;  // initlog() resets the log; running it again would clobber a live transaction
+  return done ? -1 : (done = 1, initlog(), 0);
+}
+
 static int (*syscalls[])(void) = {
-[SYS_chdir]   sys_chdir,
-[SYS_close]   sys_close,
-[SYS_dup]     sys_dup,
-[SYS_exec]    sys_exec,
-[SYS_exit]    sys_exit,
+[SYS_init]    sys_init,
 [SYS_fork]    sys_fork,
-[SYS_fstat]   sys_fstat,
-[SYS_getpid]  sys_getpid,
-[SYS_kill]    sys_kill,
-[SYS_link]    sys_link,
-[SYS_mkdir]   sys_mkdir,
-[SYS_mknod]   sys_mknod,
-[SYS_open]    sys_open,
+[SYS_exit]    sys_exit,
+[SYS_wait]    sys_wait,
 [SYS_pipe]    sys_pipe,
 [SYS_read]    sys_read,
+[SYS_kill]    sys_kill,
+[SYS_exec]    sys_exec,
+[SYS_fstat]   sys_fstat,
+[SYS_chdir]   sys_chdir,
+[SYS_dup]     sys_dup,
+[SYS_getpid]  sys_getpid,
 [SYS_sbrk]    sys_sbrk,
 [SYS_sleep]   sys_sleep,
-[SYS_unlink]  sys_unlink,
-[SYS_wait]    sys_wait,
-[SYS_write]   sys_write,
 [SYS_uptime]  sys_uptime,
+// File system calls that are run in a transaction:
+[SYS_open]    sys_open,
+[SYS_write]   sys_write,
+[SYS_mknod]   sys_mknod,
+[SYS_unlink]  sys_unlink,
+[SYS_link]    sys_link,
+[SYS_mkdir]   sys_mkdir,
+[SYS_close]   sys_close,
 };
 
 void
 syscall(void)
 {
   int num;
-  
+
   num = proc->tf->eax;
-  if(num >= 0 && num < NELEM(syscalls) && syscalls[num])
+  if(num >= 0 && num < SYS_open && syscalls[num]) {
     proc->tf->eax = syscalls[num]();
-  else {
+  } else if (num >= SYS_open && num < NELEM(syscalls) && syscalls[num]) {
+    begin_trans();
+    proc->tf->eax = syscalls[num]();
+    commit_trans();
+  } else {
     cprintf("%d %s: unknown sys call %d\n",
             proc->pid, proc->name, num);
     proc->tf->eax = -1;
diff --git a/syscall.h b/syscall.h
index 3a0fbca..e9e43a2 100644
--- a/syscall.h
+++ b/syscall.h
@@ -1,22 +1,24 @@
 // System call numbers
+#define SYS_init    0
 #define SYS_fork    1
 #define SYS_exit    2
 #define SYS_wait    3
 #define SYS_pipe    4
-#define SYS_write   5
-#define SYS_read    6
-#define SYS_close   7
-#define SYS_kill    8
-#define SYS_exec    9
-#define SYS_open   10
-#define SYS_mknod  11
-#define SYS_unlink 12
-#define SYS_fstat  13
-#define SYS_link   14
-#define SYS_mkdir  15
-#define SYS_chdir  16
-#define SYS_dup    17
-#define SYS_getpid 18
-#define SYS_sbrk   19
-#define SYS_sleep  20
-#define SYS_uptime 21
+#define SYS_read    5
+#define SYS_kill    6
+#define SYS_exec    7
+#define SYS_fstat   8
+#define SYS_chdir   9
+#define SYS_dup    10
+#define SYS_getpid 11
+#define SYS_sbrk   12
+#define SYS_sleep  13
+#define SYS_uptime 14
+
+#define SYS_open   15
+#define SYS_write  16
+#define SYS_mknod  17
+#define SYS_unlink 18
+#define SYS_link   19
+#define SYS_mkdir  20
+#define SYS_close  21