Browse Source

Completed versioning backup prototype

TC pushbot 5 4 years ago
parent
commit
9d9db1ff40
2 changed files with 182 additions and 29 deletions
  1. 64 0
      mod/disk/hybridBackup/linker.go
  2. 118 29
      mod/disk/hybridBackup/versionBackup.go

+ 64 - 0
mod/disk/hybridBackup/linker.go

@@ -0,0 +1,64 @@
+package hybridBackup
+
+import (
+	"encoding/json"
+	"io/ioutil"
+	"path/filepath"
+)
+
+/*
+	Linker.go
+
+	This script handles the link file operations
+
+*/
+
+//LinkFileMap is the in-memory form of a snapshot's "snapshot.datalink" file.
+//It is written to and read from disk as JSON, so the exported field names
+//below are also the keys used in the stored file.
+type LinkFileMap struct {
+	//UnchangedFile maps a file's path relative to the snapshot root to the
+	//name of the snapshot folder that holds the file's data.
+	//Lookups convert the path to forward slashes first.
+	UnchangedFile map[string]string
+	//DeletedFiles maps a file's path relative to the snapshot root to the
+	//name of the snapshot in which the file was found missing from the source.
+	DeletedFiles  map[string]string
+}
+
+//generateLinkFile encodes lf as tab-indented JSON and stores it as
+//"snapshot.datalink" inside snapshotFolder.
+//It returns the marshalling error or the file write error, whichever occurs first.
+func generateLinkFile(snapshotFolder string, lf LinkFileMap) error {
+	linkFilePath := filepath.Join(snapshotFolder, "snapshot.datalink")
+
+	encoded, marshalErr := json.MarshalIndent(lf, "", "\t")
+	if marshalErr != nil {
+		return marshalErr
+	}
+
+	return ioutil.WriteFile(linkFilePath, encoded, 0755)
+}
+
+//readLinkFile loads the "snapshot.datalink" file stored in snapshotFolder and
+//parses it into a LinkFileMap.
+//
+//The returned pointer is never nil and both of its maps are always
+//initialised, so the result can be queried even when an error is reported:
+//  - link file missing: empty map, nil error
+//  - link file unreadable or not valid JSON: empty map and the read / parse error
+//  - link file parsed: its content, nil error
+func readLinkFile(snapshotFolder string) (*LinkFileMap, error) {
+	result := LinkFileMap{
+		UnchangedFile: map[string]string{},
+		DeletedFiles:  map[string]string{},
+	}
+
+	//A snapshot without a link file is not an error. It just links to nothing
+	expectedLinkFilePath := filepath.Join(snapshotFolder, "snapshot.datalink")
+	if !fileExists(expectedLinkFilePath) {
+		return &result, nil
+	}
+
+	//Read the content of the link file
+	content, err := ioutil.ReadFile(expectedLinkFilePath)
+	if err != nil {
+		return &result, err
+	}
+
+	//Parse into a separate struct so a failed parse cannot leave partial data in result
+	lfContent := LinkFileMap{}
+	if err := json.Unmarshal(content, &lfContent); err != nil {
+		return &result, err
+	}
+
+	//A JSON null or a missing key leaves the map nil. Replace it with an empty map
+	if lfContent.UnchangedFile == nil {
+		lfContent.UnchangedFile = map[string]string{}
+	}
+	if lfContent.DeletedFiles == nil {
+		lfContent.DeletedFiles = map[string]string{}
+	}
+
+	return &lfContent, nil
+}
+
+//fileExists looks up fileRelPath, converted to forward slashes, among the
+//unchanged files recorded in this link file map.
+//It returns whether an entry was found and the name of the snapshot the file
+//is linked to (empty string when not found).
+func (lfm *LinkFileMap) fileExists(fileRelPath string) (bool, string) {
+	lookupKey := filepath.ToSlash(fileRelPath)
+	linkedSnapshot, found := lfm.UnchangedFile[lookupKey]
+	return found, linkedSnapshot
+}

+ 118 - 29
mod/disk/hybridBackup/versionBackup.go

@@ -2,7 +2,6 @@ package hybridBackup
 
 
 import (
 import (
 	"errors"
 	"errors"
-	"io/ioutil"
 	"log"
 	"log"
 	"os"
 	"os"
 	"path/filepath"
 	"path/filepath"
@@ -38,15 +37,22 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 	}
 	}
 
 
 	todayFolderName := time.Now().Format("2006-01-02")
 	todayFolderName := time.Now().Format("2006-01-02")
-	previousSnapshotName, _ := getPreviousSnapshotName(backupConfig, todayFolderName)
+	previousSnapshotExists := true
+	previousSnapshotName, err := getPreviousSnapshotName(backupConfig, todayFolderName)
+	if err != nil {
+		previousSnapshotExists = false
+	}
 	snapshotLocation := filepath.Join(backupConfig.DiskPath, "/version/", todayFolderName)
 	snapshotLocation := filepath.Join(backupConfig.DiskPath, "/version/", todayFolderName)
 	previousSnapshotLocation := filepath.Join(backupConfig.DiskPath, "/version/", previousSnapshotName)
 	previousSnapshotLocation := filepath.Join(backupConfig.DiskPath, "/version/", previousSnapshotName)
 
 
+	//Create today folder if not exist
 	if !fileExists(snapshotLocation) {
 	if !fileExists(snapshotLocation) {
-		//Create today folder if not exist
 		os.MkdirAll(snapshotLocation, 0755)
 		os.MkdirAll(snapshotLocation, 0755)
 	}
 	}
 
 
+	//Read the previous snapshot datalink into a LinkFileMap so unchanged files can be found with a map lookup
+	previousSnapshotMap, _ := readLinkFile(previousSnapshotLocation)
+
 	/*
 	/*
 		Run a three pass compare logic between
 		Run a three pass compare logic between
 		1. source disk and new backup disk to check any new / modified files (created today)
 		1. source disk and new backup disk to check any new / modified files (created today)
@@ -54,6 +60,8 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 		3. file in today backup disk no longer in the current source disk (created today, deleted today)
 		3. file in today backup disk no longer in the current source disk (created today, deleted today)
 	*/
 	*/
 	copiedFileList := []string{}
 	copiedFileList := []string{}
+	linkedFileList := map[string]string{}
+	deletedFileList := map[string]string{}
 
 
 	//First pass: Check if there are any updated file from source and backup it to backup drive
 	//First pass: Check if there are any updated file from source and backup it to backup drive
 	fastWalk(parentRootAbs, func(filename string) error {
 	fastWalk(parentRootAbs, func(filename string) error {
@@ -74,39 +82,57 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 		yesterdayBackupLocation := filepath.Join(previousSnapshotLocation, relPath)
 		yesterdayBackupLocation := filepath.Join(previousSnapshotLocation, relPath)
 
 
 		//Check if the file exists
 		//Check if the file exists
-		if !fileExists(fileBackupLocation) && !fileExists(yesterdayBackupLocation) {
-			//File not exists in both current source and yesterday one. Copy it to the target location
-			if !isDir(fileBackupLocation) && fileExists(fileBackupLocation+".deleted") {
-				os.Remove(fileBackupLocation + ".deleted")
-			}
+		if !fileExists(yesterdayBackupLocation) {
+			//This file not in last snapshot location.
+			//Check if it is in previous snapshot map
+			fileFoundInSnapshotLinkFile, nameOfSnapshot := previousSnapshotMap.fileExists(relPath)
+			if fileFoundInSnapshotLinkFile {
+				//File found in the snapshot link file. Compare the one in snapshot
+				linkedSnapshotLocation := filepath.Join(backupConfig.DiskPath, "/version/", nameOfSnapshot)
+				linkedSnapshotOriginalFile := filepath.Join(linkedSnapshotLocation, relPath)
+				if fileExists(linkedSnapshotOriginalFile) {
+					//Linked file exists. Compare hash
+					fileHashMatch, err := fileHashIdentical(fileAbs, linkedSnapshotOriginalFile)
+					if err != nil {
+						return nil
+					}
 
 
-			if !fileExists(filepath.Dir(fileBackupLocation)) {
-				os.MkdirAll(filepath.Dir(fileBackupLocation), 0755)
-			}
+					if fileHashMatch {
+						//append this record to this snapshot linkdata file
+						linkedFileList[relPath] = nameOfSnapshot
+					} else {
+						//File hash mismatch. Do file copy to renew data
+						copyFileToBackupLocation(filename, fileBackupLocation)
+						copiedFileList = append(copiedFileList, fileBackupLocation)
+					}
+				} else {
+					//Invalid snapshot linkage. Assume new and do copy
+					log.Println("[HybridBackup] Link lost. Cloning source file to snapshot.")
+					copyFileToBackupLocation(filename, fileBackupLocation)
+					copiedFileList = append(copiedFileList, fileBackupLocation)
+				}
 
 
-			err = BufferedLargeFileCopy(filename, fileBackupLocation, 4096)
-			if err != nil {
-				log.Println("[HybridBackup] Failed to copy file: ", filepath.Base(filename)+". "+err.Error())
+			} else {
+				//This file is not in snapshot link file.
+				//This is new file. Copy it to backup
+				copyFileToBackupLocation(filename, fileBackupLocation)
+				copiedFileList = append(copiedFileList, fileBackupLocation)
 			}
 			}
 
 
-			copiedFileList = append(copiedFileList, fileBackupLocation)
-
 		} else if fileExists(yesterdayBackupLocation) {
 		} else if fileExists(yesterdayBackupLocation) {
 			//The file exists in the last snapshot
 			//The file exists in the last snapshot
 			//Check if their hash is the same. If no, update it
 			//Check if their hash is the same. If no, update it
-			srcHash, err := getFileHash(fileAbs)
+			fileHashMatch, err := fileHashIdentical(fileAbs, yesterdayBackupLocation)
 			if err != nil {
 			if err != nil {
-				log.Println("[HybridBackup] Hash calculation failed for file "+filepath.Base(fileAbs), err.Error(), " Skipping.")
-				return nil
-			}
-			targetHash, err := getFileHash(yesterdayBackupLocation)
-			if err != nil {
-				log.Println("[HybridBackup] Hash calculation failed for file "+filepath.Base(fileBackupLocation), err.Error(), " Skipping.")
 				return nil
 				return nil
 			}
 			}
 
 
-			if srcHash != targetHash {
+			if !fileHashMatch {
 				//Hash mismatch. Overwrite the file
 				//Hash mismatch. Overwrite the file
+				if !fileExists(filepath.Dir(fileBackupLocation)) {
+					os.MkdirAll(filepath.Dir(fileBackupLocation), 0755)
+				}
+
 				err = BufferedLargeFileCopy(filename, fileBackupLocation, 4096)
 				err = BufferedLargeFileCopy(filename, fileBackupLocation, 4096)
 				if err != nil {
 				if err != nil {
 					log.Println("[HybridBackup] Copy Failed for file "+filepath.Base(fileAbs), err.Error(), " Skipping.")
 					log.Println("[HybridBackup] Copy Failed for file "+filepath.Base(fileAbs), err.Error(), " Skipping.")
@@ -114,6 +140,9 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 					//No problem. Add this filepath into the list
 					//No problem. Add this filepath into the list
 					copiedFileList = append(copiedFileList, fileBackupLocation)
 					copiedFileList = append(copiedFileList, fileBackupLocation)
 				}
 				}
+			} else {
+				//Create a link file for this relative path
+				linkedFileList[relPath] = previousSnapshotName
 			}
 			}
 		} else {
 		} else {
 			//Default case
 			//Default case
@@ -133,6 +162,10 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 
 
 				if srcHash != targetHash {
 				if srcHash != targetHash {
 					//Hash mismatch. Overwrite the file
 					//Hash mismatch. Overwrite the file
+					if !fileExists(filepath.Dir(fileBackupLocation)) {
+						os.MkdirAll(filepath.Dir(fileBackupLocation), 0755)
+					}
+
 					err = BufferedLargeFileCopy(filename, fileBackupLocation, 4096)
 					err = BufferedLargeFileCopy(filename, fileBackupLocation, 4096)
 					if err != nil {
 					if err != nil {
 						log.Println("[HybridBackup] Copy Failed for file "+filepath.Base(fileAbs), err.Error(), " Skipping.")
 						log.Println("[HybridBackup] Copy Failed for file "+filepath.Base(fileAbs), err.Error(), " Skipping.")
@@ -149,8 +182,12 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 
 
 	//2nd pass: Check if there are anything exists in the previous backup but no longer exists in the source now
 	//2nd pass: Check if there are anything exists in the previous backup but no longer exists in the source now
 	//For case where the file is backed up in previous snapshot but now the file has been removed
 	//For case where the file is backed up in previous snapshot but now the file has been removed
-	if fileExists(previousSnapshotLocation) {
+	if previousSnapshotExists {
 		fastWalk(previousSnapshotLocation, func(filename string) error {
 		fastWalk(previousSnapshotLocation, func(filename string) error {
+			if filepath.Base(filename) == "snapshot.datalink" {
+				//System reserved file. Skip this
+				return nil
+			}
 			//Get the target paste location
 			//Get the target paste location
 			rootAbs, _ := filepath.Abs(previousSnapshotLocation)
 			rootAbs, _ := filepath.Abs(previousSnapshotLocation)
 			fileAbs, _ := filepath.Abs(filename)
 			fileAbs, _ := filepath.Abs(filename)
@@ -160,15 +197,25 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 
 
 			relPath := strings.ReplaceAll(fileAbs, rootAbs, "")
 			relPath := strings.ReplaceAll(fileAbs, rootAbs, "")
 			sourcAssumeLocation := filepath.Join(parentRootAbs, relPath)
 			sourcAssumeLocation := filepath.Join(parentRootAbs, relPath)
-			todaySnapshotLocation := filepath.Join(snapshotLocation, relPath)
+			//todaySnapshotLocation := filepath.Join(snapshotLocation, relPath)
 
 
 			if !fileExists(sourcAssumeLocation) {
 			if !fileExists(sourcAssumeLocation) {
 				//File exists in yesterday snapshot but not in the current source
 				//File exists in yesterday snapshot but not in the current source
 				//Assume it has been deleted, create a dummy indicator file
 				//Assume it has been deleted, create a dummy indicator file
-				ioutil.WriteFile(todaySnapshotLocation+".deleted", []byte(""), 0755)
+				//ioutil.WriteFile(todaySnapshotLocation+".deleted", []byte(""), 0755)
+				deletedFileList[relPath] = todayFolderName
 			}
 			}
 			return nil
 			return nil
 		})
 		})
+
+		//Check for deleting of unchanged file as well
+		for relPath, _ := range previousSnapshotMap.UnchangedFile {
+			sourcAssumeLocation := filepath.Join(parentRootAbs, relPath)
+			if !fileExists(sourcAssumeLocation) {
+				//The source file no longer exists
+				deletedFileList[relPath] = todayFolderName
+			}
+		}
 	}
 	}
 
 
 	//3rd pass: Check if there are anything (except file with .deleted) in today backup drive that didn't exists in the source drive
 	//3rd pass: Check if there are anything (except file with .deleted) in today backup drive that didn't exists in the source drive
@@ -179,7 +226,7 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 			return nil
 			return nil
 		}
 		}
 
 
-		if filepath.Ext(filename) == ".deleted" {
+		if filepath.Ext(filename) == ".datalink" {
 			//Deleted file marker. Skip this
 			//Deleted file marker. Skip this
 			return nil
 			return nil
 		}
 		}
@@ -201,6 +248,16 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 		return nil
 		return nil
 	})
 	})
 
 
+	//Generate linkfile for this snapshot
+	generateLinkFile(snapshotLocation, LinkFileMap{
+		UnchangedFile: linkedFileList,
+		DeletedFiles:  deletedFileList,
+	})
+
+	if err != nil {
+		return "", err
+	}
+
 	return "", nil
 	return "", nil
 }
 }
 
 
@@ -216,7 +273,7 @@ func getPreviousSnapshotName(backupConfig *BackupTask, currentSnapshotName strin
 	existingSnapshots := []string{}
 	existingSnapshots := []string{}
 	files, _ := filepath.Glob(filepath.ToSlash(filepath.Clean(backupRootAbs)) + "/*")
 	files, _ := filepath.Glob(filepath.ToSlash(filepath.Clean(backupRootAbs)) + "/*")
 	for _, file := range files {
 	for _, file := range files {
-		if isDir(file) {
+		if isDir(file) && fileExists(filepath.Join(file, "snapshot.datalink")) {
 			existingSnapshots = append(existingSnapshots, filepath.Base(file))
 			existingSnapshots = append(existingSnapshots, filepath.Base(file))
 		}
 		}
 	}
 	}
@@ -245,3 +302,35 @@ func getPreviousSnapshotName(backupConfig *BackupTask, currentSnapshotName strin
 
 
 	return previousSnapshotName, nil
 	return previousSnapshotName, nil
 }
 }
+
+//copyFileToBackupLocation copies the file at filename to fileBackupLocation,
+//creating the parent folder of the destination first if it does not exist.
+//Any failure (folder creation or copy) is logged and returned to the caller.
+func copyFileToBackupLocation(filename string, fileBackupLocation string) error {
+	targetDir := filepath.Dir(fileBackupLocation)
+	if !fileExists(targetDir) {
+		//Report a folder creation failure directly instead of letting the copy fail later
+		if err := os.MkdirAll(targetDir, 0755); err != nil {
+			log.Println("[HybridBackup] Failed to create folder for file: ", filepath.Base(filename)+". "+err.Error())
+			return err
+		}
+	}
+
+	err := BufferedLargeFileCopy(filename, fileBackupLocation, 4096)
+	if err != nil {
+		log.Println("[HybridBackup] Failed to copy file: ", filepath.Base(filename)+". "+err.Error())
+		return err
+	}
+	return nil
+}
+
+//fileHashIdentical reports whether srcFile and matchingFile produce the same
+//hash from getFileHash.
+//
+//If either hash cannot be calculated the failure is logged and returned as
+//the error, with the boolean set to false. Returning the error (rather than
+//nil) lets callers skip the file as the log message states, instead of
+//treating an unreadable file as a changed one.
+func fileHashIdentical(srcFile string, matchingFile string) (bool, error) {
+	srcHash, err := getFileHash(srcFile)
+	if err != nil {
+		log.Println("[HybridBackup] Hash calculation failed for file "+filepath.Base(srcFile), err.Error(), " Skipping.")
+		return false, err
+	}
+	targetHash, err := getFileHash(matchingFile)
+	if err != nil {
+		log.Println("[HybridBackup] Hash calculation failed for file "+filepath.Base(matchingFile), err.Error(), " Skipping.")
+		return false, err
+	}
+
+	return srcHash == targetHash, nil
+}