Bladeren bron

Completed versioning backup prototype

TC pushbot 5 4 jaren geleden
bovenliggende
commit
9d9db1ff40
2 gewijzigde bestanden met toevoegingen van 182 en 29 verwijderingen
  1. 64 0
      mod/disk/hybridBackup/linker.go
  2. 118 29
      mod/disk/hybridBackup/versionBackup.go

+ 64 - 0
mod/disk/hybridBackup/linker.go

@@ -0,0 +1,64 @@
+package hybridBackup
+
+import (
+	"encoding/json"
+	"io/ioutil"
+	"path/filepath"
+)
+
+/*
+	Linker.go
+
+	This file handles the link file (snapshot.datalink) operations
+
+*/
+
+type LinkFileMap struct { //Content of a snapshot's "snapshot.datalink" file, stored as JSON
+	UnchangedFile map[string]string //relative file path -> name of the snapshot that holds the unchanged copy
+	DeletedFiles  map[string]string //relative file path -> name of the snapshot in which the deletion was recorded
+}
+
+//generateLinkFile encodes lf as tab-indented JSON and stores it as
+//"snapshot.datalink" inside snapshotFolder.
+//It returns the marshal error or the write error, whichever happens first.
+func generateLinkFile(snapshotFolder string, lf LinkFileMap) error {
+	linkFilePath := filepath.Join(snapshotFolder, "snapshot.datalink")
+
+	encoded, marshalErr := json.MarshalIndent(lf, "", "\t")
+	if marshalErr != nil {
+		return marshalErr
+	}
+
+	if writeErr := ioutil.WriteFile(linkFilePath, encoded, 0755); writeErr != nil {
+		return writeErr
+	}
+	return nil
+}
+
+//readLinkFile loads "snapshot.datalink" from snapshotFolder and parses it
+//into a LinkFileMap.
+//This is best effort: when the link file is missing, unreadable or not valid
+//JSON, a LinkFileMap with two empty maps is returned instead.
+//The returned pointer is never nil and the returned error is always nil.
+func readLinkFile(snapshotFolder string) (*LinkFileMap, error) {
+	//Fallback used whenever no usable link file is available
+	emptyLinkMap := &LinkFileMap{
+		UnchangedFile: map[string]string{},
+		DeletedFiles:  map[string]string{},
+	}
+
+	linkFilePath := filepath.Join(snapshotFolder, "snapshot.datalink")
+	if !fileExists(linkFilePath) {
+		//No link file in this snapshot folder
+		return emptyLinkMap, nil
+	}
+
+	rawContent, readErr := ioutil.ReadFile(linkFilePath)
+	if readErr != nil {
+		//Link file exists but cannot be read
+		return emptyLinkMap, nil
+	}
+
+	parsedLinkMap := &LinkFileMap{}
+	if parseErr := json.Unmarshal(rawContent, parsedLinkMap); parseErr != nil {
+		//Link file content is not valid JSON. Discard whatever was decoded
+		return emptyLinkMap, nil
+	}
+
+	return parsedLinkMap, nil
+}
+
+//fileExists looks up fileRelPath (converted to forward slashes) in the
+//UnchangedFile map. It returns true and the mapped snapshot name when an
+//entry is found, otherwise false and an empty string.
+func (lfm *LinkFileMap) fileExists(fileRelPath string) (bool, string) {
+	lookupKey := filepath.ToSlash(fileRelPath)
+	if snapshotName, found := lfm.UnchangedFile[lookupKey]; found {
+		return true, snapshotName
+	}
+	return false, ""
+}

+ 118 - 29
mod/disk/hybridBackup/versionBackup.go

@@ -2,7 +2,6 @@ package hybridBackup
 
 import (
 	"errors"
-	"io/ioutil"
 	"log"
 	"os"
 	"path/filepath"
@@ -38,15 +37,22 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 	}
 
 	todayFolderName := time.Now().Format("2006-01-02")
-	previousSnapshotName, _ := getPreviousSnapshotName(backupConfig, todayFolderName)
+	previousSnapshotExists := true
+	previousSnapshotName, err := getPreviousSnapshotName(backupConfig, todayFolderName)
+	if err != nil {
+		previousSnapshotExists = false
+	}
 	snapshotLocation := filepath.Join(backupConfig.DiskPath, "/version/", todayFolderName)
 	previousSnapshotLocation := filepath.Join(backupConfig.DiskPath, "/version/", previousSnapshotName)
 
+	//Create today folder if not exist
 	if !fileExists(snapshotLocation) {
-		//Create today folder if not exist
 		os.MkdirAll(snapshotLocation, 0755)
 	}
 
+	//Read the previous snapshot datalink into a LinkFileMap so that linked files can be found with a map lookup
+	previousSnapshotMap, _ := readLinkFile(previousSnapshotLocation)
+
 	/*
 		Run a three pass compare logic between
 		1. source disk and new backup disk to check any new / modified files (created today)
@@ -54,6 +60,8 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 		3. file in today backup disk no longer in the current source disk (created today, deleted today)
 	*/
 	copiedFileList := []string{}
+	linkedFileList := map[string]string{}
+	deletedFileList := map[string]string{}
 
 	//First pass: Check if there are any updated file from source and backup it to backup drive
 	fastWalk(parentRootAbs, func(filename string) error {
@@ -74,39 +82,57 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 		yesterdayBackupLocation := filepath.Join(previousSnapshotLocation, relPath)
 
 		//Check if the file exists
-		if !fileExists(fileBackupLocation) && !fileExists(yesterdayBackupLocation) {
-			//File not exists in both current source and yesterday one. Copy it to the target location
-			if !isDir(fileBackupLocation) && fileExists(fileBackupLocation+".deleted") {
-				os.Remove(fileBackupLocation + ".deleted")
-			}
+		if !fileExists(yesterdayBackupLocation) {
+			//This file not in last snapshot location.
+			//Check if it is in previous snapshot map
+			fileFoundInSnapshotLinkFile, nameOfSnapshot := previousSnapshotMap.fileExists(relPath)
+			if fileFoundInSnapshotLinkFile {
+				//File found in the snapshot link file. Compare the one in snapshot
+				linkedSnapshotLocation := filepath.Join(backupConfig.DiskPath, "/version/", nameOfSnapshot)
+				linkedSnapshotOriginalFile := filepath.Join(linkedSnapshotLocation, relPath)
+				if fileExists(linkedSnapshotOriginalFile) {
+					//Linked file exists. Compare hash
+					fileHashMatch, err := fileHashIdentical(fileAbs, linkedSnapshotOriginalFile)
+					if err != nil {
+						return nil
+					}
 
-			if !fileExists(filepath.Dir(fileBackupLocation)) {
-				os.MkdirAll(filepath.Dir(fileBackupLocation), 0755)
-			}
+					if fileHashMatch {
+						//append this record to this snapshot linkdata file
+						linkedFileList[relPath] = nameOfSnapshot
+					} else {
+						//File hash mismatch. Do file copy to renew data
+						copyFileToBackupLocation(filename, fileBackupLocation)
+						copiedFileList = append(copiedFileList, fileBackupLocation)
+					}
+				} else {
+					//Invalid snapshot linkage. Assume new and do copy
+					log.Println("[HybridBackup] Link lost. Cloning source file to snapshot.")
+					copyFileToBackupLocation(filename, fileBackupLocation)
+					copiedFileList = append(copiedFileList, fileBackupLocation)
+				}
 
-			err = BufferedLargeFileCopy(filename, fileBackupLocation, 4096)
-			if err != nil {
-				log.Println("[HybridBackup] Failed to copy file: ", filepath.Base(filename)+". "+err.Error())
+			} else {
+				//This file is not in snapshot link file.
+				//This is new file. Copy it to backup
+				copyFileToBackupLocation(filename, fileBackupLocation)
+				copiedFileList = append(copiedFileList, fileBackupLocation)
 			}
 
-			copiedFileList = append(copiedFileList, fileBackupLocation)
-
 		} else if fileExists(yesterdayBackupLocation) {
 			//The file exists in the last snapshot
 			//Check if their hash is the same. If no, update it
-			srcHash, err := getFileHash(fileAbs)
+			fileHashMatch, err := fileHashIdentical(fileAbs, yesterdayBackupLocation)
 			if err != nil {
-				log.Println("[HybridBackup] Hash calculation failed for file "+filepath.Base(fileAbs), err.Error(), " Skipping.")
-				return nil
-			}
-			targetHash, err := getFileHash(yesterdayBackupLocation)
-			if err != nil {
-				log.Println("[HybridBackup] Hash calculation failed for file "+filepath.Base(fileBackupLocation), err.Error(), " Skipping.")
 				return nil
 			}
 
-			if srcHash != targetHash {
+			if !fileHashMatch {
 				//Hash mismatch. Overwrite the file
+				if !fileExists(filepath.Dir(fileBackupLocation)) {
+					os.MkdirAll(filepath.Dir(fileBackupLocation), 0755)
+				}
+
 				err = BufferedLargeFileCopy(filename, fileBackupLocation, 4096)
 				if err != nil {
 					log.Println("[HybridBackup] Copy Failed for file "+filepath.Base(fileAbs), err.Error(), " Skipping.")
@@ -114,6 +140,9 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 					//No problem. Add this filepath into the list
 					copiedFileList = append(copiedFileList, fileBackupLocation)
 				}
+			} else {
+				//Create a link file for this relative path
+				linkedFileList[relPath] = previousSnapshotName
 			}
 		} else {
 			//Default case
@@ -133,6 +162,10 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 
 				if srcHash != targetHash {
 					//Hash mismatch. Overwrite the file
+					if !fileExists(filepath.Dir(fileBackupLocation)) {
+						os.MkdirAll(filepath.Dir(fileBackupLocation), 0755)
+					}
+
 					err = BufferedLargeFileCopy(filename, fileBackupLocation, 4096)
 					if err != nil {
 						log.Println("[HybridBackup] Copy Failed for file "+filepath.Base(fileAbs), err.Error(), " Skipping.")
@@ -149,8 +182,12 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 
 	//2nd pass: Check if there are anything exists in the previous backup but no longer exists in the source now
 	//For case where the file is backed up in previous snapshot but now the file has been removed
-	if fileExists(previousSnapshotLocation) {
+	if previousSnapshotExists {
 		fastWalk(previousSnapshotLocation, func(filename string) error {
+			if filepath.Base(filename) == "snapshot.datalink" {
+				//System reserved file. Skip this
+				return nil
+			}
 			//Get the target paste location
 			rootAbs, _ := filepath.Abs(previousSnapshotLocation)
 			fileAbs, _ := filepath.Abs(filename)
@@ -160,15 +197,25 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 
 			relPath := strings.ReplaceAll(fileAbs, rootAbs, "")
 			sourcAssumeLocation := filepath.Join(parentRootAbs, relPath)
-			todaySnapshotLocation := filepath.Join(snapshotLocation, relPath)
+			//todaySnapshotLocation := filepath.Join(snapshotLocation, relPath)
 
 			if !fileExists(sourcAssumeLocation) {
 				//File exists in yesterday snapshot but not in the current source
 				//Assume it has been deleted, create a dummy indicator file
-				ioutil.WriteFile(todaySnapshotLocation+".deleted", []byte(""), 0755)
+				//ioutil.WriteFile(todaySnapshotLocation+".deleted", []byte(""), 0755)
+				deletedFileList[relPath] = todayFolderName
 			}
 			return nil
 		})
+
+		//Check for deleting of unchanged file as well
+		for relPath, _ := range previousSnapshotMap.UnchangedFile {
+			sourcAssumeLocation := filepath.Join(parentRootAbs, relPath)
+			if !fileExists(sourcAssumeLocation) {
+				//The source file no longer exists
+				deletedFileList[relPath] = todayFolderName
+			}
+		}
 	}
 
 	//3rd pass: Check if there are anything (except file with .deleted) in today backup drive that didn't exists in the source drive
@@ -179,7 +226,7 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 			return nil
 		}
 
-		if filepath.Ext(filename) == ".deleted" {
+		if filepath.Ext(filename) == ".datalink" {
 			//Snapshot link data file (.datalink). Skip this
 			return nil
 		}
@@ -201,6 +248,16 @@ func executeVersionBackup(backupConfig *BackupTask) (string, error) {
 		return nil
 	})
 
+	//Generate linkfile for this snapshot
+	generateLinkFile(snapshotLocation, LinkFileMap{
+		UnchangedFile: linkedFileList,
+		DeletedFiles:  deletedFileList,
+	})
+
+	if err != nil {
+		return "", err
+	}
+
 	return "", nil
 }
 
@@ -216,7 +273,7 @@ func getPreviousSnapshotName(backupConfig *BackupTask, currentSnapshotName strin
 	existingSnapshots := []string{}
 	files, _ := filepath.Glob(filepath.ToSlash(filepath.Clean(backupRootAbs)) + "/*")
 	for _, file := range files {
-		if isDir(file) {
+		if isDir(file) && fileExists(filepath.Join(file, "snapshot.datalink")) {
 			existingSnapshots = append(existingSnapshots, filepath.Base(file))
 		}
 	}
@@ -245,3 +302,35 @@ func getPreviousSnapshotName(backupConfig *BackupTask, currentSnapshotName strin
 
 	return previousSnapshotName, nil
 }
+
+//copyFileToBackupLocation copies the file at filename to fileBackupLocation,
+//creating the parent folder of fileBackupLocation first when needed.
+//Any failure (folder creation or copy) is logged and returned to the caller.
+func copyFileToBackupLocation(filename string, fileBackupLocation string) error {
+	//MkdirAll does nothing and returns nil if the folder already exists,
+	//so no separate existence check is required
+	err := os.MkdirAll(filepath.Dir(fileBackupLocation), 0755)
+	if err != nil {
+		log.Println("[HybridBackup] Failed to create backup folder for file: ", filepath.Base(filename)+". "+err.Error())
+		return err
+	}
+
+	err = BufferedLargeFileCopy(filename, fileBackupLocation, 4096)
+	if err != nil {
+		log.Println("[HybridBackup] Failed to copy file: ", filepath.Base(filename)+". "+err.Error())
+		return err
+	}
+	return nil
+}
+
+//fileHashIdentical reports whether srcFile and matchingFile have the same
+//hash as calculated by getFileHash.
+//If either hash cannot be calculated, the failure is logged and returned as a
+//non-nil error together with false, so the caller can skip the file instead
+//of mistaking an unreadable file for a modified one.
+func fileHashIdentical(srcFile string, matchingFile string) (bool, error) {
+	srcHash, err := getFileHash(srcFile)
+	if err != nil {
+		log.Println("[HybridBackup] Hash calculation failed for file "+filepath.Base(srcFile), err.Error(), " Skipping.")
+		return false, err
+	}
+	targetHash, err := getFileHash(matchingFile)
+	if err != nil {
+		log.Println("[HybridBackup] Hash calculation failed for file "+filepath.Base(matchingFile), err.Error(), " Skipping.")
+		return false, err
+	}
+
+	return srcHash == targetHash, nil
+}