Source code for diffparser.separate_files



[docs]def sep_by_files(diff): """ This function will separate the files in the diffs along with the old / new code snippets. Parameters ---------- diff string String containing the diff to be separated, based on filenames and old/new code snippets. Returns ------- sep_dict dict Nested dictionary containing one dictionary for each filename parsed in the diff. The inner dict contains one entry for the list of old code snippets, and one entry for the list of the new snippets. """ # Get the information about where the files start and stop file_info = get_file_start_indices(diff) # Create empty dictionary sep_dict = {} # Iterate over the information in the file_info dict for key, value in file_info.items(): # Describe the start and stop lines start = value['finalPathLine'] + 1 stop = value['lastLine'] # Add the split diff lines to the sep_dict dictionary sep_dict[key] = diff.splitlines()[start:stop] # Return the new sep_dict dictionary containing the separated diffs. return sep_dict
[docs]def get_file_start_indices(diff): """ Creates a dictionary with entries that describes the positional indices of the files in the input diff. The output should have the following format: {'fileName':{'firstPathLine': int, 'finalPathLine': int, 'proceedingFile': 'nextFileName', 'lastLine': int}} Parameters ---------- diff string Contains the diff to be parsed. Returns ------- file_info dict A dictionary which describes the indices of the files in the diff, plus some additional information. """ # Create a dict with filenames as entries. file_info = {} # {file_name: {} for file_name in list_of_files} # Iterate over each line in the diff for i, line in enumerate(diff.splitlines()): # Check if the line is describing subtraction or addition in a file if '--- a' in line or '+++ b' in line: # Select the current file from the list_of_files, based on the characters of the line. current_file = line[6:] file_info[current_file] = {} # Store the information about where we found this line file_info[current_file]['firstPathLine'] = file_info[current_file].get('firstPathLine', i) file_info[current_file]['finalPathLine'] = i file_info = define_file_indices(file_info, diff) return file_info
[docs]def define_file_indices(file_info, diff): # TODO: Make sure this function works on diffs with multiple changes per file """ Takes a dictionary with entries that describes some of the positional indices of the files in the input diff, and adds more information to it. The output should have the following format: {'fileName':{'firstPathLine': int, 'finalPathLine': int, 'proceedingFile': 'nextFileName', 'lastLine': int}} Parameters ---------- file_info dict A dictionary containing some information about the diff. diff string Contains the diff to be parsed. Returns ------- file_info dict A dictionary which describes the indices of the files in the diff, plus some additional information. """ # Iterate over all except the last entries in the file_info dict for i, key in enumerate(list(file_info.keys())[:-1]): # Store information about what the next file is proceeding = list(file_info.keys())[ i +1] file_info[key]['proceedingFile'] = proceeding # With information about the next line, we can save the index of this snippets final line. file_info[key]['lastLine'] = file_info[proceeding]['firstPathLine'] - 1 # We can also save information about the index of the last line for the final snippet: file_info[list(file_info.keys())[-1]]['lastLine'] = len(diff.splitlines()) - 1 # Return the information gathered return file_info