Source code for diffparser.separate_snippets
import re
def sep_by_snippets(un_separated):
    """
    Split the lines of a unified diff into old and new code, grouped by hunk.

    Every hunk of a unified diff starts with a header such as
    ``@@ -10,5 +10,7 @@``: the number after ``-`` is the line where the hunk
    starts in the old file, the number after ``+`` is where it starts in the
    new file, and the optional number after each comma is how many lines the
    hunk covers. These headers are used as separators between code snippets.

    Within a hunk, the first character of each line decides where it goes:
    ``-`` lines belong to the old code only, ``+`` lines to the new code
    only, and every other line (context) belongs to both. That first
    character is dropped from the stored line.

    Parameters
    ----------
    un_separated : list
        A list of strings, one per line of the diff.

    Returns
    -------
    sep_dict : dict
        ``{'oldCode': [[line, ...], ...], 'newCode': [[line, ...], ...]}``
        where both outer lists hold one inner list per hunk, in the order
        the hunks were encountered.

    Notes
    -----
    Lines that appear before the first hunk header (for example the
    ``---`` / ``+++`` file header) do not belong to any hunk and are skipped.
    An empty line inside a hunk is treated as an empty context line.
    """
    # The ",count" part is optional: single-line hunks are written as
    # "@@ -3 +3 @@". The inner groups are non-capturing so that split()
    # still yields only the header itself as a separator element.
    hunk_header = re.compile(r'(@+ -*\d+(?:,\d*)? \+\d+(?:,\d*)? @+)')

    old_code = []
    new_code = []
    sep_dict = {'oldCode': old_code, 'newCode': new_code}

    for line in un_separated:
        if hunk_header.search(line):
            # A header opens a new snippet on both sides.
            old_code.append([])
            new_code.append([])
            # Any code that follows the header on the same line is the last
            # non-empty piece of the split.
            line = [part for part in hunk_header.split(line) if part][-1]
            if hunk_header.search(line):
                # The last piece is the header itself: no trailing code.
                continue
        elif not old_code:
            # No hunk has been opened yet, so this is preamble.
            continue

        # startswith() (rather than line[0]) keeps empty lines from raising.
        if line.startswith('-'):
            old_code[-1].append(line[1:])
        elif line.startswith('+'):
            new_code[-1].append(line[1:])
        else:
            # Context line: present in both the old and the new code.
            old_code[-1].append(line[1:])
            new_code[-1].append(line[1:])

    return sep_dict