%(table)s%(legend)s """ _styles = """ table.diff {font-family:Courier; border:medium;} .diff_header {background-color:#e0e0e0} td.diff_header {text-align:right} .diff_next {background-color:#c0c0c0} .diff_add {background-color:#aaffaa} .diff_chg {background-color:#ffff77} .diff_sub {background-color:#ffaaaa}""" _table_template = """ %(header_row)s %(data_rows)s
""" _legend = """
Legends
Colors
 Added 
Changed
Deleted
Links
(f)irst change
(n)ext change
(t)op
""" class HtmlDiff(object): """For producing HTML side by side comparison with change highlights. This class can be used to create an HTML table (or a complete HTML file containing the table) showing a side by side, line by line comparison of text with inter-line and intra-line change highlights. The table can be generated in either full or contextual difference mode. The following methods are provided for HTML generation: make_table -- generates HTML for a single side by side table make_file -- generates complete HTML file with a single side by side table See tools/scripts/diff.py for an example usage of this class. """ _file_template = _file_template _styles = _styles _table_template = _table_template _legend = _legend _default_prefix = 0 def __init__(self,tabsize=8,wrapcolumn=None,linejunk=None, charjunk=IS_CHARACTER_JUNK): """HtmlDiff instance initializer Arguments: tabsize -- tab stop spacing, defaults to 8. wrapcolumn -- column number where lines are broken and wrapped, defaults to None where lines are not wrapped. linejunk,charjunk -- keyword arguments passed into ndiff() (used by HtmlDiff() to generate the side by side HTML differences). See ndiff() documentation for argument default values and descriptions. """ self._tabsize = tabsize self._wrapcolumn = wrapcolumn self._linejunk = linejunk self._charjunk = charjunk def make_file(self, fromlines, tolines, fromdesc='', todesc='', context=False, numlines=5, *, charset='utf-8'): """Returns HTML file of side by side comparison with change highlights Arguments: fromlines -- list of "from" lines tolines -- list of "to" lines fromdesc -- "from" file column header string todesc -- "to" file column header string context -- set to True for contextual differences (defaults to False which shows full differences). numlines -- number of context lines. When context is set True, controls number of lines displayed before and after the change. When context is False, controls the number of lines to place the "next" link anchors before the next change (so click of "next" link jumps to just before the change). charset -- charset of the HTML document """ return (self._file_template % dict( styles=self._styles, legend=self._legend, table=self.make_table(fromlines, tolines, fromdesc, todesc, context=context, numlines=numlines), charset=charset )).encode(charset, 'xmlcharrefreplace').decode(charset) def _tab_newline_replace(self,fromlines,tolines): """Returns from/to line lists with tabs expanded and newlines removed. Instead of tab characters being replaced by the number of spaces needed to fill in to the next tab stop, this function will fill the space with tab characters. This is done so that the difference algorithms can identify changes in a file when tabs are replaced by spaces and vice versa. At the end of the HTML generation, the tab characters will be replaced with a nonbreakable space. """ def expand_tabs(line): # hide real spaces line = line.replace(' ','\0') # expand tabs into spaces line = line.expandtabs(self._tabsize) # replace spaces from expanded tabs back into tab characters # (we'll replace them with markup after we do differencing) line = line.replace(' ','\t') return line.replace('\0',' ').rstrip('\n') fromlines = [expand_tabs(line) for line in fromlines] tolines = [expand_tabs(line) for line in tolines] return fromlines,tolines def _split_line(self,data_list,line_num,text): """Builds list of text lines by splitting text lines at wrap point This function will determine if the input text line needs to be wrapped (split) into separate lines. If so, the first wrap point will be determined and the first line appended to the output text line list. This function is used recursively to handle the second part of the split line to further split it. """ # if blank line or context separator, just add it to the output list if not line_num: data_list.append((line_num,text)) return # if line text doesn't need wrapping, just add it to the output list size = len(text) max = self._wrapcolumn if (size <= max) or ((size -(text.count('\0')*3)) <= max): data_list.append((line_num,text)) return # scan text looking for the wrap point, keeping track if the wrap # point is inside markers i = 0 n = 0 mark = '' while n < max and i < size: if text[i] == '\0': i += 1 mark = text[i] i += 1 elif text[i] == '\1': i += 1 mark = '' else: i += 1 n += 1 # wrap point is inside text, break it up into separate lines line1 = text[:i] line2 = text[i:] # if wrap point is inside markers, place end marker at end of first # line and start marker at beginning of second line because each # line will have its own table tag markup around it. if mark: line1 = line1 + '\1' line2 = '\0' + mark + line2 # tack on first line onto the output list data_list.append((line_num,line1)) # use this routine again to wrap the remaining text self._split_line(data_list,'>',line2) def _line_wrapper(self,diffs): """Returns iterator that splits (wraps) mdiff text lines""" # pull from/to data and flags from mdiff iterator for fromdata,todata,flag in diffs: # check for context separators and pass them through if flag is None: yield fromdata,todata,flag continue (fromline,fromtext),(toline,totext) = fromdata,todata # for each from/to line split it at the wrap column to form # list of text lines. fromlist,tolist = [],[] self._split_line(fromlist,fromline,fromtext) self._split_line(tolist,toline,totext) # yield from/to line in pairs inserting blank lines as # necessary when one side has more wrapped lines while fromlist or tolist: if fromlist: fromdata = fromlist.pop(0) else: fromdata = ('',' ') if tolist: todata = tolist.pop(0) else: todata = ('',' ') yield fromdata,todata,flag def _collect_lines(self,diffs): """Collects mdiff output into separate lists Before storing the mdiff from/to data into a list, it is converted into a single line of text with HTML markup. """ fromlist,tolist,flaglist = [],[],[] # pull from/to data and flags from mdiff style iterator for fromdata,todata,flag in diffs: try: # store HTML markup of the lines into the lists fromlist.append(self._format_line(0,flag,*fromdata)) tolist.append(self._format_line(1,flag,*todata)) except TypeError: # exceptions occur for lines where context separators go fromlist.append(None) tolist.append(None) flaglist.append(flag) return fromlist,tolist,flaglist def _format_line(self,side,flag,linenum,text): """Returns HTML markup of "from" / "to" text lines side -- 0 or 1 indicating "from" or "to" text flag -- indicates if difference on line linenum -- line number (used for line number column) text -- line text to be marked up """ try: linenum = '%d' % linenum id = ' id="%s%s"' % (self._prefix[side],linenum) except TypeError: # handle blank lines where linenum is '>' or '' id = '' # replace those things that would get confused with HTML symbols text=text.replace("&","&").replace(">",">").replace("<","<") # make space non-breakable so they don't get compressed or line wrapped text = text.replace(' ',' ').rstrip() return '%s%s' \ % (id,linenum,text) def _make_prefix(self): """Create unique anchor prefixes""" # Generate a unique anchor prefix so multiple tables # can exist on the same HTML page without conflicts. fromprefix = "from%d_" % HtmlDiff._default_prefix toprefix = "to%d_" % HtmlDiff._default_prefix HtmlDiff._default_prefix += 1 # store prefixes so line format method has access self._prefix = [fromprefix,toprefix] def _convert_flags(self,fromlist,tolist,flaglist,context,numlines): """Makes list of "next" links""" # all anchor names will be generated using the unique "to" prefix toprefix = self._prefix[1] # process change flags, generating middle column of next anchors/links next_id = ['']*len(flaglist) next_href = ['']*len(flaglist) num_chg, in_change = 0, False last = 0 for i,flag in enumerate(flaglist): if flag: if not in_change: in_change = True last = i # at the beginning of a change, drop an anchor a few lines # (the context lines) before the change for the previous # link i = max([0,i-numlines]) next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix,num_chg) # at the beginning of a change, drop a link to the next # change num_chg += 1 next_href[last] = 'n' % ( toprefix,num_chg) else: in_change = False # check for cases where there is no content to avoid exceptions if not flaglist: flaglist = [False] next_id = [''] next_href = [''] last = 0 if context: fromlist = [' No Differences Found '] tolist = fromlist else: fromlist = tolist = [' Empty File '] # if not a change on first line, drop a link if not flaglist[0]: next_href[0] = 'f' % toprefix # redo the last link to link to the top next_href[last] = 't' % (toprefix) return fromlist,tolist,flaglist,next_href,next_id def make_table(self,fromlines,tolines,fromdesc='',todesc='',context=False, numlines=5): """Returns HTML table of side by side comparison with change highlights Arguments: fromlines -- list of "from" lines tolines -- list of "to" lines fromdesc -- "from" file column header string todesc -- "to" file column header string context -- set to True for contextual differences (defaults to False which shows full differences). numlines -- number of context lines. When context is set True, controls number of lines displayed before and after the change. When context is False, controls the number of lines to place the "next" link anchors before the next change (so click of "next" link jumps to just before the change). """ # make unique anchor prefixes so that multiple tables may exist # on the same page without conflict. self._make_prefix() # change tabs to spaces before it gets more difficult after we insert # markup fromlines,tolines = self._tab_newline_replace(fromlines,tolines) # create diffs iterator which generates side by side from/to data if context: context_lines = numlines else: context_lines = None diffs = _mdiff(fromlines,tolines,context_lines,linejunk=self._linejunk, charjunk=self._charjunk) # set up iterator to wrap lines that exceed desired width if self._wrapcolumn: diffs = self._line_wrapper(diffs) # collect up from/to lines and flags into lists (also format the lines) fromlist,tolist,flaglist = self._collect_lines(diffs) # process change flags, generating middle column of next anchors/links fromlist,tolist,flaglist,next_href,next_id = self._convert_flags( fromlist,tolist,flaglist,context,numlines) s = [] fmt = ' %s%s' + \ '%s%s\n' for i in range(len(flaglist)): if flaglist[i] is None: # mdiff yields None on separator lines skip the bogus ones # generated for the first line if i > 0: s.append(' \n \n') else: s.append( fmt % (next_id[i],next_href[i],fromlist[i], next_href[i],tolist[i])) if fromdesc or todesc: header_row = '%s%s%s%s' % ( '
', '%s' % fromdesc, '
', '%s' % todesc) else: header_row = '' table = self._table_template % dict( data_rows=''.join(s), header_row=header_row, prefix=self._prefix[1]) return table.replace('\0+',''). \ replace('\0-',''). \ replace('\0^',''). \ replace('\1',''). \ replace('\t',' ') del re def restore(delta, which): r""" Generate one of the two sequences that generated a delta. Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract lines originating from file 1 or 2 (parameter `which`), stripping off line prefixes. Examples: >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(keepends=True), ... 'ore\ntree\nemu\n'.splitlines(keepends=True)) >>> diff = list(diff) >>> print(''.join(restore(diff, 1)), end="") one two three >>> print(''.join(restore(diff, 2)), end="") ore tree emu """ try: tag = {1: "- ", 2: "+ "}[int(which)] except KeyError: raise ValueError('unknown delta choice (must be 1 or 2): %r' % which) from None prefixes = (" ", tag) for line in delta: if line[:2] in prefixes: yield line[2:] def _test(): import doctest, difflib return doctest.testmod(difflib) if __name__ == "__main__": _test()