#!/usr/bin/python

# Copyright 2008-2009 Junior (Frederic) FLEURIAL MONFILS
#
# This file is part of PyDepend.
#
# PyDepend is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# or see <http://www.opensource.org/licenses/gpl-3.0.html>
#
# Contact:
#     Junior FLEURIAL MONFILS <frederic dot fleurialmonfils at cetic dot be>
#     Jean-Christophe DEPREZ <jean-christophe dot deprez at cetic dot be>

import re
import sys
import os
from pprint import pprint

def usage(message=""):
    """Write the command-line help to standard error, then exit.

    @param message: str, optional text (typically an error message) that is
        appended after the help, separated from it by a blank line
    The function never returns: it always ends with sys.exit(1), including
    when the help was explicitly requested.
    """
    # Explicit test instead of the fragile "cond and a or b" idiom.
    suffix = ""
    if message:
        suffix = "\n\n%s" % message
    text = """Usage: %s options < infile.dependencies
    
    options:
    -c              show cycles in text form (default: False)
    -d              show cycles in dot form (default: False)
    -q              report only (default: False)
    -r              compute height (default: False)
    -a              compute all cycles (default: False)
    -s              use slow algorithm (default: False)
    --fast          speed up computation (default: False)
    -h, --help      print this help%s""" % (
        os.path.basename(sys.argv[0]),
        suffix
    )
    # write() produces the same bytes as the former print-chevron statement
    # and works on every Python version.
    sys.stderr.write(text + "\n")
    sys.exit(1)

def asLabel(string):
    """Strip the surrounding quotes of a quoted name.

    @param string: str, a name, possibly written as "name"
    @return: the string without its first and last character when it starts
        with a double quote, the unchanged string otherwise (an empty string
        is returned as is instead of raising IndexError)
    """
    # startswith() is safe on the empty string, unlike string[0].
    if string.startswith('"'):
        return string[1:-1]
    return string

class Edge(object): # implements __cmp__
    """Weighted, directed link between two nodes.

    The comparison hook only looks at the weight, the textual form only at
    the two end points.
    """
    # Class-level defaults; __init__ overrides them on every instance.
    weight = 0
    from_ = None
    to_ = None

    def __init__(self, from_, to_, weight):
        """Create an edge
        @param from_ : Node, the node the edge leaves
        @param to_: Node, the node the edge reaches
        @param weight: int, the weight carried by the edge
        """
        self.from_, self.to_, self.weight = from_, to_, weight

    def __repr__(self):
        """Return the edge written as 'source -> destination'."""
        return "%s -> %s" % (self.from_, self.to_)
    __str__ = __repr__

    def __cmp__(self, edge):
        """Return the weight of this edge minus the weight of the other one
        (negative, zero or positive).
        """
        return self.weight - edge.weight

class Node(object): # implements __cmp__
    """Named vertex of a graph, carrying the scratch fields the graph
    algorithms need.
    """
    name = None
    # Scratch fields, all starting in their "untouched" state:
    visited = False   # Kosaraju's and Edmond's algorithms
    marked = False    # Tarjan's circuit enumeration
    index = None      # Tarjan's algorithm
    lowlink = None    # Tarjan's algorithm

    def __init__(self, name):
        """Create a Node
        @param name : any, the label of the node
        """
        self.name = name

    def __repr__(self):
        """Return the name of the node between double quotes."""
        return '"%s"' % self.name
    __str__ = __repr__

    def __cmp__(self, node):
        """Compare this node with another one: 0 when it is the very same
        object, -1 in every other case.
        """
        if node is not self:
            return -1
        return 0

def print_dotcycle(cycle, out):
    """Write a cycle as a small dot digraph, followed by a blank line.

    @param cycle: iterable of node names, in path order; the last element
        links back to the first one
    @param out: file-like object with a write() method
    Edges are written from the end of the cycle towards its start and the
    closing edge (last -> first) comes last.  An empty cycle gives an empty
    digraph instead of raising IndexError.
    """
    # out.write() is used instead of the print-chevron statement: the bytes
    # are the same (the print statement wrote no space after the "\t" item)
    # and the code is valid on every Python version.
    write = out.write
    nodes = list(cycle)
    write("digraph G {\n")
    if nodes:
        for position in range(len(nodes) - 1, 0, -1):
            write("\t%s -> %s ;\n" % (nodes[position - 1], nodes[position]))
        # close the loop
        write("\t%s -> %s ;\n" % (nodes[-1], nodes[0]))
    write("}\n")
    write("\n")
    
def print_cycle(cycle, out):
    """Write a cycle as ASCII art, followed by a blank line.

    @param cycle: sequence of str, the node names in path order
    @param out: file-like object with a write() method
    The first name is on its own line, each following name is prefixed by
    "|   " and a last "+-> first" line closes the loop.  Nothing is written
    for an empty cycle.
    """
    if not cycle:
        # the original wrote a stray empty line and then raised IndexError
        return
    # out.write() gives the same bytes as the former print-chevron
    # statements and is valid on every Python version.
    out.write("\n|   ".join(cycle) + "\n")
    out.write("+-> %s\n" % (cycle[0],))
    out.write("\n")
    
class Adjacency(object):
    """Directed graph kept as adjacency lists.

    self.adjacencies maps a source Node to the list of Edge objects leaving
    it.  A node that is only ever the target of an edge gets no key of its
    own, so iterating the graph yields source nodes only; use the ``nodes``
    property to obtain every node.
    """
    def __init__(self):
        self.adjacencies = {}

    def copy(self):
        """Return a deep copy of the graph (nodes and edges are copied too).
        """
        import copy
        return copy.deepcopy(self)

    @staticmethod
    def fromFile(fileopen, link=re.compile(r'"([^"]+)"\s*->\s*"([^"]+)"\s*;'), node=re.compile(r'"([^"]+)"\s*;')):
        """Build a graph from dot-like text.
        @param fileopen: iterable of str, the lines to parse
        @param link: compiled regex with two groups, matching '"a" -> "b";'
        @param node: compiled regex with one group, matching '"a";'
        @return: Adjacency
        A line is first tried as a link; only when that fails is it tried
        as a lone node.  Lines matching neither pattern are ignored.
        """
        couples = []
        singles = []
        for line in fileopen:
            match = link.search(line)
            if match:
                couples.append(match.groups())
                continue
            match = node.search(line)
            if match:
                singles.append(match.group(1))
        return Adjacency.fromCouples(couples, singles)

    @staticmethod
    def fromCouples(couples, singles=()):
        """Build a graph from names.
        @param couples: iterable of (source name, target name) pairs; each
            one becomes an edge of weight 1
        @param singles: iterable of names registered as nodes even when no
            edge mentions them
        @return: Adjacency
        One Node is created per distinct name.
        """
        adjacency = Adjacency()
        nodes = {}
        def getNode(name):
            # one shared Node per name
            try:
                return nodes[name]
            except KeyError:
                created = nodes[name] = Node(name)
                return created
        for single in singles:
            adjacency.add(getNode(single))
        for (from_, to_) in couples:
            source = getNode(from_)
            target = getNode(to_)
            adjacency[source] = (target, 1)
        return adjacency

    def add(self, node):
        """Register node as a source without edges, unless already known."""
        self.adjacencies.setdefault(node, [])

    def __setitem__(self, source, target_weight):
        """Add an edge: graph[source] = (target, weight).
        @param source: Node, origin of the new edge
        @param target_weight: (Node, int), destination and weight
        """
        # Unpacked in the body: tuple parameters in the signature are not
        # accepted by every Python version.
        (target, weight) = target_weight
        edge = Edge(source, target, weight)
        self.adjacencies.setdefault(source, []).append(edge)
    addEdge = __setitem__

    def __getitem__(self, source):
        """Return the list of edges leaving source.  For an unknown source
        a new empty list is returned; it is not stored in the graph.
        """
        return self.adjacencies.get(source, [])
    getAdjacent = __getitem__

    def _removeEdge(self, source, edge):
        """Remove that very edge object from the edges of source.
        @return: bool, True when the edge was found and removed
        """
        # Identity is used on purpose: Edge compares by weight only, so
        # list.remove() could drop another edge of the same weight.
        edges = self.adjacencies.get(source, [])
        for position, candidate in enumerate(edges):
            if candidate is edge:
                del edges[position]
                return True
        return False

    def delete(self, source, edge):
        """Remove edge from the edges of source; do nothing when it is not
        there.
        """
        self._removeEdge(source, edge)

    def reverseEdge(self, edge):
        """Replace edge by a new edge of the same weight going the other
        way.
        @raise ValueError: when edge does not leave edge.from_ in this graph
        """
        # The original subscripted the bound method (remove[edge]) and so
        # always failed with a TypeError.
        if not self._removeEdge(edge.from_, edge):
            raise ValueError("edge %s is not in the graph" % (edge,))
        self[edge.to_] = (edge.from_, edge.weight)

    def reverseGraph(self):
        """Reverse every edge of this graph, in place."""
        self.adjacencies = self.getReversedList().adjacencies

    def getReversedList(self):
        """Return a new Adjacency holding every edge reversed.  Only nodes
        that are the target of at least one edge become keys of the result.
        """
        adjacencies = Adjacency()
        for edges in self.adjacencies.values():
            for edge in edges:
                adjacencies[edge.to_] = (edge.from_, edge.weight)
        return adjacencies

    def getAllNodes(self):
        """Return the set of distinct nodes of the graph
        """
        allnodes = set(self.adjacencies)
        allnodes.update(self.getTargetNodes())
        return allnodes
    nodes = property(getAllNodes)

    def getSourceNodes(self):
        """Return the list of nodes having an adjacency entry."""
        return list(self.adjacencies)
    sources = property(getSourceNodes)

    def getTargetNodes(self):
        """Return the set of nodes reached by at least one edge."""
        return set(edge.to_ for edges in self.adjacencies.values()
                                for edge in edges)
    targets = property(getTargetNodes)

    def __iter__(self):
        """Return an iterator on the graph
        """
        return iter(self.adjacencies)

    def getAllEdges(self):
        """Return the list of all the edges of the graph."""
        return [
            edge
            for edges in self.adjacencies.values()
                for edge in edges
        ]
    edges = property(getAllEdges)

    def getAllRoots(self):
        """Return the same list as getSourceNodes(): every node having an
        adjacency entry, whether or not an edge reaches it.
        """
        return list(self.adjacencies)
    roots = property(getAllRoots)
        
class Tarjan(object):
    """Implements the Tarjan's algorithm to detect cycles in graphs
    
            Input: Graph G = (V, E), Start node v0

        index = 0                       // DFS node number counter 
        S = empty                       // An empty stack of nodes
        forall v in V do                // Start a DFS at each node
           if (v.index is undefined)    // we haven't visited yet
              tarjan(v)

        procedure tarjan(v)
          v.index = index               // Set the depth index for v
          v.lowlink = index
          index = index + 1
          S.push(v)                     // Push v on the stack
          forall (v, v') in E do        // Consider successors of v 
            if (v'.index is undefined)  // Was successor v' visited? 
              tarjan(v')                // Recurse
              v.lowlink = min(v.lowlink, v'.lowlink)
            elseif (v' in S)            // Is v' on the stack?
              v.lowlink = min(v.lowlink, v'.index)
          if (v.lowlink == v.index)     // Is v the root of an SCC?
            print "SCC:"
            repeat
              v' = S.pop
              print v'
            until (v' == v)
            
    http://algowiki.net/wiki/index.php/Tarjan%27s_algorithm

    The strongly connected components found by run() are stored in
    self.components, each one as a list of nodes.  The algorithm writes the
    ``index`` and ``lowlink`` attributes of the nodes it visits.
    """
    
    def __init__(self, adjacency):
        """Prepare a search.
        @param adjacency: graph; iterating it yields the start nodes and
            adjacency[node] gives the edges leaving node
        """
        self.index = 0
        self.stack = []
        self.components = []
        self.adjacency = adjacency
        # Same content as self.stack, kept as a set so that the
        # "is v' on the stack?" test does not scan the whole stack.
        self._onstack = set()
    
    def run(self):
        """Compute the components, starting a search from every node (taken
        in name order) whose index is still None.
        """
        self.components = []
        for node in sorted(self.adjacency, key=lambda x: x.name):
            if node.index is None:
                self.tarjan(node)
                
    def tarjan(self, node):
        """Depth-first visit of node (recursive); every component whose root
        is node is appended to self.components.
        @param node: node that has not been visited yet
        """
        node.index = node.lowlink = self.index
        self.index += 1
        self.stack.append(node)
        self._onstack.add(node)
                
        for edge in self.adjacency[node]:
            adjacent = edge.to_
            if adjacent.index is None:
                # successor not visited yet: recurse
                self.tarjan(adjacent)
                node.lowlink = min(node.lowlink, adjacent.lowlink)
            elif adjacent in self._onstack:
                # successor belongs to the component being built
                node.lowlink = min(node.lowlink, adjacent.index)
                
        if node.lowlink == node.index:
            # node is the root of a component: unstack down to node
            connected = self.stack.pop()
            self._onstack.discard(connected)
            component = [connected]
            while connected != node:
                connected = self.stack.pop()
                self._onstack.discard(connected)
                component.append(connected)
            self.components.append(component)

def brent(f, x0):
    """Brent's cycle detection on the sequence x0, f(x0), f(f(x0)), ...

    @param f: function giving the successor of an element
    @param x0: first element of the sequence
    @return: (lam, mu), lam being the length of the cycle and mu the index
        of the first element of the sequence that belongs to the cycle
    """
    # Phase 1: the hare runs ahead; every time it has made as many steps as
    # the current power of two, the tortoise jumps to the hare, the power is
    # doubled and the step count restarts.
    limit = 1
    lam = 1
    tortoise = x0
    hare = f(x0)
    while tortoise != hare:
        if limit == lam:
            tortoise, limit, lam = hare, limit * 2, 0
        hare = f(hare)
        lam += 1

    # Phase 2: give the hare a head start of lam steps, then move both one
    # step at a time; they meet on the first element of the cycle.
    hare = x0
    for _ in range(lam):
        hare = f(hare)
    tortoise = x0
    mu = 0
    while tortoise != hare:
        tortoise, hare = f(tortoise), f(hare)
        mu += 1

    return lam, mu
            
def enumerate_cycles(graph, rank=lambda n: n.name):
    """Enumerate the cycles in a given directed graph
    @param graph: graph object (such as an Adjacency) providing copy(),
        getAdjacent(node), delete(node, edge) and getAllNodes(); the search
        works on graph.copy(), on which edges are deleted and the
        ``marked`` attribute of the nodes is written
    @param rank: function giving the sort key of a node; the nodes are
        taken as start nodes in increasing rank
    @return: list of cycles, each one being the list of the names of the
        nodes on the path that starts at its lowest ranked node
    """
    cycles = []
    # point stack denotes the elementary path p currently being considered
    # for a given node s
    point = []
    # marked stack: the nodes marked since the last circuit was found
    mark = []
    newG = graph.copy()
    getAdjacent = newG.getAdjacent
    deleteEdge = newG.delete
    # ! it is important to sort the list of nodes
    nodes = sorted(newG.getAllNodes(), key=rank)

    def backtrack(s, v):
        """Extend the current path with v; return True when a circuit
        going back to s was found through v."""
        found = False
        # place v on point stack and on marked stack
        point.append(v)
        v.marked = True
        mark.append(v)
        rs = rank(s)
        # sorted() works on a copy, so edges can be deleted meanwhile
        for edge in sorted(getAdjacent(v), key=lambda e: e.to_.name):
            w = edge.to_
            rw = rank(w)
            if rw < rs:
                # delete w from Adjacent(v): the edge belongs to the edges
                # of v (not of w), and w was already handled as start node
                deleteEdge(v, edge)
            elif rw == rs:
                # output circuit from s to v to s given by point stack
                cycles.append([node.name for node in point])
                found = True
            elif not w.marked:
                if backtrack(s, w):
                    found = True
        # found == True if an elementary circuit continuing the partial
        # path on the stack has been found: unmark everything down to v
        if found:
            while mark[-1] != v:
                mark.pop().marked = False
            mark.pop().marked = False
        # v is the top of the point stack at this stage
        point.pop()
        return found

    for n in nodes:
        n.marked = False
    for s in nodes:
        del point[:]
        backtrack(s, s)
        # unmark what the search for s left marked
        while mark:
            mark.pop().marked = False
    return cycles
            
def find_cycles(nodes, getEdges):
    """Collect cycles met during depth-first searches of the graph.

    @param nodes: iterable of nodes (hashable, with a ``name`` attribute);
        the searches start from the last node first
    @param getEdges: function giving the edges leaving a node, each edge
        having a ``to_`` attribute
    @return: list of cycles, each one being a list of node names
    A node whose search is over is never explored again.
    """
    found = []
    done = {}

    def explore(current, path):
        """Depth-first search from current, path being the nodes leading
        to it."""
        if current in done:
            # already fully explored
            return
        # current becomes part of an hypothetic cycle
        path.append(current)
        successors = [link.to_ for link in getEdges(current)]
        for successor in successors:
            if successor not in path:
                # look for a cycle going through this successor
                explore(successor, path)
                continue
            # the path comes back on itself: the cycle is its tail
            start = path.index(successor)
            found.append([member.name for member in path[start:]])
        # the search for current is over
        path.pop()
        done[current] = 1

    pending = list(nodes)
    while pending:
        explore(pending.pop(), [])

    return found

def get_height(nodes, getEdges):
    """Return the sum of the heights of the given nodes.

    @param nodes: iterable of hashable nodes (typically the roots)
    @param getEdges: function giving the edges leaving a node, each edge
        having a ``to_`` attribute
    @return: int
    The height of a node is 0 when no edge leaves it, otherwise one more
    than the greatest height among the nodes its edges reach; heights are
    computed once per node.  An edge leading back to a node of the path
    being explored is skipped, so a cyclic graph gives a best-effort value
    (which may depend on the visiting order) instead of recursing without
    end.
    """
    heights = {}      # height of every node fully explored
    active = set()    # nodes of the path being explored

    def dfs(node):
        if node in heights:
            return heights[node]
        active.add(node)
        height = 0
        for edge in getEdges(node):
            child = edge.to_
            if child in active:
                # back edge: following it would never end
                continue
            depth = 1 + dfs(child)
            if depth > height:
                height = depth
        active.discard(node)
        heights[node] = height
        return height

    return sum([dfs(node) for node in nodes])

        
def uniq(seq, idfun=None): 
    """Return the items of seq without duplicates, order preserved.

    @param seq: iterable of items
    @param idfun: function giving the (hashable) marker that identifies an
        item; two items with the same marker are duplicates.  Defaults to
        the item itself.
    @return: list holding the first item met for every marker
    """
    # The debug trace that was printed for every item has been removed: a
    # helper should not write on standard output.
    if idfun is None:
        def idfun(x): return x
    seen = {}
    result = []
    for item in seq:
        marker = idfun(item)
        if marker in seen: 
            continue
        seen[marker] = 1
        result.append(item)
    return result

def run2(options):
    """Enumerate the cycles of the graph read from options.i and write them
    on options.o.

    Nothing happens unless options.c or options.d is set.  Every cycle is
    written in dot form when options.d is set, in text form otherwise;
    nothing is written when options.q is set.
    @param options: object with the attributes c, d, q, i and o
    """
    if not (options.c or options.d):
        return
    graph = Adjacency.fromFile(options.i)
    cycles = enumerate_cycles(graph)
    if options.d:
        emit = print_dotcycle
    else:
        emit = print_cycle
    # bookkeeping, local to this function
    involved = set()
    counts = {}
    for cycle in cycles:
        for name in cycle:
            counts[name] = counts.get(name, 0) + 1
        involved.update(cycle)
        if not options.q:
            emit(cycle, options.o)
    collected = list(cycles)
    
def _report_cycles(cycles, options, occurrences, allnodes, allcycles):
    """Count the nodes of every cycle and write the cycles not seen before.

    @param cycles: iterable of cycles (lists of node names)
    @param options: object with the attributes q, d and o
    @param occurrences: dict name -> number of cycles holding it (updated)
    @param allnodes: set of the names met in a cycle (updated)
    @param allcycles: set of the signatures of the cycles seen (updated)
    """
    for cycle in cycles:
        for node in cycle:
            occurrences[node] = occurrences.get(node, 0) + 1
        allnodes.update(cycle)
        # cycles going through the same nodes share one signature and are
        # written (and counted) once
        scycle = ";".join(sorted(cycle))
        if scycle in allcycles:
            continue
        if not options.q:
            if options.d:
                print_dotcycle(cycle, options.o)
            else:
                print_cycle(cycle, options.o)
        allcycles.add(scycle)


def run(options):
    """Read the graph from options.i and write the requested report on
    options.o.

    With options.c or options.d the cycles are searched: over the whole
    graph when options.a is set, otherwise inside each strongly connected
    component of more than one node; options.s selects enumerate_cycles
    instead of find_cycles.  Unless options.q is set, the cycles and the
    occurrences of the nodes are written; a summary always follows.
    Otherwise, with options.r, the number of nodes, roots and edges and the
    height of the graph are written.
    @param options: object with the attributes c, d, r, a, s, q, i and o
    """
    graph = Adjacency.fromFile(options.i)
    # write() gives the same bytes as the former print-chevron statements
    # and is valid on every Python version.
    write = options.o.write
    if options.c or options.d:
        if not options.q:
            write("Cycles::\n")
        components = []
        allnodes = set()
        allcycles = set()
        occurrences = {}
        if options.a:
            if options.s:
                cycles = enumerate_cycles(graph)
            else:
                cycles = find_cycles(graph.getAllNodes(), graph.getAdjacent)
            _report_cycles(cycles, options, occurrences, allnodes, allcycles)
        else:
            tarjan = Tarjan(graph)
            tarjan.run()
            for component in tarjan.components:
                if len(component) <= 1:
                    continue
                components.append(component)
                # sub-graph restricted to the edges staying inside the
                # component; the set avoids scanning the list for each edge
                members = set(component)
                cgraph = Adjacency()
                for node in component:
                    for edge in graph[node]:
                        if edge.to_ in members:
                            cgraph[node] = (edge.to_, edge.weight)
                if options.s:
                    cycles = enumerate_cycles(cgraph)
                else:
                    cycles = find_cycles(component, cgraph.getAdjacent)
                _report_cycles(cycles, options, occurrences, allnodes, allcycles)
        if not options.q:
            write("\n")
            write("Occurrences::\n")
            write("Name,Occurrence\n")
            for (node, occurrence) in sorted(occurrences.items(), key=lambda x: x[1], reverse=True):
                write("%s,%s\n" % (node, occurrence))
            write("\n")
        write("Summary::\n")
        write("Nodes: %s\n" % len(graph.nodes))
        write("Sources: %s\n" % len(graph.sources))
        write("Edges: %s\n" % len(graph.edges))
        if not options.a and not options.s:
            write("Strongly connected components: %s\n" % len(tarjan.components))
            write("Cyclic components: %s\n" % len(components))
        write("Nodes involved in cycles: %s\n" % len(allnodes))
        write("Cycles: %s\n" % len(allcycles))
    elif options.r:
        # both sets are built once instead of once per tested node
        everynode = graph.nodes
        targets = graph.targets
        roots = [node for node in everynode if node not in targets]
        height = get_height(roots, graph.getAdjacent)
        write("Nodes: %s\n" % len(set([node.name for node in everynode])))
        write("Roots: %s\n" % len(roots))
        write("Edges: %s\n" % len(graph.edges))
        write("Height: %s\n" % height)
    
if __name__ == "__main__":
    # Command-line entry point: parse the options, then run the report on
    # standard input / standard output.
    import getopt
    class options:
        c=None          # cycle
        d=None          # dotcycle
        r=None          # roots
        s=False
        q=False
        a=False
        fast=False
        i=sys.stdin
        o=sys.stdout
    short = "saqcdrh"
    try:
        opts, args = getopt.getopt(sys.argv[1:], short, ["help", "fast"])
    except getopt.GetoptError:
        # sys.exc_info() is used because the "except X, e" and
        # "except X as e" spellings are each rejected by some Python
        # versions; usage() ends the program.
        usage("Error: %s" % sys.exc_info()[1])
    # option -> name of the boolean attribute it switches on
    flags = {
        "--fast": "fast",
        "-s": "s",
        "-a": "a",
        "-c": "c",
        "-d": "d",
        "-r": "r",
        "-q": "q",
    }
    for (opt, arg) in opts:
        if opt == "-h" or opt == "--help":
            usage()
        elif opt in flags:
            setattr(options, flags[opt], True)
        else:
            setattr(options, opt[1:], arg)
    # check if speed up is requested (use psyco if requested)
    if options.fast:
        try:
            import psyco
            psyco.full()
        except ImportError:
            sys.stderr.write("Warning: Psyco not found, running at normal speed\n")
    run(options)
    
