# PROGRAM File Analysis

# Characters that Normalise() lets through: the 26 lower-case ASCII letters
# plus space, hyphen and apostrophe. Everything else is discarded.
keep = set("abcdefghijklmnopqrstuvwxyz" + " -'")

##### MODULE Normalise String #####

def Normalise(s):
    """Return a lower-cased copy of `s` holding only permitted characters.

    `s` is lower-cased first; every resulting character that is not a member
    of the module-level `keep` collection is then dropped. Surviving
    characters stay in their original order.

    As `keep` is defined in this file (letters, space, hyphen, apostrophe),
    line breaks and tabs are removed outright rather than turned into spaces.

    s       : the string to normalise.
    returns : the filtered, lower-cased string ('' if nothing survives).
    """
    # One join over the filtered characters replaces the former
    # `result = result + x` loop, which may re-copy the growing string for
    # every kept character.
    return ''.join(x for x in s.lower() if x in keep)

# END Normalise.

##### MODULE Frequency Count #####

def FreqDict(s):
    """Count how often each normalised word occurs in `s`.

    `s` is split on whitespace, each piece is passed through Normalise(),
    and the surviving words are tallied.

    s       : the text to analyse.
    returns : a dict mapping each normalised word to its number of
              occurrences; empty if `s` contains no countable words.
    """
    Dict = {}

    # Split BEFORE normalising. Normalise() discards every character that is
    # not in `keep`, and `keep` has no newline or tab, so normalising the
    # whole text first glued the last word of each line onto the first word
    # of the next one.
    for token in s.split():
    # DO
        word = Normalise(token)

        if not word:
            # Nothing but discarded characters (e.g. "..." or "123"):
            # not a word, so do not count it.
            continue

        Dict[word] = Dict.get(word, 0) + 1
    # ENDFOR;

    return Dict

# END Frequency Count.

##### MODULE File Analysis #####

def FileAnalysis(filename):
    """Print size statistics and the 20 most frequent words of a text file.

    Reads the whole file in text mode, then prints its character, line and
    word counts followed by a ranked list of the most frequent words as
    counted by FreqDict().

    filename : path of the file to analyse, passed to open().
    returns  : None; all results are written to standard output.
    raises   : whatever open()/read() raise, e.g. if the file is missing.
    """
    # The context manager closes the file even if read() raises.
    with open(filename, 'r') as source:
        script = source.read()

    num_chars = len(script)

    # Each "\n" terminates one line; a final line with no trailing newline
    # is still a line and must be counted as well.
    num_lines = script.count("\n")
    if script and not script.endswith("\n"):
        num_lines = num_lines + 1

    num_words = len(script.split())

    FullDict = FreqDict(script)

    ##### Create an Array from the Dictionary and Sort
    # (count, word) pairs, highest count first. Ties fall back to the word
    # in reverse alphabetical order, because the tuples are compared whole.
    DictArray = sorted(
        ((count, word) for word, count in FullDict.items()),
        reverse=True)

    # The one-element tuple keeps the formatting safe even if `filename`
    # is itself a tuple.
    print("The file '%s' has:" % (filename,))
    print("    %s characters" % num_chars)
    print("    %s lines" % num_lines)
    print("    %s words" % num_words)
    print(" ")
    print("The top 20 most occurring words are:")
    # i is the number of the list item, starting at 1
    for i, (count, word) in enumerate(DictArray[:20], 1):
    # DO
        print('%3s. %5s %s' % (i, count, word))
    # ENDFOR;

# END File Analysis.

######### Main Program #########

# Analyse the hard-coded script file. The name is a relative path, so it is
# looked up in the current working directory. This call sits at module level
# without a `__main__` guard, so it runs whenever the file is executed or
# imported.
FileAnalysis('StarWarsScript.txt')

################################