# -*- coding: iso-8859-1 -*-

import config.lang
import re

def nonEmpty(x):
    """Return True if x contains at least one non-whitespace character."""
    stripped = x.strip()
    return bool(stripped)

        #lines = text.splitlines()
        #result = []
        #for line in lines:
        #    if len(line.strip()) > 0:
        #        result.append(line)
        #return result
class Preprocessor:

    """
    Cleans raw input text and splits it into lines.

    Removes or replaces unwanted characters and handles characters
    with a special meaning (the configured line separator and the
    configured input part separator).

    Constructor arguments:
    hyphenation   -- object providing customLower(char),
                     recognizedChars() and letterChars(); it decides
                     which characters are kept.
    conversions   -- iterable of (replacement, chars) pairs; every
                     character listed in chars is replaced by
                     replacement.
    maxLineLength -- maximum number of characters kept per line.
    """

    def __init__(self, hyphenation, conversions, maxLineLength):
        self.hyph = hyphenation
        self.conversions = conversions
        self.maxLineLength = maxLineLength

    def process(self, text):
        """
        Run all cleaning steps on text and return the resulting lines.

        Returns a list of strings: lines that are empty or contain only
        whitespace are dropped, and every remaining line is cut to at
        most maxLineLength characters.
        """
        text = self.handleLineSeparator(text)
        text = self.handleConversions(text)
        text = self.removeUnknown(text)
        text = self.removeInvalidWordSeparators(text)

        # Build a real list: filter() returns an iterator on Python 3,
        # which supports neither len() nor item assignment.
        limit = self.maxLineLength
        return [line[:limit] for line in text.splitlines() if line.strip()]

    def handleLineSeparator(self, text):
        """Replace every configured line separator with a newline."""
        return text.replace(config.lang.lineSeparator, '\n')

    def handleConversions(self, text):
        """
        Apply every (replacement, chars) pair in self.conversions.

        Each character listed in chars is replaced by replacement.
        Pairs with an empty chars string are skipped.
        """
        for replacement, chars in self.conversions:
            if not chars:
                # '[]' is not a valid regular expression; there is
                # nothing to convert for this entry anyway.
                continue
            # NOTE: chars is inserted verbatim into a character class and
            # replacement is used as a regex replacement template, so
            # regex metacharacters in either keep their regex meaning.
            text = re.sub('[' + chars + ']', replacement, text)
        return text

    def removeUnknown(self, text):
        """
        Drop every character that is neither recognized nor whitespace.

        A character is kept when its customLower() form is contained in
        recognizedChars() or when it is a whitespace character.
        """
        lower = self.hyph.customLower
        # Fetched once; assumes recognizedChars() returns a collection
        # that can be tested for membership repeatedly.
        recognized = self.hyph.recognizedChars()
        return ''.join(
            char for char in text
            if lower(char) in recognized or char.isspace())

    def removeInvalidWordSeparators(self, text):
        """
        Remove input part separators that are not between two letters.

        A separator is kept only when the characters directly before
        and after it are both contained in letterChars() (compared via
        customLower()). Leading and trailing separators are stripped.

        NOTE(review): the neighbour lookups use index - 1 / index + 1,
        so this presumably expects a single-character separator.
        """
        separator = config.lang.inputPartSeparator
        letters = self.hyph.letterChars()
        lower = self.hyph.customLower

        # Guarantees that a separator will not be the first or last
        # character at any time, so both neighbour lookups are in range.
        text = text.strip(separator)

        start = 0
        index = text.find(separator)
        while index != -1:
            before = lower(text[index - 1])
            after = lower(text[index + 1])

            if before in letters and after in letters:
                # Medial separator: keep it and search behind it.
                start = index + 1
            else:
                # Drop it; start is left alone so the character that
                # moved into this position is examined as well.
                text = text[:index] + text[index + 1:]

            index = text.find(separator, start)

        return text
    
    #def nonEmpty(self, text):
    #    return filter(len(line.strip()) > 0, text.splitlines()) 
                      
        #lines = text.splitlines()
        #result = []
        #for line in lines:
        #    if len(line.strip()) > 0:
        #        result.append(line)
        #return result
