# -*- coding: iso-8859-1 -*-

import Preprocessor
import config.lang
import config.ids
import lang.Hyphenation
import main.TestDataReader
import unittest
        
class TestPreprocessor(unittest.TestCase):
    """Unit tests for Preprocessor.Preprocessor.process().

    Every test feeds a raw string to process() and checks the returned list
    of lines. Method comments are used instead of docstrings so that the
    test runner keeps reporting the method names.
    """

    def setUp(self):
        # Build a fresh hyphenator and preprocessor from the project
        # configuration before every test.
        self.hyph = lang.Hyphenation.Hyphenation(
                config.lang.vowels,
                config.lang.consonants,
                config.lang.diphNormal,
                config.lang.diphSpecial,
                config.lang.customLower,
                config.lang.customUpper,
                config.lang.inputPartSeparator,
                config.lang.outputPartSeparator,
                config.lang.outputSyllSeparator,
                config.lang.syllShapes,
                config.ids.maxSyllabifications,
                config.ids.penalty)
        self.pp = Preprocessor.Preprocessor(
                self.hyph,
                config.lang.conversions,
                config.ids.maxLineLength)

    def testSimple(self):
        # Plain lowercase letters are expected to pass through unchanged.
        self.assertEqual('abc', self.pp.process('abc')[0])

    def testUppercase(self):
        # Uppercase letters are expected to keep their case.
        self.assertEqual('ABC', self.pp.process('ABC')[0])

    def testDiacritics(self):
        # NOTE(review): the input literals in this test look like they have
        # lost their non-ASCII (ISO-8859-1) characters: 'bc' is expected to
        # become 'abc', and the space-separated inputs hold fewer characters
        # than the expected outputs. Restore the accented letters from the
        # original file before relying on this test.
        self.assertEqual('abc', self.pp.process('bc')[0])
        self.assertEqual('a e i o u y', self.pp.process('     ')[0])
        # Note: no capital '' (the character inside the quotes appears to be
        # lost as well; presumably the capital of the sixth lowercase letter,
        # since the uppercase list below stops at U -- confirm).
        self.assertEqual('A E I O U', self.pp.process('    ')[0])

    def testFinnishVowels(self):
        # Input and expected output are the same literal, i.e. these
        # characters must pass through unchanged.
        # NOTE(review): both literals now contain only spaces; the Finnish
        # vowels they presumably held seem to have been lost -- restore them.
        self.assertEqual('   ', self.pp.process('   ')[0])

    def testSpaces(self):
        # Leading, inner and trailing spaces are expected to be preserved.
        self.assertEqual('  a bc ', self.pp.process('  a bc ')[0])

    def testNonalphabetic(self):
        # Punctuation and symbols are expected to be removed.
        self.assertEqual('abc', self.pp.process('...a*b**c#%')[0])

    def testTabs(self):
        # Tabs are expected to be preserved.
        self.assertEqual('\ta\tbc', self.pp.process('\ta\tbc')[0])

    def testMultiline(self):
        # Newlines split the input into separate lines.
        text = 'aaa\nbbb\nccc'
        expected = ['aaa', 'bbb', 'ccc']
        self.assertEqual(expected, self.pp.process(text))

    def testLineSeparator(self):
        # '/' also separates lines; the doubled '//' must not produce an
        # empty line in the result.
        text = 'aaa/bbb//ccc'
        expected = ['aaa', 'bbb', 'ccc']
        self.assertEqual(expected, self.pp.process(text))

    # NOTE(review): the three emptyLine* methods below do not start with
    # 'test', so unittest never collects or runs them. emptyLine and
    # emptyLine2 repeat testEmptyInput and testWhitespaceOnly; emptyLine3 is
    # not covered elsewhere. Rename them to test* once they are confirmed to
    # pass, or delete them if they were disabled on purpose.
    def emptyLine(self):
        self.assertEqual([], self.pp.process(''))

    def emptyLine2(self):
        self.assertEqual([], self.pp.process('   '))

    def emptyLine3(self):
        self.assertEqual([], self.pp.process(' ~ # % '))

    def testEmptyLines(self):
        # Between 'aaa' and 'bbb': a line holding only a tab, a line holding
        # only three spaces, and a completely empty line. Written with
        # escapes so the significant whitespace stays visible and survives
        # editors that strip trailing whitespace.
        text = 'aaa\n\t\n   \n\nbbb'
        expected = ['aaa', 'bbb']
        self.assertEqual(expected, self.pp.process(text))

    def testEmptyLines2(self):
        # A trailing newline must not change the first line.
        text = 'aaa\n'
        expected = 'aaa'
        self.assertEqual(expected, self.pp.process(text)[0])

    def testWhitespaceOnly(self):
        # Input made only of spaces yields no lines at all.
        text = '   '
        self.assertEqual([], self.pp.process(text))

    def testWhitespaceOnlyMultiline(self):
        # Several space-only lines also yield no lines at all.
        text = '  \n   \n'
        self.assertEqual([], self.pp.process(text))

    def testEmptyInput(self):
        # The empty string yields no lines at all.
        self.assertEqual([], self.pp.process(''))

    def testCharacterConversions(self):
        # The character '/' is not included in the test file
        self.__doTest('character_conversions.txt')

    def testSeparators(self):
        self.__doTest('separators.txt')

    def __doTest(self, testFile):
        # Run every (input, expected) pair read from the given data file
        # through the preprocessor. Each input must produce at most one
        # line; an empty result is compared as the empty string.
        #
        # testFile -- file name below 'engine/testdata/'. The path is
        # relative, so it presumably resolves against the current working
        # directory -- confirm how the suite is launched.
        reader = main.TestDataReader.TestDataReader(
                'engine/testdata/' + testFile)

        for text, expected in reader.getPairs():
            lines = self.pp.process(text)
            # Check the line count first: a multi-line result would otherwise
            # surface as a confusing "expected != ''" failure.
            self.assertTrue(
                    len(lines) < 2,
                    'expected at most one line for %r, got %r' % (text, lines))
            actual = lines[0] if lines else ''
            self.assertEqual(expected, actual)

# Script entry point: when this file is executed directly, hand control to
# the unittest command-line runner.
if __name__ == '__main__':
    unittest.main()
