# -*- coding: iso-8859-1 -*-

import Hyphenation
import main.TestDataReader
import config.lang
import unittest

class TestHyphenation(unittest.TestCase):
    """Tests for Hyphenation.Hyphenation built from the project configuration.

    The data-driven tests read (input, expected) pairs from files under
    'lang/testdata/' and require that the first hyphenation returned by
    allHyphenations() equals the expected string and carries a penalty of 0.
    """

    def setUp(self):
        """Create the hyphenator under test from config.lang and config.ids."""
        # NOTE(review): only config.lang is imported at the top of this file;
        # config.ids is presumably made available by some other import --
        # confirm.
        self.h = Hyphenation.Hyphenation(
                config.lang.vowels,
                config.lang.consonants,
                config.lang.diphNormal,
                config.lang.diphSpecial,
                config.lang.customLower,
                config.lang.customUpper,
                config.lang.inputPartSeparator,
                config.lang.outputPartSeparator,
                config.lang.outputSyllSeparator,
                config.lang.syllShapes,
                config.ids.maxSyllabifications,
                config.ids.penalty)

    def testSimple(self):
        """Run the cases listed in simple.txt."""
        self.__doTest('simple.txt')

    def testWhitespace(self):
        """Run the cases listed in whitespace.txt."""
        self.__doTest('whitespace.txt')

    def testNonalphabetic(self):
        """Letters with diacritics are treated just like nonalphabetic
        characters."""
        self.__doTest('nonalphabetic.txt')

    def testDiphthongs(self):
        """Run the cases listed in diphthongs.txt."""
        self.__doTest('diphthongs.txt')

    def testSpecialDiphthongs(self):
        """Test for vowel pairs that are handled differently based on
        position."""
        self.__doTest('diphthongs2.txt')

    def testCompounds(self):
        """Run the cases listed in compounds.txt."""
        self.__doTest('compounds.txt')

    def testNationalCharacters(self):
        """Run the cases listed in national_characters.txt."""
        self.__doTest('national_characters.txt')

    def testCompoundsAndSpecialDiphthongs(self):
        """Test special diphthongs with compound element separator."""
        self.__doTest('compounds_and_special_diphthongs.txt')

    def testTotalLimit(self):
        """The number of returned hyphenations must not exceed
        config.ids.maxSyllabifications."""
        res = self.h.allHyphenations('aeaeaeaeaeaeaeaeaeaeaeae')
        limit = config.ids.maxSyllabifications
        # assertTrue instead of the assert_ alias, which newer unittest
        # versions no longer provide.
        self.assertTrue(
                len(res) <= limit,
                'got %d hyphenations, limit is %r' % (len(res), limit))

    def testAlternatives(self):
        """Check the (penalty, hyphenation) pairs returned for 'sienien',
        in the order they are expected to appear."""
        res = self.h.allHyphenations('sienien')
        self.assertEqual((0, 'sie-ni-en'), res[0])
        self.assertEqual((config.ids.penalty.special, 'sie-nien'),
                         res[1])
        self.assertEqual((config.ids.penalty.notDefault, 'si-e-ni-en'),
                         res[2])
        self.assertEqual((config.ids.penalty.notDefault
                          + config.ids.penalty.special, 'si-e-nien'),
                         res[3])

    def testGetOnset(self):
        """getOnset() returns the leading consonants of its argument."""
        self.assertEqual('r', self.h.getOnset('rint'))
        self.assertEqual('spr', self.h.getOnset('sprint'))
        self.assertEqual('', self.h.getOnset('int'))
        self.assertEqual('snt', self.h.getOnset('snt'))
        self.assertEqual('', self.h.getOnset('i'))
        self.assertEqual('spr', self.h.getOnset('spri'))

    def testGetOnset2(self):
        """getOnset() keeps the letter case of a mixed-case argument."""
        self.assertEqual('sPr', self.h.getOnset('sPrINt'))

    def testGetNucleus(self):
        """getNucleus() returns the first vowel group of its argument, or
        '' when there is none."""
        self.assertEqual('au', self.h.getNucleus('augh'))
        self.assertEqual('au', self.h.getNucleus('raugh'))
        self.assertEqual('', self.h.getNucleus('rgh'))
        self.assertEqual('u', self.h.getNucleus('ru'))
        self.assertEqual('u', self.h.getNucleus('rugh'))
        self.assertEqual('u', self.h.getNucleus('ugh'))

    def testGetNucleus2(self):
        """getNucleus() keeps the letter case of an upper-case argument."""
        self.assertEqual('U', self.h.getNucleus('UGH'))
        self.assertEqual('au', self.h.getNucleus('augh'))

    def __doTest(self, testFile):
        """Check every (input, expected) pair read from a test data file.

        testFile is a file name relative to 'lang/testdata/'. For each pair
        the first hyphenation of the input must equal the expected string
        and have a penalty of 0. Failure messages name the data file and
        the input word so the offending case can be located.
        """
        reader = main.TestDataReader.TestDataReader(
                'lang/testdata/' + testFile)

        for word, expected in reader.getPairs():
            results = self.h.allHyphenations(word)
            where = '%s: input %r' % (testFile, word)

            # Report an empty result as a test failure rather than letting
            # the [0] lookup below raise IndexError.
            self.assertTrue(len(results) > 0,
                            where + ': no hyphenations returned')

            first = results[0]
            self.assertEqual(
                    expected, first[1],
                    '%s: expected %r, got %r' % (where, expected, first[1]))
            self.assertEqual(
                    0, first[0],
                    '%s: expected penalty 0, got %r' % (where, first[0]))

# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
