diff options
| author | tv <tv@krebsco.de> | 2026-03-09 02:25:18 +0100 |
|---|---|---|
| committer | tv <tv@krebsco.de> | 2026-03-09 02:26:50 +0100 |
| commit | 230e538e41360f2018db9a8b5274402d0b3200b6 (patch) | |
| tree | 3c7cb9e360850d0a404087a66bfb860441905a9b /src/TextViewport/Render/Segmentation.hs | |
| parent | fdf2c5436dfea4a30af445059e77a54e14b64752 (diff) | |
generalize Item from Text to Textual
Diffstat (limited to 'src/TextViewport/Render/Segmentation.hs')
| -rw-r--r-- | src/TextViewport/Render/Segmentation.hs | 109 |
1 files changed, 60 insertions, 49 deletions
diff --git a/src/TextViewport/Render/Segmentation.hs b/src/TextViewport/Render/Segmentation.hs index 55971ce..3d64748 100644 --- a/src/TextViewport/Render/Segmentation.hs +++ b/src/TextViewport/Render/Segmentation.hs @@ -2,8 +2,12 @@ module TextViewport.Render.Segmentation where import Data.DList qualified as DL import Data.HashMap.Strict qualified as HM +import Data.Hashable (Hashable) import Data.List (minimumBy) +import Data.MonoTraversable.Unprefixed qualified as O import Data.Ord (comparing) +import Data.Sequences (Index, IsSequence, Textual) +import Data.Sequences qualified as S import Data.Text (Text) import Data.Text qualified as T import Data.Vector (Vector) @@ -12,11 +16,12 @@ import Text.Hyphenation qualified as H import TextViewport.Buffer.Item import TextViewport.Render.RenderedLine (RenderedLine(..)) -- TODO qualified -applyStrategy :: SegmentStrategy -> Int -> Int -> Text -> Vector RenderedLine + +applyStrategy :: (Hashable t, Textual t, Index t ~ Int) => SegmentStrategy t -> Int -> Int -> t -> Vector (RenderedLine t) applyStrategy NoSegments width itemIx txt = - let rawLines = T.splitOn "\n" txt - chunks = map (T.take width) rawLines -- crop + let rawLines = S.splitWhen (=='\n') txt + chunks = map (S.take width) rawLines -- crop offsets = scanOffsetsWithNewlines chunks in V.fromList [ RenderedLine { rlText = chunk, rlItemIx = itemIx, rlLineIx = lineIx, rlCharStart = off } @@ -24,12 +29,12 @@ applyStrategy NoSegments width itemIx txt = ] applyStrategy FixedWidthSegments width itemIx txt = - let rawLines = T.splitOn "\n" txt + let rawLines = S.splitWhen (=='\n') txt (dl, _) = foldl step (DL.empty, 0) rawLines step (acc, off0) line = let chunks = chunkFixed width line offsets = scanOffsetsFrom off0 chunks - offNext = off0 + T.length line + 1 + offNext = off0 + O.length line + 1 acc' = acc `DL.append` DL.fromList (zip offsets chunks) in (acc', offNext) allChunks = DL.toList dl @@ -39,14 +44,14 @@ applyStrategy FixedWidthSegments width itemIx txt = ] applyStrategy (HyphenateSegments lang cache0) width itemIx txt = - let rawLines = T.splitOn "\n" txt + let rawLines = S.splitWhen (=='\n') txt -- fold over each physical line, accumulating: -- * all rendered (offset, chunk) pairs -- * updated hyphenation cache (unused for now) -- * running character offset across lines (dl, _cache1, _) = - foldl segmentOneLine (DL.empty, cache0, 0) rawLines + foldl (segmentOneLine lang width) (DL.empty, cache0, 0) rawLines allChunks = DL.toList dl in V.fromList [ RenderedLine { rlText = chunk, rlItemIx = itemIx, rlLineIx = lineIx, rlCharStart = off } @@ -54,61 +59,65 @@ applyStrategy (HyphenateSegments lang cache0) width itemIx txt = ] where -- Segment a single physical line using TeX‑lite hyphenation - --segmentOneLine - -- :: ([(Int, Text)], HM.HashMap Text [(Text, Text)], Int) - -- -> Text - -- -> ([(Int, Text)], HM.HashMap Text [(Text, Text)], Int) - segmentOneLine (acc, cache, off0) line = + segmentOneLine + :: (Hashable t, Textual t, Index t ~ Int) + => H.Language + -> Int + -> (DL.DList (Int, t), HM.HashMap t [(t, t)], Int) + -> t + -> (DL.DList (Int, t), HM.HashMap t [(t, t)], Int) + segmentOneLine lang width (acc, cache, off0) line = let (chunks, cache1) = segmentWithHyphenationTeXLite lang width line cache offsets = scanOffsetsFrom off0 chunks - offNext = off0 + T.length line + 1 + offNext = off0 + O.length line + 1 acc' = acc `DL.append` DL.fromList (zip offsets chunks) in (acc', cache1, offNext) --segmentOneLine (acc, cache, off0) line = -- let chunks = segmentWithHyphenationTeXLite lang width line -- offsets = scanOffsetsFrom off0 chunks - -- offNext = off0 + T.length line + 1 -- +1 for newline + -- offNext = off0 + O.length line + 1 -- +1 for newline -- acc' = acc ++ zip offsets chunks -- in (acc', cache, offNext) -- | Compute running character offsets for a list of chunks. -scanOffsetsWithNewlines :: [Text] -> [Int] +scanOffsetsWithNewlines :: IsSequence t => [t] -> [Int] scanOffsetsWithNewlines = go 0 where go !_ [] = [] go !o (l:ls) = let off = o - o' = o + T.length l + 1 -- +1 for newline + o' = o + O.length l + 1 -- +1 for newline in off : go o' ls -- | Chunk a single line into fixed-width pieces. -chunkFixed :: Int -> Text -> [Text] +chunkFixed :: (IsSequence t, Index t ~ Int) => Int -> t -> [t] chunkFixed w t | w <= 0 = [] - | T.null t = [""] + | O.null t = [mempty] | otherwise = - let (h, rest) = T.splitAt w t - in h : if T.null rest then [] else chunkFixed w rest + let (h, rest) = S.splitAt w t + in h : if O.null rest then [] else chunkFixed w rest -------------------------------------------------------------------------------- -- Hyphenation-aware segmenting (TeX-lite) -------------------------------------------------------------------------------- -- Compute offsets starting from a base offset -scanOffsetsFrom :: Int -> [Text] -> [Int] +scanOffsetsFrom :: IsSequence t => Int -> [t] -> [Int] scanOffsetsFrom start = go start where go !_ [] = [] - go !o (t:ts) = o : go (o + T.length t) ts + go !o (t:ts) = o : go (o + O.length t) ts segmentWithHyphenationTeXLite - :: H.Language + :: (Hashable t, Textual t, Index t ~ Int) + => H.Language -> Int - -> Text - -> HM.HashMap Text [(Text, Text)] - -> ([Text], HM.HashMap Text [(Text, Text)]) + -> t + -> HM.HashMap t [(t, t)] + -> ([t], HM.HashMap t [(t, t)]) segmentWithHyphenationTeXLite lang width txt cache0 = - go cache0 (T.words txt) + go cache0 (S.words txt) where go cache [] = ([], cache) go cache ws = @@ -124,37 +133,38 @@ segmentWithHyphenationTeXLite lang width txt cache0 = -- | Lossless fallback: treat remaining words as one long text and -- chunk it into width-sized pieces. Never truncates, never drops text. -breakWordSafe :: Int -> [Text] -> [Text] +breakWordSafe :: (Textual t, Index t ~ Int) => Int -> [t] -> [t] breakWordSafe width ws = - chunk (T.unwords ws) + chunk (S.unwords ws) where chunk t - | T.null t = [] - | T.length t <= width = [t] + | O.null t = [] + | O.length t <= width = [t] | otherwise = - let (c, r) = T.splitAt width t + let (c, r) = S.splitAt width t in c : chunk r -type Candidate = (Text, [Text], Bool) +type Candidate a = (a, [a], Bool) lineCandidates - :: H.Language + :: (Hashable t, Textual t, Index t ~ Int) + => H.Language -> Int - -> HM.HashMap Text [(Text, Text)] - -> [Text] - -> ([(Text, [Text], Bool)], HM.HashMap Text [(Text, Text)]) + -> HM.HashMap t [(t, t)] + -> [t] + -> ([(t, [t], Bool)], HM.HashMap t [(t, t)]) lineCandidates lang width cache0 ws0 = - go [] [] cache0 ws0 + go mempty [] cache0 ws0 where go _ acc cache [] = (acc, cache) go line acc cache (w:ws) = - let space = if null line then "" else " " - baseTxt = T.unwords line + let space = if O.null line then mempty else S.singleton ' ' + baseTxt = S.unwords line wholeTxt = baseTxt <> space <> w - wholeLen = T.length wholeTxt + wholeLen = O.length wholeTxt acc1 = - if wholeLen <= width && not (T.null wholeTxt) + if wholeLen <= width && not (O.null wholeTxt) then (wholeTxt, ws, False) : acc else acc @@ -162,15 +172,16 @@ lineCandidates lang width cache0 ws0 = case HM.lookup w cache of Just hs -> (hs, cache) Nothing -> - let hs = hyphenateWord lang w - in (hs, HM.insert w hs cache) + let hs = hyphenateWord lang (T.pack $ S.unpack w) + hs' = [ (S.pack $ T.unpack pre, S.pack $ T.unpack suf) | (pre, suf) <- hs ] + in (hs', HM.insert w hs' cache) hyphCands = [ (preTxt, suf : ws, True) | (pre, suf) <- hyphs - , not (T.null pre) - , let preTxt = baseTxt <> space <> pre <> "-" - , T.length preTxt <= width + , not (O.null pre) + , let preTxt = baseTxt <> space <> pre <> S.singleton '-' + , O.length preTxt <= width ] acc2 = hyphCands ++ acc1 @@ -187,9 +198,9 @@ hyphenateWord lang word = | i <- [1 .. length parts - 1] ] -scoreCandidate :: Int -> Candidate -> Int +scoreCandidate :: IsSequence t => Int -> Candidate t -> Int scoreCandidate width (line, _, endsWithHyphen) = - let len = T.length line + let len = O.length line remSpace = max 0 (width - len) badness = remSpace * remSpace * remSpace hyphenPenalty = |
