summaryrefslogtreecommitdiffstats
path: root/src/TextViewport/Render/Segmentation.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/TextViewport/Render/Segmentation.hs')
-rw-r--r--src/TextViewport/Render/Segmentation.hs109
1 files changed, 60 insertions, 49 deletions
diff --git a/src/TextViewport/Render/Segmentation.hs b/src/TextViewport/Render/Segmentation.hs
index 55971ce..3d64748 100644
--- a/src/TextViewport/Render/Segmentation.hs
+++ b/src/TextViewport/Render/Segmentation.hs
@@ -2,8 +2,12 @@ module TextViewport.Render.Segmentation where
import Data.DList qualified as DL
import Data.HashMap.Strict qualified as HM
+import Data.Hashable (Hashable)
import Data.List (minimumBy)
+import Data.MonoTraversable.Unprefixed qualified as O
import Data.Ord (comparing)
+import Data.Sequences (Index, IsSequence, Textual)
+import Data.Sequences qualified as S
import Data.Text (Text)
import Data.Text qualified as T
import Data.Vector (Vector)
@@ -12,11 +16,12 @@ import Text.Hyphenation qualified as H
import TextViewport.Buffer.Item
import TextViewport.Render.RenderedLine (RenderedLine(..)) -- TODO qualified
-applyStrategy :: SegmentStrategy -> Int -> Int -> Text -> Vector RenderedLine
+
+applyStrategy :: (Hashable t, Textual t, Index t ~ Int) => SegmentStrategy t -> Int -> Int -> t -> Vector (RenderedLine t)
applyStrategy NoSegments width itemIx txt =
- let rawLines = T.splitOn "\n" txt
- chunks = map (T.take width) rawLines -- crop
+ let rawLines = S.splitWhen (=='\n') txt
+ chunks = map (S.take width) rawLines -- crop
offsets = scanOffsetsWithNewlines chunks
in V.fromList
[ RenderedLine { rlText = chunk, rlItemIx = itemIx, rlLineIx = lineIx, rlCharStart = off }
@@ -24,12 +29,12 @@ applyStrategy NoSegments width itemIx txt =
]
applyStrategy FixedWidthSegments width itemIx txt =
- let rawLines = T.splitOn "\n" txt
+ let rawLines = S.splitWhen (=='\n') txt
(dl, _) = foldl step (DL.empty, 0) rawLines
step (acc, off0) line =
let chunks = chunkFixed width line
offsets = scanOffsetsFrom off0 chunks
- offNext = off0 + T.length line + 1
+ offNext = off0 + O.length line + 1
acc' = acc `DL.append` DL.fromList (zip offsets chunks)
in (acc', offNext)
allChunks = DL.toList dl
@@ -39,14 +44,14 @@ applyStrategy FixedWidthSegments width itemIx txt =
]
applyStrategy (HyphenateSegments lang cache0) width itemIx txt =
- let rawLines = T.splitOn "\n" txt
+ let rawLines = S.splitWhen (=='\n') txt
-- fold over each physical line, accumulating:
-- * all rendered (offset, chunk) pairs
-- * updated hyphenation cache (unused for now)
-- * running character offset across lines
(dl, _cache1, _) =
- foldl segmentOneLine (DL.empty, cache0, 0) rawLines
+ foldl (segmentOneLine lang width) (DL.empty, cache0, 0) rawLines
allChunks = DL.toList dl
in V.fromList
[ RenderedLine { rlText = chunk, rlItemIx = itemIx, rlLineIx = lineIx, rlCharStart = off }
@@ -54,61 +59,65 @@ applyStrategy (HyphenateSegments lang cache0) width itemIx txt =
]
where
-- Segment a single physical line using TeX‑lite hyphenation
- --segmentOneLine
- -- :: ([(Int, Text)], HM.HashMap Text [(Text, Text)], Int)
- -- -> Text
- -- -> ([(Int, Text)], HM.HashMap Text [(Text, Text)], Int)
- segmentOneLine (acc, cache, off0) line =
+ segmentOneLine
+ :: (Hashable t, Textual t, Index t ~ Int)
+ => H.Language
+ -> Int
+ -> (DL.DList (Int, t), HM.HashMap t [(t, t)], Int)
+ -> t
+ -> (DL.DList (Int, t), HM.HashMap t [(t, t)], Int)
+ segmentOneLine lang width (acc, cache, off0) line =
let (chunks, cache1) = segmentWithHyphenationTeXLite lang width line cache
offsets = scanOffsetsFrom off0 chunks
- offNext = off0 + T.length line + 1
+ offNext = off0 + O.length line + 1
acc' = acc `DL.append` DL.fromList (zip offsets chunks)
in (acc', cache1, offNext)
--segmentOneLine (acc, cache, off0) line =
-- let chunks = segmentWithHyphenationTeXLite lang width line
-- offsets = scanOffsetsFrom off0 chunks
- -- offNext = off0 + T.length line + 1 -- +1 for newline
+ -- offNext = off0 + O.length line + 1 -- +1 for newline
-- acc' = acc ++ zip offsets chunks
-- in (acc', cache, offNext)
-- | Compute running character offsets for a list of chunks.
-scanOffsetsWithNewlines :: [Text] -> [Int]
+scanOffsetsWithNewlines :: IsSequence t => [t] -> [Int]
scanOffsetsWithNewlines = go 0
where
go !_ [] = []
go !o (l:ls) =
let off = o
- o' = o + T.length l + 1 -- +1 for newline
+ o' = o + O.length l + 1 -- +1 for newline
in off : go o' ls
-- | Chunk a single line into fixed-width pieces.
-chunkFixed :: Int -> Text -> [Text]
+chunkFixed :: (IsSequence t, Index t ~ Int) => Int -> t -> [t]
chunkFixed w t
| w <= 0 = []
- | T.null t = [""]
+ | O.null t = [mempty]
| otherwise =
- let (h, rest) = T.splitAt w t
- in h : if T.null rest then [] else chunkFixed w rest
+ let (h, rest) = S.splitAt w t
+ in h : if O.null rest then [] else chunkFixed w rest
--------------------------------------------------------------------------------
-- Hyphenation-aware segmenting (TeX-lite)
--------------------------------------------------------------------------------
-- Compute offsets starting from a base offset
-scanOffsetsFrom :: Int -> [Text] -> [Int]
+scanOffsetsFrom :: IsSequence t => Int -> [t] -> [Int]
scanOffsetsFrom start = go start
where
go !_ [] = []
- go !o (t:ts) = o : go (o + T.length t) ts
+ go !o (t:ts) = o : go (o + O.length t) ts
segmentWithHyphenationTeXLite
- :: H.Language
+ :: (Hashable t, Textual t, Index t ~ Int)
+ => H.Language
-> Int
- -> Text
- -> HM.HashMap Text [(Text, Text)]
- -> ([Text], HM.HashMap Text [(Text, Text)])
+ -> t
+ -> HM.HashMap t [(t, t)]
+ -> ([t], HM.HashMap t [(t, t)])
segmentWithHyphenationTeXLite lang width txt cache0 =
- go cache0 (T.words txt)
+ go cache0 (S.words txt)
where
go cache [] = ([], cache)
go cache ws =
@@ -124,37 +133,38 @@ segmentWithHyphenationTeXLite lang width txt cache0 =
-- | Lossless fallback: treat remaining words as one long text and
-- chunk it into width-sized pieces. Never truncates, never drops text.
-breakWordSafe :: Int -> [Text] -> [Text]
+breakWordSafe :: (Textual t, Index t ~ Int) => Int -> [t] -> [t]
breakWordSafe width ws =
- chunk (T.unwords ws)
+ chunk (S.unwords ws)
where
chunk t
- | T.null t = []
- | T.length t <= width = [t]
+ | O.null t = []
+ | O.length t <= width = [t]
| otherwise =
- let (c, r) = T.splitAt width t
+ let (c, r) = S.splitAt width t
in c : chunk r
-type Candidate = (Text, [Text], Bool)
+type Candidate a = (a, [a], Bool)
lineCandidates
- :: H.Language
+ :: (Hashable t, Textual t, Index t ~ Int)
+ => H.Language
-> Int
- -> HM.HashMap Text [(Text, Text)]
- -> [Text]
- -> ([(Text, [Text], Bool)], HM.HashMap Text [(Text, Text)])
+ -> HM.HashMap t [(t, t)]
+ -> [t]
+ -> ([(t, [t], Bool)], HM.HashMap t [(t, t)])
lineCandidates lang width cache0 ws0 =
- go [] [] cache0 ws0
+ go mempty [] cache0 ws0
where
go _ acc cache [] = (acc, cache)
go line acc cache (w:ws) =
- let space = if null line then "" else " "
- baseTxt = T.unwords line
+ let space = if O.null line then mempty else S.singleton ' '
+ baseTxt = S.unwords line
wholeTxt = baseTxt <> space <> w
- wholeLen = T.length wholeTxt
+ wholeLen = O.length wholeTxt
acc1 =
- if wholeLen <= width && not (T.null wholeTxt)
+ if wholeLen <= width && not (O.null wholeTxt)
then (wholeTxt, ws, False) : acc
else acc
@@ -162,15 +172,16 @@ lineCandidates lang width cache0 ws0 =
case HM.lookup w cache of
Just hs -> (hs, cache)
Nothing ->
- let hs = hyphenateWord lang w
- in (hs, HM.insert w hs cache)
+ let hs = hyphenateWord lang (T.pack $ S.unpack w)
+ hs' = [ (S.pack $ T.unpack pre, S.pack $ T.unpack suf) | (pre, suf) <- hs ]
+ in (hs', HM.insert w hs' cache)
hyphCands =
[ (preTxt, suf : ws, True)
| (pre, suf) <- hyphs
- , not (T.null pre)
- , let preTxt = baseTxt <> space <> pre <> "-"
- , T.length preTxt <= width
+ , not (O.null pre)
+ , let preTxt = baseTxt <> space <> pre <> S.singleton '-'
+ , O.length preTxt <= width
]
acc2 = hyphCands ++ acc1
@@ -187,9 +198,9 @@ hyphenateWord lang word =
| i <- [1 .. length parts - 1]
]
-scoreCandidate :: Int -> Candidate -> Int
+scoreCandidate :: IsSequence t => Int -> Candidate t -> Int
scoreCandidate width (line, _, endsWithHyphen) =
- let len = T.length line
+ let len = O.length line
remSpace = max 0 (width - len)
badness = remSpace * remSpace * remSpace
hyphenPenalty =