1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
|
module TextViewport.Render.Segmentation where
import Data.DList qualified as DL
import Data.HashMap.Strict qualified as HM
import Data.Hashable (Hashable)
import Data.List (minimumBy)
import Data.MonoTraversable.Unprefixed qualified as O
import Data.Ord (comparing)
import Data.Sequences (Index, IsSequence, Textual)
import Data.Sequences qualified as S
import Data.Text (Text)
import Data.Text qualified as T
import Data.Vector (Vector)
import Data.Vector qualified as V
import Text.Hyphenation qualified as H
import TextViewport.Buffer.Item
import TextViewport.Render.RenderedLine (RenderedLine(..)) -- TODO qualified
applyStrategy :: (Hashable t, Textual t, Index t ~ Int) => SegmentStrategy t -> Int -> Int -> t -> Vector (RenderedLine t)
applyStrategy NoSegments width itemIx txt =
let rawLines = S.splitWhen (=='\n') txt
chunks = map (S.take width) rawLines -- crop
offsets = scanOffsetsWithNewlines chunks
in V.fromList
[ RenderedLine { rlText = chunk, rlItemIx = itemIx, rlLineIx = lineIx, rlCharStart = off }
| (lineIx, (off, chunk)) <- zip [0..] (zip offsets chunks)
]
applyStrategy FixedWidthSegments width itemIx txt =
let rawLines = S.splitWhen (=='\n') txt
(dl, _) = foldl step (DL.empty, 0) rawLines
step (acc, off0) line =
let chunks = chunkFixed width line
offsets = scanOffsetsFrom off0 chunks
offNext = off0 + O.length line + 1
acc' = acc `DL.append` DL.fromList (zip offsets chunks)
in (acc', offNext)
allChunks = DL.toList dl
in V.fromList
[ RenderedLine { rlText = chunk, rlItemIx = itemIx, rlLineIx = lineIx, rlCharStart = off }
| (lineIx, (off, chunk)) <- zip [0..] allChunks
]
applyStrategy (HyphenateSegments lang cache0) width itemIx txt =
let rawLines = S.splitWhen (=='\n') txt
-- fold over each physical line, accumulating:
-- * all rendered (offset, chunk) pairs
-- * updated hyphenation cache (unused for now)
-- * running character offset across lines
(dl, _cache1, _) =
foldl (segmentOneLine lang width) (DL.empty, cache0, 0) rawLines
allChunks = DL.toList dl
in V.fromList
[ RenderedLine { rlText = chunk, rlItemIx = itemIx, rlLineIx = lineIx, rlCharStart = off }
| (lineIx, (off, chunk)) <- zip [0..] allChunks
]
where
-- Segment a single physical line using TeX‑lite hyphenation
segmentOneLine
:: (Hashable t, Textual t, Index t ~ Int)
=> H.Language
-> Int
-> (DL.DList (Int, t), HM.HashMap t [(t, t)], Int)
-> t
-> (DL.DList (Int, t), HM.HashMap t [(t, t)], Int)
segmentOneLine lang width (acc, cache, off0) line =
let (chunks, cache1) = segmentWithHyphenationTeXLite lang width line cache
offsets = scanOffsetsFrom off0 chunks
offNext = off0 + O.length line + 1
acc' = acc `DL.append` DL.fromList (zip offsets chunks)
in (acc', cache1, offNext)
--segmentOneLine (acc, cache, off0) line =
-- let chunks = segmentWithHyphenationTeXLite lang width line
-- offsets = scanOffsetsFrom off0 chunks
-- offNext = off0 + O.length line + 1 -- +1 for newline
-- acc' = acc ++ zip offsets chunks
-- in (acc', cache, offNext)
-- | Compute running character offsets for a list of chunks.
scanOffsetsWithNewlines :: IsSequence t => [t] -> [Int]
scanOffsetsWithNewlines = go 0
where
go !_ [] = []
go !o (l:ls) =
let off = o
o' = o + O.length l + 1 -- +1 for newline
in off : go o' ls
-- | Chunk a single line into fixed-width pieces.
chunkFixed :: (IsSequence t, Index t ~ Int) => Int -> t -> [t]
chunkFixed w t
| w <= 0 = []
| O.null t = [mempty]
| otherwise =
let (h, rest) = S.splitAt w t
in h : if O.null rest then [] else chunkFixed w rest
--------------------------------------------------------------------------------
-- Hyphenation-aware segmenting (TeX-lite)
--------------------------------------------------------------------------------
-- Compute offsets starting from a base offset
scanOffsetsFrom :: IsSequence t => Int -> [t] -> [Int]
scanOffsetsFrom start = go start
where
go !_ [] = []
go !o (t:ts) = o : go (o + O.length t) ts
segmentWithHyphenationTeXLite
:: (Hashable t, Textual t, Index t ~ Int)
=> H.Language
-> Int
-> t
-> HM.HashMap t [(t, t)]
-> ([t], HM.HashMap t [(t, t)])
segmentWithHyphenationTeXLite lang width txt cache0 =
go cache0 (S.words txt)
where
go cache [] = ([], cache)
go cache ws =
case lineCandidates lang width cache ws of
([], cache1) ->
let chunks = breakWordSafe width ws
in (chunks, cache1)
(cs, cache1) ->
let (line, rest, _) =
minimumBy (comparing (scoreCandidate width)) cs
(more, cache2) = go cache1 rest
in (line : more, cache2)
-- | Lossless fallback: treat remaining words as one long text and
-- chunk it into width-sized pieces. Never truncates, never drops text.
breakWordSafe :: (Textual t, Index t ~ Int) => Int -> [t] -> [t]
breakWordSafe width ws =
chunk (S.unwords ws)
where
chunk t
| O.null t = []
| O.length t <= width = [t]
| otherwise =
let (c, r) = S.splitAt width t
in c : chunk r
type Candidate a = (a, [a], Bool)
lineCandidates
:: (Hashable t, Textual t, Index t ~ Int)
=> H.Language
-> Int
-> HM.HashMap t [(t, t)]
-> [t]
-> ([(t, [t], Bool)], HM.HashMap t [(t, t)])
lineCandidates lang width cache0 ws0 =
go mempty [] cache0 ws0
where
go _ acc cache [] = (acc, cache)
go line acc cache (w:ws) =
let space = if O.null line then mempty else S.singleton ' '
baseTxt = S.unwords line
wholeTxt = baseTxt <> space <> w
wholeLen = O.length wholeTxt
acc1 =
if wholeLen <= width && not (O.null wholeTxt)
then (wholeTxt, ws, False) : acc
else acc
(hyphs, cache1) =
case HM.lookup w cache of
Just hs -> (hs, cache)
Nothing ->
let hs = hyphenateWord lang (T.pack $ S.unpack w)
hs' = [ (S.pack $ T.unpack pre, S.pack $ T.unpack suf) | (pre, suf) <- hs ]
in (hs', HM.insert w hs' cache)
hyphCands =
[ (preTxt, suf : ws, True)
| (pre, suf) <- hyphs
, not (O.null pre)
, let preTxt = baseTxt <> space <> pre <> S.singleton '-'
, O.length preTxt <= width
]
acc2 = hyphCands ++ acc1
in if wholeLen <= width
then go (line ++ [w]) acc2 cache1 ws
else (acc2, cache1)
hyphenateWord :: H.Language -> Text -> [(Text, Text)]
hyphenateWord lang word =
let parts = H.hyphenate (H.languageHyphenator lang) (T.unpack word)
in [ ( T.pack (concat (take i parts))
, T.pack (concat (drop i parts))
)
| i <- [1 .. length parts - 1]
]
scoreCandidate :: IsSequence t => Int -> Candidate t -> Int
scoreCandidate width (line, _, endsWithHyphen) =
let len = O.length line
remSpace = max 0 (width - len)
badness = remSpace * remSpace * remSpace
hyphenPenalty =
if endsWithHyphen then 50 else 0
shortPenalty =
if len < width `div` 2 then 200 else 0
in badness + hyphenPenalty + shortPenalty
|