• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

msakai / bytestring-encoding / 60

02 Feb 2024 04:55PM UTC coverage: 73.077% (+7.7%) from 65.385%
60

Pull #13

github

msakai
update stack resolvers
Pull Request #13: Update stack resolvers (2024-02)

114 of 156 relevant lines covered (73.08%)

0.73 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.08
/src/Data/ByteString/Lazy/Encoding/Internal.hs
1
{-# OPTIONS_GHC -Wall #-}
2
{-# OPTIONS_HADDOCK hide #-}
3
{-# LANGUAGE BangPatterns #-}
4
{-# LANGUAGE CPP #-}
5
{-# LANGUAGE OverloadedStrings #-}
6
{-# LANGUAGE RecordWildCards #-}
7
module Data.ByteString.Lazy.Encoding.Internal
8
  ( encode
9
  , encodeWith
10
  , decode
11
  , decodeWith
12
  ) where
13

14
import Control.Exception
15
import Control.Monad
16
import qualified Data.ByteString as B
17
import qualified Data.ByteString.Unsafe as B
18
import qualified Data.ByteString.Lazy as BL
19
import Data.Char (ord)
20
import qualified Data.Text as T
21
import qualified Data.Text.Foreign as T
22
import qualified Data.Text.Lazy as TL
23
import Data.Word
24
import Foreign
25
import qualified Foreign.Concurrent as Conc
26
import Foreign.ForeignPtr (touchForeignPtr)
27
import Foreign.Ptr (nullPtr)
28
import qualified GHC.IO.Encoding as Enc
29
import GHC.IO.Buffer
30
import System.IO.Unsafe
31

32
-- | Encode a lazy 'TL.Text' into a lazy 'BL.ByteString' using a given 'Enc.TextEncoding'.
33
encode :: Enc.TextEncoding -> TL.Text -> BL.ByteString
34
encode enc = encodeWith enc 1024 1024
×
35

36
encodeWith :: Enc.TextEncoding -> Int -> Int -> TL.Text -> BL.ByteString
37
encodeWith enc inBufSize outBufSize = encodeStringWith enc inBufSize outBufSize . TL.unpack
1✔
38

39
encodeStringWith :: Enc.TextEncoding -> Int -> Int -> String -> BL.ByteString
40
encodeStringWith Enc.TextEncoding{ .. } inBufSize outBufSize s = BL.fromChunks $ unsafePerformIO $ do
1✔
41
  Enc.BufferCodec{ .. } <- mkTextEncoder
1✔
42
  fp <- Conc.newForeignPtr nullPtr close
1✔
43

44
  let fillInBuf :: String -> CharBuffer -> IO (String, CharBuffer)
45
      fillInBuf s buf
1✔
46
        | isEmptyBuffer buf = go s buf{ bufL=0, bufR=0 }
1✔
47
        | otherwise = go s buf
×
48
        where
49
          go :: String -> CharBuffer -> IO (String, CharBuffer)
50
          go [] buf = return ([], buf)
1✔
51
          go s@(c : cs) buf@Buffer{ bufR = r, bufRaw = iraw}
52
            | isFullCharBuffer buf = return (s, buf)
1✔
53
            | otherwise = do
×
54
                r' <- writeCharBuf iraw r c
1✔
55
                go cs buf{ bufR = r' }
1✔
56

57
      flushOutBuf :: Buffer Word8 -> IO ([B.ByteString], Buffer Word8)
58
      flushOutBuf buf
1✔
59
        | isEmptyBuffer buf = return ([], buf{ bufL=0, bufR=0 })
×
60
        | otherwise = do
×
61
            withBuffer buf $ \p -> do
1✔
62
              b <- B.packCStringLen (castPtr p, bufferElems buf)
1✔
63
              return ([b], buf{ bufL=0, bufR=0 })
1✔
64

65
      loop :: String -> CharBuffer -> Buffer Word8 -> IO [B.ByteString]
66
      loop s inBuf outBuf = do
1✔
67
        (s', inBuf1) <- fillInBuf s inBuf
1✔
68
        if isEmptyBuffer inBuf1 then do
1✔
69
          assert (null s') $ return ()
×
70
          (m, _outBuf') <- flushOutBuf outBuf
1✔
71
          touchForeignPtr fp
1✔
72
          return m
1✔
73
        else do
1✔
74
          (ret, inBuf2, outBuf2) <- encode inBuf1 outBuf
1✔
75
          case ret of
1✔
76
            Enc.InputUnderflow -> do
1✔
77
              if isFullCharBuffer inBuf2 && not (isEmptyBuffer inBuf2) then do
×
78
                withRawBuffer (bufRaw inBuf2) $ \p -> do
×
79
                  moveArray p (p `plusPtr` bufL inBuf2) (bufferElems inBuf2)
×
80
                loop s' inBuf2{ bufL = 0, bufR = bufferElems inBuf2 } outBuf2
×
81
              else do
1✔
82
                loop s' inBuf2 outBuf2
1✔
83
            Enc.OutputUnderflow -> do
1✔
84
              (b, outBuf3) <- flushOutBuf outBuf2
1✔
85
              bs <- unsafeInterleaveIO $ loop s' inBuf2 outBuf3
1✔
86
              return $ b ++ bs
1✔
87
            Enc.InvalidSequence -> do
×
88
              -- recover assumes that to buffer has at least one element of free space.
89
              if isFullBuffer outBuf2 then do
×
90
                (b, outBuf3) <- flushOutBuf outBuf2
×
91
                (inBuf4, outBuf4) <- recover inBuf2 outBuf3
×
92
                bs <- unsafeInterleaveIO $ loop s' inBuf4 outBuf4
×
93
                return $ b ++ bs
×
94
              else do
×
95
                (inBuf3, outBuf3) <- recover inBuf2 outBuf2
×
96
                loop s' inBuf3 outBuf3
×
97

98
  inBuf <- newCharBuffer inBufSize ReadBuffer
×
99
  outBuf <- newByteBuffer outBufSize WriteBuffer
×
100
  loop s inBuf outBuf
1✔
101

102

103
-- | Decode a lazy 'BL.ByteString' to a lazy 'TL.Text' using a given 'Enc.TextEncoding'.
104
decode :: Enc.TextEncoding -> BL.ByteString -> TL.Text
105
decode enc b = decodeWith enc 1024 1024 b
1✔
106

107
decodeWith :: Enc.TextEncoding -> Int -> Int -> BL.ByteString -> TL.Text
108
decodeWith Enc.TextEncoding{ .. } inBufSize outBufSize b = TL.fromChunks $ unsafePerformIO $ do
1✔
109
  Enc.BufferCodec{ .. } <- mkTextDecoder
1✔
110
  fp <- Conc.newForeignPtr nullPtr close
1✔
111

112
  let fillInBuf :: [B.ByteString] -> Buffer Word8 -> IO ([B.ByteString], Buffer Word8)
113
      fillInBuf bs buf
1✔
114
        | isEmptyBuffer buf = go bs buf{ bufL=0, bufR=0 }
1✔
115
        | otherwise = go bs buf
×
116
        where
117
          go :: [B.ByteString] -> Buffer Word8 -> IO ([B.ByteString], Buffer Word8)
118
          go [] buf = return ([], buf)
1✔
119
          go bbs@(b : bs) buf
120
            | isFullBuffer buf = return (bbs, buf)
1✔
121
            | B.null b = go bs buf
×
122
            | otherwise = do
×
123
                B.unsafeUseAsCString b $ \p -> do
1✔
124
                  withBuffer buf $ \q -> do
1✔
125
                    if B.length b <= bufferAvailable buf then do
1✔
126
                      moveBytes (q `plusPtr` bufR buf) p (B.length b)
1✔
127
                      go bs buf{ bufR = bufR buf + B.length b }
1✔
128
                    else do
1✔
129
                      moveBytes (q `plusPtr` bufR buf) p (bufferAvailable buf)
1✔
130
                      go (B.drop (bufferAvailable buf) b : bs) buf{ bufR = bufR buf + bufferAvailable buf }
1✔
131

132
#if MIN_VERSION_text(2,0,0)
133
      flushOutBuf :: CharBuffer -> ForeignPtr Word8 -> IO ([T.Text], CharBuffer)
134
      flushOutBuf buf workspace
1✔
135
        | isEmptyBuffer buf = return ([], buf{ bufL=0, bufR=0 })
×
136
        | otherwise =
×
137
            withForeignPtr workspace $ \workspace' ->
1✔
138
            withBuffer buf $ \p -> do
1✔
139
              let f !i !j
1✔
140
                    | bufR buf <= i = return j
1✔
141
                    | otherwise = do
×
142
                        (c, i') <- readCharBufPtr (castPtr p) i
1✔
143
                        j' <- writeUTF8 workspace' j c
1✔
144
                        f i' j'
1✔
145
              n <- f (bufL buf) 0
1✔
146
              t <- T.fromPtr workspace' (fromIntegral n)
1✔
147
              return ([t], buf{ bufL=0, bufR=0 })
1✔
148
#else
149
      flushOutBuf :: CharBuffer -> ForeignPtr Word16 -> IO ([T.Text], CharBuffer)
150
      flushOutBuf buf workspace
151
        | isEmptyBuffer buf = return ([], buf{ bufL=0, bufR=0 })
152
        | charSize==2 = do
153
            withBuffer buf $ \p -> do
154
              let p' :: Ptr Word16
155
                  p' = castPtr p
156
              t <- T.fromPtr (p' `plusPtr` bufL buf) (fromIntegral (bufferElems buf))
157
              return ([t], buf{ bufL=0, bufR=0 })
158
        | otherwise =
159
            withForeignPtr workspace $ \workspace' ->
160
            withBuffer buf $ \p -> do
161
              let p' :: Ptr Char
162
                  p' = castPtr p
163
                  f !i !j
164
                    | bufR buf <= i = return j
165
                    | otherwise = do
166
                        c <- liftM fromEnum $ peekElemOff p' i
167
                        if c < 0x10000 then do
168
                          pokeElemOff workspace' j (fromIntegral c)
169
                          f (i+1) (j+1)
170
                        else do
171
                          let c' = c - 0x10000
172
                          pokeElemOff workspace' j (fromIntegral (c' `div` 0x400 + 0xd800))
173
                          pokeElemOff workspace' (j+1) (fromIntegral (c' `mod` 0x400 + 0xdc00))
174
                          f (i+1) (j+2)
175
              n <- f (bufL buf) 0
176
              t <- T.fromPtr workspace' (fromIntegral n)
177
              return ([t], buf{ bufL=0, bufR=0 })
178
#endif
179

180
#if MIN_VERSION_text(2,0,0)
181
      loop :: [B.ByteString] -> Buffer Word8 -> CharBuffer -> ForeignPtr Word8 -> IO [T.Text]
182
#else
183
      loop :: [B.ByteString] -> Buffer Word8 -> CharBuffer -> ForeignPtr Word16 -> IO [T.Text]
184
#endif
185
      loop bs inBuf outBuf workspace = do
1✔
186
        (bs', inBuf1) <- fillInBuf bs inBuf
1✔
187
        if isEmptyBuffer inBuf1 then do
1✔
188
          assert (null bs') $ return ()
×
189
          (m, _outBuf') <- flushOutBuf outBuf workspace
1✔
190
          touchForeignPtr fp
1✔
191
          return m
1✔
192
        else do
1✔
193
          (ret, inBuf2, outBuf2) <- encode inBuf1 outBuf
1✔
194
          case ret of
1✔
195
            Enc.InputUnderflow -> do
1✔
196
              if isFullBuffer inBuf2 && not (isEmptyBuffer inBuf2) then do
1✔
197
                inBuf3 <- slideContents inBuf2
1✔
198
                loop bs' inBuf3 outBuf2 workspace
1✔
199
              else do
1✔
200
                loop bs' inBuf2 outBuf2 workspace
1✔
201
            Enc.OutputUnderflow -> do
1✔
202
              (t, outBuf3) <- flushOutBuf outBuf2 workspace
1✔
203
              ts <- unsafeInterleaveIO $ loop bs' inBuf2 outBuf3 workspace
1✔
204
              return $ t ++ ts
1✔
205
            Enc.InvalidSequence -> do
×
206
              -- recover assumes that to buffer has at least one element of free space.
207
              if isFullCharBuffer outBuf2 then do
×
208
                (t, outBuf3) <- flushOutBuf outBuf2 workspace
×
209
                (inBuf4, outBuf4) <- recover inBuf2 outBuf3
×
210
                ts <- unsafeInterleaveIO $ loop bs' inBuf4 outBuf4 workspace
×
211
                return $ t ++ ts
×
212
              else do
×
213
                (inBuf3, outBuf3) <- recover inBuf2 outBuf2
×
214
                loop bs' inBuf3 outBuf3 workspace
×
215

216
  inBuf <- newByteBuffer inBufSize ReadBuffer
×
217
  outBuf <- newCharBuffer outBufSize WriteBuffer
×
218
#if MIN_VERSION_text(2,0,0)
219
  workspace <- mallocForeignPtrArray (outBufSize * 4)
1✔
220
#else
221
  workspace <- if charSize == 2 then newForeignPtr_ nullPtr else mallocForeignPtrArray (outBufSize * 2)
222
#endif
223
  loop (BL.toChunks b) inBuf outBuf workspace
1✔
224

225

226
#if MIN_VERSION_text(2,0,0)
227

228
writeUTF8 :: Ptr Word8 -> Int -> Char -> IO Int
229
writeUTF8 p i c = do
1✔
230
  let x = ord c
1✔
231
  if x <= 0x7F then do
1✔
232
    pokeElemOff p i (fromIntegral x)
1✔
233
    return $! i+1
1✔
234
  else if x <= 0x07FF then do
1✔
235
    let (c1,c2) = ord2 c
1✔
236
    pokeElemOff p i c1
1✔
237
    pokeElemOff p (i+1) c2
1✔
238
    return $! i+2
1✔
239
  else if x <= 0xFFFF then do
1✔
240
    let (c1,c2,c3) = ord3 c
1✔
241
    pokeElemOff p i c1
1✔
242
    pokeElemOff p (i+1) c2
1✔
243
    pokeElemOff p (i+2) c3
1✔
244
    return $! i+3
1✔
245
  else do
1✔
246
    let (c1,c2,c3,c4) = ord4 c
1✔
247
    pokeElemOff p i c1
1✔
248
    pokeElemOff p (i+1) c2
1✔
249
    pokeElemOff p (i+2) c3
1✔
250
    pokeElemOff p (i+3) c4
1✔
251
    return $! i+4
1✔
252

253
-- -----------------------------------------------------------------------------
254
-- UTF-8 primitives, lifted from Data.Text.Fusion.Utf8
255

256
ord2   :: Char -> (Word8,Word8)
257
ord2 c = assert (n >= 0x80 && n <= 0x07ff) (x1,x2)
×
258
    where
259
      n  = ord c
1✔
260
      x1 = fromIntegral $ (n `shiftR` 6) + 0xC0
1✔
261
      x2 = fromIntegral $ (n .&. 0x3F)   + 0x80
1✔
262

263
ord3   :: Char -> (Word8,Word8,Word8)
264
ord3 c = assert (n >= 0x0800 && n <= 0xffff) (x1,x2,x3)
×
265
    where
266
      n  = ord c
1✔
267
      x1 = fromIntegral $ (n `shiftR` 12) + 0xE0
1✔
268
      x2 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80
1✔
269
      x3 = fromIntegral $ (n .&. 0x3F) + 0x80
1✔
270

271
ord4   :: Char -> (Word8,Word8,Word8,Word8)
272
ord4 c = assert (n >= 0x10000) (x1,x2,x3,x4)
×
273
    where
274
      n  = ord c
1✔
275
      x1 = fromIntegral $ (n `shiftR` 18) + 0xF0
1✔
276
      x2 = fromIntegral $ ((n `shiftR` 12) .&. 0x3F) + 0x80
1✔
277
      x3 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80
1✔
278
      x4 = fromIntegral $ (n .&. 0x3F) + 0x80
1✔
279

280
#endif
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc