• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Courseography / courseography / c1af5ae6-6501-463e-8e2b-3f62f7a4d663

07 Nov 2025 07:09PM UTC coverage: 55.158% (+0.04%) from 55.118%
c1af5ae6-6501-463e-8e2b-3f62f7a4d663

Pull #1607

circleci

akarki2005
Merge branch 'master' of https://github.com/Courseography/courseography into rename-post-table-to-program
Pull Request #1607: Rename post table to program

487 of 958 branches covered (50.84%)

Branch coverage included in aggregate %.

16 of 32 new or added lines in 6 files covered. (50.0%)

16 existing lines in 1 file now uncovered.

2224 of 3957 relevant lines covered (56.2%)

159.45 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

38.24
/app/WebParsing/PostParser.hs
1
module WebParsing.PostParser
2
    ( addPostToDatabase
3
    , postInfoParser
4
    , pruneHtml
5
    , getPostType
6
    ) where
7

8
import Control.Monad.Trans (liftIO)
9
import Data.Either (fromRight)
10
import Data.Functor (void)
11
import Data.List (find)
12
import Data.List.Split (keepDelimsL, split, splitWhen, whenElt)
13
import Data.Text (strip)
14
import qualified Data.Text as T
15
import Data.Time.Clock (getCurrentTime)
16
import Database.DataType (ProgramType (..))
17
import Database.Persist (insertUnique)
18
import Database.Persist.Sqlite (SqlPersistM, insert_)
19
import Database.Tables
20
import Text.HTML.TagSoup
21
import Text.HTML.TagSoup.Match
22
import qualified Text.Parsec as P
23
import Text.Parsec.Text (Parser)
24
import WebParsing.ParsecCombinators (parseUntil)
25
import WebParsing.ReqParser (parseReqs)
26

27

28
addPostToDatabase :: [Tag T.Text] -> SqlPersistM ()
29
addPostToDatabase programElements = do
×
30
    let fullPostName = maybe "" (strip . fromTagText) $ find isTagText programElements
×
31
        postDescHtml = partitions isDescriptionSection programElements
×
32
        descriptionText = case postDescHtml of
×
33
            [] -> T.empty
×
34
            (x:_) -> renderTags x
×
35
        postReqHtml = sections isRequirementSection programElements
×
36
        requirementLines = if null postReqHtml then [] else pruneHtml $ last postReqHtml
×
37
        requirements = concatMap parseRequirement $ reqHtmlToLines requirementLines
×
38
    liftIO $ print fullPostName
×
39

40
    case P.parse postInfoParser "POSt information" fullPostName of
×
41
        Left _ -> return ()
×
42
        Right (department, code) -> do
×
43
            currTime <- liftIO getCurrentTime
×
NEW
44
            programExists <- insertUnique Program {
×
NEW
45
                programName = getPostType code department,
×
NEW
46
                programDepartment = department,
×
NEW
47
                programCode = code,
×
NEW
48
                programDescription = descriptionText,
×
NEW
49
                programRequirements = renderTags requirementLines,
×
NEW
50
                programCreated = currTime,
×
NEW
51
                programModified = currTime
×
52
                }
NEW
53
            case programExists of
×
54
                Just key ->
NEW
55
                    mapM_ (insert_ . ProgramCategory key) requirements
×
56
                Nothing -> return ()
×
57
    where
58
        isDescriptionSection tag = tagOpenAttrNameLit "div" "class" (T.isInfixOf "views-field-body") tag || isRequirementSection tag
×
59
        isRequirementSection tag = tagOpenAttrNameLit "div" "class" (T.isInfixOf "views-field-field-enrolment-requirements") tag || tagOpenAttrNameLit "div" "class" (T.isInfixOf "views-field-field-completion-requirements") tag
×
60

61

62
-- | Parse a Post value from its title.
63
-- Titles are usually of the form "Actuarial Science Major (Science Program)".
64
postInfoParser :: Parser (T.Text, T.Text)
65
postInfoParser = do
1✔
66
    deptName <- P.manyTill P.anyChar $ P.choice $ map (P.try . P.lookAhead) [
1✔
67
        void postCodeParser,
1✔
68
        P.eof
1✔
69
        ]
70
    code <- postCodeParser P.<|> return T.empty
1✔
71

72
    return (T.pack deptName, code)
14✔
73

74
-- | Extracts the post type (eg. major) from a post code if it is non-empty,
75
-- | or from a dept name otherwise
76
getPostType :: T.Text -> T.Text -> ProgramType
77
getPostType "" deptName = getPostTypeFromName deptName
5✔
78
getPostType code _ = getPostTypeFromCode code
5✔
79

80
-- | Extracts the post type (eg. major) from a post name (eg. "Biology Specialist")
81
getPostTypeFromName :: T.Text -> ProgramType
82
getPostTypeFromName deptName
83
    | T.isInfixOf "Specialist" deptName = Specialist
5✔
84
    | T.isInfixOf "Major" deptName = Major
4✔
85
    | T.isInfixOf "Minor" deptName = Minor
3✔
86
    | T.isInfixOf "Focus" deptName = Focus
2✔
87
    | T.isInfixOf "Certificate" deptName = Certificate
1!
88
    | otherwise = Other
×
89

90
-- | Extracts the program type (eg. major) from a program code (eg. ASMAJ1689)
91
getPostTypeFromCode :: T.Text -> ProgramType
92
getPostTypeFromCode = abbrevToPost . T.take 3 . T.drop 2
1✔
93

94
-- | Maps the post type abbreviations to their corresponding ProgramType
95
abbrevToPost :: T.Text -> ProgramType
96
abbrevToPost "SPE" = Specialist
1✔
97
abbrevToPost "MAJ" = Major
1✔
98
abbrevToPost "MIN" = Minor
1✔
99
abbrevToPost "FOC" = Focus
1✔
100
abbrevToPost "CER" = Certificate
1✔
UNCOV
101
abbrevToPost _ = Other
×
102

103
-- | Parser for a post code (eg. ASFOC1689A)
104
postCodeParser :: Parser T.Text
105
postCodeParser = do
1✔
106
    _ <- P.many1 P.space >> P.char '-' >> P.many1 P.space
1✔
107
    code <- P.count 5 P.letter
1✔
108
    num <- P.count 4 P.digit
1✔
109
    variant <- P.many P.letter
1✔
110
    return $ T.pack $ code ++ num ++ variant
24✔
111

112
-- | Prunes all the attributes (eg. class, href) in the html except for the style.
113
-- | Removes all <a></a> tags
114
pruneHtml :: [Tag T.Text] -> [Tag T.Text]
115
pruneHtml [] = []
1✔
116
pruneHtml ((TagOpen "a" _):xs) = pruneHtml xs
1✔
117
pruneHtml ((TagClose "a"):xs) = pruneHtml xs
1✔
118
pruneHtml ((TagOpen tag attrs):xs) = (TagOpen tag [style | style@("style", _) <- attrs]) : pruneHtml xs
4✔
119
pruneHtml (x:xs) = x : pruneHtml xs
8✔
120

121
-- | Split requirements HTML into individual lines.
122
reqHtmlToLines :: [Tag T.Text] -> [[T.Text]]
123
reqHtmlToLines tags =
UNCOV
124
    let sects = split (keepDelimsL $ whenElt isSectionSplit) tags
×
UNCOV
125
        sectionsNoNotes = filter (not . isNoteSection) sects
×
126
        paragraphs = concatMap (splitWhen (isTagOpenName "p")) sectionsNoNotes
×
127
        lines' = map (map (T.strip . convertLine) . splitLines) paragraphs
×
128
    in
129
        lines'
130

131
    where
132
        isSectionSplit :: Tag T.Text -> Bool
133
        isSectionSplit tag =
UNCOV
134
            isTagText tag &&
×
UNCOV
135
            any (flip T.isInfixOf $ fromTagText tag)
×
136
                ["First", "Second", "Third", "Higher", "Recommended Courses:", "Notes", "NOTES"]
×
137

138
        isNoteSection :: [Tag T.Text] -> Bool
139
        isNoteSection (sectionTitleTag:_) =
UNCOV
140
            isTagText sectionTitleTag && any (flip T.isInfixOf $ fromTagText sectionTitleTag) ["Notes", "NOTES"]
×
UNCOV
141
        isNoteSection [] = False
×
142

143
        splitLines :: [Tag T.Text] -> [[Tag T.Text]]
UNCOV
144
        splitLines = splitWhen (\tag -> isTagOpenName "br" tag || isTagOpenName "li" tag)
×
145

146
        convertLine :: [Tag T.Text] -> T.Text
UNCOV
147
        convertLine [] = ""
×
148
        convertLine (t:ts)
149
            | isTagOpenName "li" t = T.append "0." (innerText ts)
×
UNCOV
150
            | otherwise = innerText (t:ts)
×
151

152

153
parseRequirement :: [T.Text] -> [T.Text]
UNCOV
154
parseRequirement requirement = map parseSingleReq $ filter isReq requirement
×
155
    where
156
        isReq t = T.length t >= 7 &&
×
UNCOV
157
            not (any (`T.isInfixOf` t) ["First", "Second", "Third", "Higher"])
×
158

159
        parseSingleReq =
UNCOV
160
            T.pack . show .
×
UNCOV
161
            parseReqs .      -- Using parser for new Req type
×
162
            T.unpack .
×
163
            fromRight "" .
×
164
            P.parse getLineText "Reading a requirement line" .
×
165
            T.strip
×
166

167
        -- Strips the optional leading numbering (#.) from a line.
168
        getLineText :: Parser T.Text
UNCOV
169
        getLineText = do
×
UNCOV
170
            P.optional $ P.try (P.digit >> P.char '.' >> P.space)
×
171
            parseUntil P.eof
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc