<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" />
<meta name="generator" content="pdoc 0.8.3" />
<title>cap.caption_segmentation API documentation</title>
<meta name="description" content="This file implements the notion of a Long Short Term Memory (LSTM) network. For
more information on LSTMs and the PyTorch implementation see:
…" />
<link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/sanitize.min.css" integrity="sha256-PK9q560IAAa6WVRRh76LtCaI8pjTJ2z11v0miyNNjrs=" crossorigin>
<link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/typography.min.css" integrity="sha256-7l/o7C8jubJiy74VsKTidCy1yBkRtiUGbVkYBylBqUg=" crossorigin>
<link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.18.1/styles/github.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target .name{background:var(--highlight-color)}.name > 
span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/latest.js?config=TeX-AMS_CHTML" integrity="sha256-kZafAc6mZvK3W3v1pHOcUix30OHQN6pU/NO2oFkqZVw=" crossorigin></script>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.18.1/highlight.min.js" integrity="sha256-eOgo0OtLL4cdq7RdwRUiGKLX9XsIJ7nGhWEKbohmVAQ=" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => hljs.initHighlighting())</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>cap.caption_segmentation</code></h1>
</header>
<section id="section-intro">
<p>This file implements the notion of a Long Short Term Memory (LSTM) network. For
more information on LSTMs and the PyTorch implementation see:
<a href="https://pytorch.org/docs/master/generated/torch.nn.LSTM.html">https://pytorch.org/docs/master/generated/torch.nn.LSTM.html</a></p>
<p>Future improvements:
This model currently trains on 17 videos and annotates one. If one were to use
this on a company level, it would need a lot more data. Another way to improve
the results of the model is to train the model specifically on one video
creator or train the model on one genre of videos. Especially when a video
creator uses the same intro or has certain catchphrases this can be useful. We
therefore recommend creating a specialised dataset by adjusting the contents of
the 'directory' variable.</p>
<div class="git-link-div"><a href="https://github.com/yochem/cap/blob/66fba33ce6fcace37c221e371bc1fceac99b4a9a/cap/caption_segmentation.py#L0-L220" class="git-link">Browse git</a></div>
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<dt id="cap.caption_segmentation.create_testdata"><code class="name flex">
<span>def <span class="ident">create_testdata</span></span>(<span>training_data: List[List[str]], groups: List[List[Union[<a title="cap.asr.Word" href="asr.html#cap.asr.Word">Word</a>, <a title="cap.asr.Punc" href="asr.html#cap.asr.Punc">Punc</a>]]]) ‑> List[List[str]]</span>
</code></dt>
<dd>
<div class="desc"><p>Preprocess the testdata.</p>
<p>Divide the transcript into sentences of input length, in a way that the
LSTM can model it.</p>
<h2 id="args">Args</h2>
<dl>
<dt><strong><code>training_data</code></strong></dt>
<dd>The training data, generated by the create_traindata
function</dd>
<dt><strong><code>groups</code></strong></dt>
<dd>Groups in our custom Caption-list dataformat.
Formed by asr.ASR().groups()</dd>
</dl>
<h2 id="returns">Returns</h2>
<p>The evaluation data, formed into groups of the designated input format.</p></div>
<div class="git-link-div"><a href="https://github.com/yochem/cap/blob/66fba33ce6fcace37c221e371bc1fceac99b4a9a/cap/caption_segmentation.py#L77-L106" class="git-link">Browse git</a></div>
</dd>
<dt id="cap.caption_segmentation.create_traindata"><code class="name flex">
<span>def <span class="ident">create_traindata</span></span>(<span>directory: str) ‑> List[List[str]]</span>
</code></dt>
<dd>
<div class="desc"><p>Preprocesses the training data.</p>
<p>Adds the eoc tags at the end of each caption and replaces the newlines
with the nl tag. Preprocesses the data in a way that punctuation symbols
can be learned by the model.</p>
<h2 id="args">Args</h2>
<dl>
<dt><strong><code>directory</code></strong></dt>
<dd>The directory containing the training set.</dd>
</dl>
<h2 id="returns">Returns</h2>
<p>A list of captions with added tags and punctuation.</p></div>
<div class="git-link-div"><a href="https://github.com/yochem/cap/blob/66fba33ce6fcace37c221e371bc1fceac99b4a9a/cap/caption_segmentation.py#L40-L74" class="git-link">Browse git</a></div>
</dd>
<dt id="cap.caption_segmentation.prepare_sequence"><code class="name flex">
<span>def <span class="ident">prepare_sequence</span></span>(<span>seq, to_ix)</span>
</code></dt>
<dd>
<div class="desc"><p>Returns a map of the words to the indices as a tensor.</p></div>
<div class="git-link-div"><a href="https://github.com/yochem/cap/blob/66fba33ce6fcace37c221e371bc1fceac99b4a9a/cap/caption_segmentation.py#L32-L37" class="git-link">Browse git</a></div>
</dd>
<dt id="cap.caption_segmentation.train"><code class="name flex">
<span>def <span class="ident">train</span></span>(<span>n_epochs: int, training_data: List[List[str]])</span>
</code></dt>
<dd>
<div class="desc"><p>Train the model on the training data.</p>
<h2 id="args">Args</h2>
<dl>
<dt><strong><code>n_epochs</code></strong></dt>
<dd>The number of times the model goes over the entire training set.</dd>
<dt><strong><code>training_data</code></strong></dt>
<dd>The training data, generated by the create_traindata()
function.</dd>
</dl></div>
<div class="git-link-div"><a href="https://github.com/yochem/cap/blob/66fba33ce6fcace37c221e371bc1fceac99b4a9a/cap/caption_segmentation.py#L136-L166" class="git-link">Browse git</a></div>
</dd>
</dl>
</section>
<section>
<h2 class="section-title" id="header-classes">Classes</h2>
<dl>
<dt id="cap.caption_segmentation.LSTMCaption"><code class="flex name class">
<span>class <span class="ident">LSTMCaption</span></span>
<span>(</span><span>embedding_dim, hidden_dim, vocab_size)</span>
</code></dt>
<dd>
<div class="desc"><p>Base class for all neural network modules.</p>
<p>Your models should also subclass this class.</p>
<p>Modules can also contain other Modules, allowing to nest them in
a tree structure. You can assign the submodules as regular attributes::</p>
<pre><code>import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))
</code></pre>
<p>Submodules assigned in this way will be registered, and will have their
parameters converted too when you call :meth:<code>to</code>, etc.</p>
<p>Initializes internal Module state, shared by both nn.Module and ScriptModule.</p></div>
<div class="git-link-div"><a href="https://github.com/yochem/cap/blob/66fba33ce6fcace37c221e371bc1fceac99b4a9a/cap/caption_segmentation.py#L113-L133" class="git-link">Browse git</a></div>
<h3>Ancestors</h3>
<ul class="hlist">
<li>torch.nn.modules.module.Module</li>
</ul>
<h3>Methods</h3>
<dl>
<dt id="cap.caption_segmentation.LSTMCaption.forward"><code class="name flex">
<span>def <span class="ident">forward</span></span>(<span>self, sentence)</span>
</code></dt>
<dd>
<div class="desc"><p>Defines the computation performed at every call.</p>
<p>Should be overridden by all subclasses.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Although the recipe for forward pass needs to be defined within
this function, one should call the :class:<code>Module</code> instance afterwards
instead of this since the former takes care of running the
registered hooks while the latter silently ignores them.</p>
</div></div>
<div class="git-link-div"><a href="https://github.com/yochem/cap/blob/66fba33ce6fcace37c221e371bc1fceac99b4a9a/cap/caption_segmentation.py#L128-L133" class="git-link">Browse git</a></div>
</dd>
</dl>
</dd>
</dl>
</section>
</article>
<nav id="sidebar">
<h1>Index</h1>
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="cap" href="index.html">cap</a></code></li>
</ul>
</li>
<li><h3><a href="#header-functions">Functions</a></h3>
<ul class="">
<li><code><a title="cap.caption_segmentation.create_testdata" href="#cap.caption_segmentation.create_testdata">create_testdata</a></code></li>
<li><code><a title="cap.caption_segmentation.create_traindata" href="#cap.caption_segmentation.create_traindata">create_traindata</a></code></li>
<li><code><a title="cap.caption_segmentation.prepare_sequence" href="#cap.caption_segmentation.prepare_sequence">prepare_sequence</a></code></li>
<li><code><a title="cap.caption_segmentation.train" href="#cap.caption_segmentation.train">train</a></code></li>
</ul>
</li>
<li><h3><a href="#header-classes">Classes</a></h3>
<ul>
<li>
<h4><code><a title="cap.caption_segmentation.LSTMCaption" href="#cap.caption_segmentation.LSTMCaption">LSTMCaption</a></code></h4>
<ul class="">
<li><code><a title="cap.caption_segmentation.LSTMCaption.forward" href="#cap.caption_segmentation.LSTMCaption.forward">forward</a></code></li>
</ul>
</li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.3</a>.</p>
</footer>
</body>
</html>