Diff of /utils/segment/loss.py [000000] .. [190ca4]

Switch to unified view

a b/utils/segment/loss.py
1
import torch
2
import torch.nn as nn
3
import torch.nn.functional as F
4
5
from ..general import xywh2xyxy
6
from ..loss import FocalLoss, smooth_BCE
7
from ..metrics import bbox_iou
8
from ..torch_utils import de_parallel
9
from .general import crop_mask
10
11
12
class ComputeLoss:
    """Box (CIoU), objectness, classification and mask loss for a detection + segmentation head.

    The instance is callable: ``loss, loss_items = compute_loss(preds, targets, masks)``.
    """

    def __init__(self, model, autobalance=False, overlap=False):
        """Read hyperparameters and head geometry from ``model``.

        Args:
            model: object exposing ``parameters()``, a ``hyp`` mapping (keys read here:
                ``cls_pw``, ``obj_pw``, ``fl_gamma`` and optionally ``label_smoothing``;
                ``box``, ``obj``, ``cls`` and ``anchor_t`` are read later) and, after
                ``de_parallel``, a ``model`` sequence whose last element provides
                ``nl``, ``na``, ``nc``, ``nm``, ``stride`` and ``anchors``.
            autobalance: when True, the per-layer objectness weights in ``self.balance``
                are updated on every call and renormalised to the stride-16 layer.
            overlap: selects how ``masks`` is indexed in ``__call__`` and how target
                indices are numbered in ``build_targets`` (per image, 1-based, when True;
                globally, 0-based, when False).
        """
        self.sort_obj_iou = False
        self.overlap = overlap
        device = next(model.parameters()).device  # get model device
        h = model.hyp  # hyperparameters

        # Define criteria
        BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
        BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))

        # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
        self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0))  # positive, negative BCE targets

        # Focal loss: wrap both criteria only when gamma is positive
        g = h['fl_gamma']  # focal loss gamma
        if g > 0:
            BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

        m = de_parallel(model).model[-1]  # last module of the model (the detection/segmentation head)
        # Per-layer objectness weights: 3-layer heads use the short list, anything else the 5-entry list
        self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02])  # P3-P7
        self.ssi = list(m.stride).index(16) if autobalance else 0  # stride 16 index
        self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
        self.na = m.na  # number of anchors
        self.nc = m.nc  # number of classes
        self.nl = m.nl  # number of layers
        self.nm = m.nm  # number of masks
        self.anchors = m.anchors
        self.device = device

    def __call__(self, preds, targets, masks):  # predictions, targets, masks
        """Compute the total loss and its detached components.

        Args:
            preds: pair ``(p, proto)``. ``p`` is a sequence with one tensor per output layer,
                indexed as ``pi[image, anchor, gridy, gridx]`` with a last dimension that splits
                into ``(2, 2, 1, nc, nm)`` (xy, wh, objectness, classes, mask coefficients).
                ``proto`` has shape ``(bs, nm, mask_h, mask_w)``.
            targets: tensor of rows ``(image, class, x, y, w, h)`` (see ``build_targets``).
            masks: ground-truth masks. With ``overlap=True`` it is indexed per image
                (``masks[bi]``) and compared against 1-based per-image target indices;
                otherwise it is indexed by global target index. It is downsampled with
                nearest-neighbour interpolation when its last two dims differ from
                ``(mask_h, mask_w)``.

        Returns:
            ``(loss * bs, items)`` where ``items`` is the detached concatenation
            ``(lbox, lseg, lobj, lcls)`` after hyperparameter weighting.
        """
        p, proto = preds
        bs, nm, mask_h, mask_w = proto.shape  # batch size, number of masks, mask height, mask width
        lcls = torch.zeros(1, device=self.device)
        lbox = torch.zeros(1, device=self.device)
        lobj = torch.zeros(1, device=self.device)
        lseg = torch.zeros(1, device=self.device)
        tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets)  # targets

        # Loop-invariant: scale factors taking normalised xywh to mask-pixel units
        mask_scale = torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)

        # Losses
        for i, pi in enumerate(p):  # layer index, layer predictions
            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
            tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device)  # target obj

            n = b.shape[0]  # number of targets
            if n:
                pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1)  # subset of predictions

                # Box regression
                pxy = pxy.sigmoid() * 2 - 0.5
                pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
                pbox = torch.cat((pxy, pwh), 1)  # predicted box
                iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze()  # iou(prediction, target)
                lbox += (1.0 - iou).mean()  # iou loss

                # Objectness: the (detached, non-negative) IoU is the objectness target
                iou = iou.detach().clamp(0).type(tobj.dtype)
                if self.sort_obj_iou:
                    j = iou.argsort()
                    b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
                if self.gr < 1:
                    iou = (1.0 - self.gr) + self.gr * iou
                tobj[b, a, gj, gi] = iou  # iou ratio

                # Classification
                if self.nc > 1:  # cls loss (only if multiple classes)
                    t = torch.full_like(pcls, self.cn, device=self.device)  # targets
                    t[range(n), tcls[i]] = self.cp
                    lcls += self.BCEcls(pcls, t)  # BCE

                # Mask regression
                # Downsample ground truth once; after the first resize this check is a no-op.
                if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
                    masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0]
                marea = xywhn[i][:, 2:].prod(1)  # normalised box width * height
                mxyxy = xywh2xyxy(xywhn[i] * mask_scale)
                for bi in b.unique():
                    j = b == bi  # matching index
                    if self.overlap:
                        # One binary mask per target: pixels equal to that target's per-image index
                        mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0)
                    else:
                        mask_gti = masks[tidxs[i]][j]
                    lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j])

            obji = self.BCEobj(pi[..., 4], tobj)
            lobj += obji * self.balance[i]  # obj loss
            if self.autobalance:
                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

        if self.autobalance:
            self.balance = [x / self.balance[self.ssi] for x in self.balance]
        lbox *= self.hyp['box']
        lobj *= self.hyp['obj']
        lcls *= self.hyp['cls']
        lseg *= self.hyp['box'] / bs  # mask loss reuses the 'box' gain, averaged over the batch

        loss = lbox + lobj + lcls + lseg
        return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach()

    def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
        """Mask loss for the targets of one image.

        Args:
            gt_mask: ground-truth masks, one per target, matching the predicted mask shape.
            pred: mask coefficients, ``(n, nm)``.
            proto: prototype masks of one image, ``(nm, h, w)``.
            xyxy: per-target boxes passed to ``crop_mask``
                (presumably restricts the loss to the box region; confirm against ``crop_mask``).
            area: per-target normaliser the cropped mean loss is divided by.

        Returns:
            Scalar tensor: mean over targets of (cropped per-pixel BCE mean / area).
        """
        pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:])  # (n,32) @ (32,80,80) -> (n,80,80)
        loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none')
        return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()

    def build_targets(self, p, targets):
        """Match targets to anchors and grid cells for every output layer.

        Args:
            p: sequence of per-layer prediction tensors; only their shapes are read
                (``shape[0]`` as batch size, ``shape[2]``/``shape[3]`` as grid height/width).
            targets: ``(nt, 6)`` tensor of rows ``(image, class, x, y, w, h)``; x, y, w, h are
                multiplied by the grid size, i.e. treated as normalised. In overlap mode the
                per-image index assignment assumes rows are grouped by ascending image index.

        Returns:
            Six lists with one entry per layer:
            ``tcls`` (class ids), ``tbox`` (xy offset within the cell + grid wh),
            ``indices`` (image, anchor, gridy, gridx), ``anch`` (matched anchor sizes),
            ``tidxs`` (target index used to look up masks), ``xywhn`` (normalised xywh).
        """
        na, nt = self.na, targets.shape[0]  # number of anchors, targets
        tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], []
        gain = torch.ones(8, device=self.device)  # normalized to gridspace gain
        ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
        if self.overlap:
            # Per-image, 1-based target indices (0 is left free, presumably for background)
            batch = p[0].shape[0]
            ti = []
            for i in range(batch):
                num = int((targets[:, 0] == i).sum())  # number of targets in image i
                ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1)  # (na, num)
            ti = torch.cat(ti, 1)  # (na, nt)
        else:
            # Global, 0-based target indices
            ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1)
        # (na, nt, 8): image, class, x, y, w, h, anchor index, target index
        targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2)

        g = 0.5  # bias
        off = torch.tensor(
            [
                [0, 0],
                [1, 0],
                [0, 1],
                [-1, 0],
                [0, -1],  # j,k,l,m
                # diagonal neighbours are intentionally not used
            ],
            device=self.device).float() * g  # offsets

        for i in range(self.nl):
            anchors, shape = self.anchors[i], p[i].shape
            gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]]  # xywh gain (grid width, height, width, height)

            # Match targets to anchors
            t = targets * gain  # shape(na, nt, 8)
            if nt:
                # Keep target/anchor pairs whose width and height ratios are both within anchor_t
                r = t[..., 4:6] / anchors[:, None]  # wh ratio
                j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t']  # compare
                t = t[j]  # filter

                # Offsets: also assign each target to the neighbouring cells its centre is closest to
                gxy = t[:, 2:4]  # grid xy
                gxi = gain[[2, 3]] - gxy  # inverse
                j, k = ((gxy % 1 < g) & (gxy > 1)).T
                l, m = ((gxi % 1 < g) & (gxi > 1)).T
                j = torch.stack((torch.ones_like(j), j, k, l, m))
                t = t.repeat((5, 1, 1))[j]
                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
            else:
                t = targets[0]
                offsets = 0

            # Define
            bc, gxy, gwh, at = t.chunk(4, 1)  # (image, class), grid xy, grid wh, (anchor, target index)
            (a, tidx), (b, c) = at.long().T, bc.long().T  # anchors, target indices, image, class
            gij = (gxy - offsets).long()
            gi, gj = gij.T  # grid indices

            # Append
            indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1)))  # image, anchor, grid
            tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
            anch.append(anchors[a])  # anchors
            tcls.append(c)  # class
            tidxs.append(tidx)
            xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6])  # xywh normalized

        return tcls, tbox, indices, anch, tidxs, xywhn